{ "best_metric": null, "best_model_checkpoint": null, "epoch": 28.125, "eval_steps": 250, "global_step": 6750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004166666666666667, "grad_norm": 15.225836429636562, "learning_rate": 5e-05, "loss": 0.6348, "num_input_tokens_seen": 91672, "step": 1 }, { "epoch": 0.004166666666666667, "loss": 0.6350572109222412, "loss_ce": 0.3329332172870636, "loss_iou": 0.44140625, "loss_num": 0.060546875, "loss_xval": 0.302734375, "num_input_tokens_seen": 91672, "step": 1 }, { "epoch": 0.008333333333333333, "grad_norm": 43.774989330975025, "learning_rate": 5e-05, "loss": 0.685, "num_input_tokens_seen": 182816, "step": 2 }, { "epoch": 0.008333333333333333, "loss": 0.5322504043579102, "loss_ce": 0.18776792287826538, "loss_iou": 0.451171875, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 182816, "step": 2 }, { "epoch": 0.0125, "grad_norm": 33.173891217282254, "learning_rate": 5e-05, "loss": 0.8189, "num_input_tokens_seen": 274016, "step": 3 }, { "epoch": 0.0125, "loss": 0.7139671444892883, "loss_ce": 0.21665266156196594, "loss_iou": 0.1796875, "loss_num": 0.099609375, "loss_xval": 0.498046875, "num_input_tokens_seen": 274016, "step": 3 }, { "epoch": 0.016666666666666666, "grad_norm": 40.975983462161, "learning_rate": 5e-05, "loss": 0.7548, "num_input_tokens_seen": 365104, "step": 4 }, { "epoch": 0.016666666666666666, "loss": 0.7472636699676514, "loss_ce": 0.21650193631649017, "loss_iou": 0.34375, "loss_num": 0.1064453125, "loss_xval": 0.53125, "num_input_tokens_seen": 365104, "step": 4 }, { "epoch": 0.020833333333333332, "grad_norm": 29.770808911000874, "learning_rate": 5e-05, "loss": 0.5785, "num_input_tokens_seen": 456624, "step": 5 }, { "epoch": 0.020833333333333332, "loss": 0.6999983787536621, "loss_ce": 0.11918780207633972, "loss_iou": 0.2421875, "loss_num": 0.1162109375, "loss_xval": 0.58203125, "num_input_tokens_seen": 456624, "step": 5 }, { "epoch": 0.025, "grad_norm": 28.600650860163952, "learning_rate": 5e-05, "loss": 0.5233, "num_input_tokens_seen": 547728, "step": 6 }, { "epoch": 0.025, "loss": 0.4359915554523468, "loss_ce": 0.034868501126766205, "loss_iou": 0.40625, "loss_num": 0.080078125, "loss_xval": 0.400390625, "num_input_tokens_seen": 547728, "step": 6 }, { "epoch": 0.029166666666666667, "grad_norm": 71.49824118709361, "learning_rate": 5e-05, "loss": 0.5259, "num_input_tokens_seen": 638328, "step": 7 }, { "epoch": 0.029166666666666667, "loss": 0.5717108845710754, "loss_ce": 0.059625912457704544, "loss_iou": 0.390625, "loss_num": 0.10205078125, "loss_xval": 0.51171875, "num_input_tokens_seen": 638328, "step": 7 }, { "epoch": 0.03333333333333333, "grad_norm": 21.14414243809237, "learning_rate": 5e-05, "loss": 0.3605, "num_input_tokens_seen": 730144, "step": 8 }, { "epoch": 0.03333333333333333, "loss": 0.3930358290672302, "loss_ce": 0.09115596115589142, "loss_iou": 0.5546875, "loss_num": 0.06005859375, "loss_xval": 0.302734375, "num_input_tokens_seen": 730144, "step": 8 }, { "epoch": 0.0375, "grad_norm": 8.929265747178142, "learning_rate": 5e-05, "loss": 0.5824, "num_input_tokens_seen": 820752, "step": 9 }, { "epoch": 0.0375, "loss": 0.616942286491394, "loss_ce": 0.13989152014255524, "loss_iou": 0.41796875, "loss_num": 0.09521484375, "loss_xval": 0.4765625, "num_input_tokens_seen": 820752, "step": 9 }, { "epoch": 0.041666666666666664, "grad_norm": 23.71291071409022, "learning_rate": 5e-05, "loss": 0.4991, "num_input_tokens_seen": 911332, "step": 10 }, { "epoch": 0.041666666666666664, "loss": 0.5156526565551758, "loss_ce": 0.07937334477901459, "loss_iou": 0.30859375, "loss_num": 0.0869140625, "loss_xval": 0.435546875, "num_input_tokens_seen": 911332, "step": 10 }, { "epoch": 0.04583333333333333, "grad_norm": 12.311647302325172, "learning_rate": 5e-05, "loss": 0.4041, "num_input_tokens_seen": 1002020, "step": 11 }, { "epoch": 0.04583333333333333, "loss": 0.4094349145889282, "loss_ce": 0.09174692630767822, "loss_iou": 0.35546875, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 1002020, "step": 11 }, { "epoch": 0.05, "grad_norm": 25.222662070545216, "learning_rate": 5e-05, "loss": 0.4487, "num_input_tokens_seen": 1093716, "step": 12 }, { "epoch": 0.05, "loss": 0.4946695566177368, "loss_ce": 0.052530914545059204, "loss_iou": 0.0, "loss_num": 0.08837890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 1093716, "step": 12 }, { "epoch": 0.05416666666666667, "grad_norm": 36.595578870386646, "learning_rate": 5e-05, "loss": 0.4639, "num_input_tokens_seen": 1185292, "step": 13 }, { "epoch": 0.05416666666666667, "loss": 0.4886914789676666, "loss_ce": 0.06559577584266663, "loss_iou": 0.55078125, "loss_num": 0.083984375, "loss_xval": 0.423828125, "num_input_tokens_seen": 1185292, "step": 13 }, { "epoch": 0.058333333333333334, "grad_norm": 16.932254739386053, "learning_rate": 5e-05, "loss": 0.4863, "num_input_tokens_seen": 1276800, "step": 14 }, { "epoch": 0.058333333333333334, "loss": 0.5169302821159363, "loss_ce": 0.04061192646622658, "loss_iou": 0.4140625, "loss_num": 0.0947265625, "loss_xval": 0.4765625, "num_input_tokens_seen": 1276800, "step": 14 }, { "epoch": 0.0625, "grad_norm": 20.112262019681005, "learning_rate": 5e-05, "loss": 0.4915, "num_input_tokens_seen": 1368160, "step": 15 }, { "epoch": 0.0625, "loss": 0.4668349027633667, "loss_ce": 0.052039965987205505, "loss_iou": 0.3515625, "loss_num": 0.08251953125, "loss_xval": 0.4140625, "num_input_tokens_seen": 1368160, "step": 15 }, { "epoch": 0.06666666666666667, "grad_norm": 19.330753258290258, "learning_rate": 5e-05, "loss": 0.3854, "num_input_tokens_seen": 1459500, "step": 16 }, { "epoch": 0.06666666666666667, "loss": 0.30341359972953796, "loss_ce": 0.025642598047852516, "loss_iou": 0.21484375, "loss_num": 0.05517578125, "loss_xval": 0.27734375, "num_input_tokens_seen": 1459500, "step": 16 }, { "epoch": 0.07083333333333333, "grad_norm": 15.206029260523568, "learning_rate": 5e-05, "loss": 0.3651, "num_input_tokens_seen": 1549624, "step": 17 }, { "epoch": 0.07083333333333333, "loss": 0.31986337900161743, "loss_ce": 0.00980481505393982, "loss_iou": 0.404296875, "loss_num": 0.0615234375, "loss_xval": 0.310546875, "num_input_tokens_seen": 1549624, "step": 17 }, { "epoch": 0.075, "grad_norm": 11.090189016650587, "learning_rate": 5e-05, "loss": 0.3437, "num_input_tokens_seen": 1641188, "step": 18 }, { "epoch": 0.075, "loss": 0.3119279146194458, "loss_ce": 0.006019714288413525, "loss_iou": 0.298828125, "loss_num": 0.060791015625, "loss_xval": 0.306640625, "num_input_tokens_seen": 1641188, "step": 18 }, { "epoch": 0.07916666666666666, "grad_norm": 7.815000271195775, "learning_rate": 5e-05, "loss": 0.4179, "num_input_tokens_seen": 1732592, "step": 19 }, { "epoch": 0.07916666666666666, "loss": 0.37536707520484924, "loss_ce": 0.05020228400826454, "loss_iou": 0.234375, "loss_num": 0.064453125, "loss_xval": 0.32421875, "num_input_tokens_seen": 1732592, "step": 19 }, { "epoch": 0.08333333333333333, "grad_norm": 17.019565838229987, "learning_rate": 5e-05, "loss": 0.3924, "num_input_tokens_seen": 1824392, "step": 20 }, { "epoch": 0.08333333333333333, "loss": 0.4389875829219818, "loss_ce": 0.053001243621110916, "loss_iou": 0.197265625, "loss_num": 0.07666015625, "loss_xval": 0.38671875, "num_input_tokens_seen": 1824392, "step": 20 }, { "epoch": 0.0875, "grad_norm": 27.289690836694703, "learning_rate": 5e-05, "loss": 0.3755, "num_input_tokens_seen": 1916148, "step": 21 }, { "epoch": 0.0875, "loss": 0.38594403862953186, "loss_ce": 0.006244330201297998, "loss_iou": 0.283203125, "loss_num": 0.0751953125, "loss_xval": 0.37890625, "num_input_tokens_seen": 1916148, "step": 21 }, { "epoch": 0.09166666666666666, "grad_norm": 9.666250864795536, "learning_rate": 5e-05, "loss": 0.3872, "num_input_tokens_seen": 2007508, "step": 22 }, { "epoch": 0.09166666666666666, "loss": 0.42383676767349243, "loss_ce": 0.02857312560081482, "loss_iou": 0.15234375, "loss_num": 0.07861328125, "loss_xval": 0.39453125, "num_input_tokens_seen": 2007508, "step": 22 }, { "epoch": 0.09583333333333334, "grad_norm": 51.40578636615118, "learning_rate": 5e-05, "loss": 0.4555, "num_input_tokens_seen": 2098884, "step": 23 }, { "epoch": 0.09583333333333334, "loss": 0.5797292590141296, "loss_ce": 0.024309329688549042, "loss_iou": 0.1376953125, "loss_num": 0.11083984375, "loss_xval": 0.5546875, "num_input_tokens_seen": 2098884, "step": 23 }, { "epoch": 0.1, "grad_norm": 51.60493277040869, "learning_rate": 5e-05, "loss": 0.3568, "num_input_tokens_seen": 2190524, "step": 24 }, { "epoch": 0.1, "loss": 0.3909764885902405, "loss_ce": 0.031479425728321075, "loss_iou": 0.2216796875, "loss_num": 0.0712890625, "loss_xval": 0.359375, "num_input_tokens_seen": 2190524, "step": 24 }, { "epoch": 0.10416666666666667, "grad_norm": 16.93786629596498, "learning_rate": 5e-05, "loss": 0.3903, "num_input_tokens_seen": 2281704, "step": 25 }, { "epoch": 0.10416666666666667, "loss": 0.4263181686401367, "loss_ce": 0.012866010889410973, "loss_iou": 0.3984375, "loss_num": 0.08203125, "loss_xval": 0.4140625, "num_input_tokens_seen": 2281704, "step": 25 }, { "epoch": 0.10833333333333334, "grad_norm": 8.204292663190124, "learning_rate": 5e-05, "loss": 0.3353, "num_input_tokens_seen": 2373136, "step": 26 }, { "epoch": 0.10833333333333334, "loss": 0.3441917300224304, "loss_ce": 0.04109114035964012, "loss_iou": 0.416015625, "loss_num": 0.059814453125, "loss_xval": 0.302734375, "num_input_tokens_seen": 2373136, "step": 26 }, { "epoch": 0.1125, "grad_norm": 14.965225605623639, "learning_rate": 5e-05, "loss": 0.3129, "num_input_tokens_seen": 2464864, "step": 27 }, { "epoch": 0.1125, "loss": 0.2762772738933563, "loss_ce": 0.008210879750549793, "loss_iou": 0.185546875, "loss_num": 0.05322265625, "loss_xval": 0.267578125, "num_input_tokens_seen": 2464864, "step": 27 }, { "epoch": 0.11666666666666667, "grad_norm": 8.997632193662453, "learning_rate": 5e-05, "loss": 0.3726, "num_input_tokens_seen": 2556148, "step": 28 }, { "epoch": 0.11666666666666667, "loss": 0.29735785722732544, "loss_ce": 0.0031683961860835552, "loss_iou": 0.39453125, "loss_num": 0.05810546875, "loss_xval": 0.294921875, "num_input_tokens_seen": 2556148, "step": 28 }, { "epoch": 0.12083333333333333, "grad_norm": 9.340585910836419, "learning_rate": 5e-05, "loss": 0.3192, "num_input_tokens_seen": 2647432, "step": 29 }, { "epoch": 0.12083333333333333, "loss": 0.3471378684043884, "loss_ce": 0.037689611315727234, "loss_iou": 0.50390625, "loss_num": 0.060791015625, "loss_xval": 0.30859375, "num_input_tokens_seen": 2647432, "step": 29 }, { "epoch": 0.125, "grad_norm": 6.897516566909319, "learning_rate": 5e-05, "loss": 0.4767, "num_input_tokens_seen": 2738528, "step": 30 }, { "epoch": 0.125, "loss": 0.5621503591537476, "loss_ce": 0.0030683819204568863, "loss_iou": 0.6015625, "loss_num": 0.1103515625, "loss_xval": 0.55859375, "num_input_tokens_seen": 2738528, "step": 30 }, { "epoch": 0.12916666666666668, "grad_norm": 6.137522658485071, "learning_rate": 5e-05, "loss": 0.3425, "num_input_tokens_seen": 2829600, "step": 31 }, { "epoch": 0.12916666666666668, "loss": 0.31172770261764526, "loss_ce": 0.00258462643250823, "loss_iou": 0.408203125, "loss_num": 0.060791015625, "loss_xval": 0.30859375, "num_input_tokens_seen": 2829600, "step": 31 }, { "epoch": 0.13333333333333333, "grad_norm": 11.061040660241414, "learning_rate": 5e-05, "loss": 0.3697, "num_input_tokens_seen": 2920560, "step": 32 }, { "epoch": 0.13333333333333333, "loss": 0.455640584230423, "loss_ce": 0.01374603807926178, "loss_iou": 0.021728515625, "loss_num": 0.08837890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 2920560, "step": 32 }, { "epoch": 0.1375, "grad_norm": 7.153570971149252, "learning_rate": 5e-05, "loss": 0.2913, "num_input_tokens_seen": 3011632, "step": 33 }, { "epoch": 0.1375, "loss": 0.26216161251068115, "loss_ce": 0.0017856480553746223, "loss_iou": 0.466796875, "loss_num": 0.051025390625, "loss_xval": 0.259765625, "num_input_tokens_seen": 3011632, "step": 33 }, { "epoch": 0.14166666666666666, "grad_norm": 9.363662789430165, "learning_rate": 5e-05, "loss": 0.3185, "num_input_tokens_seen": 3103048, "step": 34 }, { "epoch": 0.14166666666666666, "loss": 0.23897996544837952, "loss_ce": 0.0021635466255247593, "loss_iou": 0.431640625, "loss_num": 0.046142578125, "loss_xval": 0.236328125, "num_input_tokens_seen": 3103048, "step": 34 }, { "epoch": 0.14583333333333334, "grad_norm": 42.136960884006605, "learning_rate": 5e-05, "loss": 0.345, "num_input_tokens_seen": 3194184, "step": 35 }, { "epoch": 0.14583333333333334, "loss": 0.2797856330871582, "loss_ce": 0.00503586744889617, "loss_iou": 0.443359375, "loss_num": 0.0537109375, "loss_xval": 0.275390625, "num_input_tokens_seen": 3194184, "step": 35 }, { "epoch": 0.15, "grad_norm": 19.84075677755947, "learning_rate": 5e-05, "loss": 0.3154, "num_input_tokens_seen": 3286064, "step": 36 }, { "epoch": 0.15, "loss": 0.34752780199050903, "loss_ce": 0.006341293454170227, "loss_iou": 0.5, "loss_num": 0.06689453125, "loss_xval": 0.341796875, "num_input_tokens_seen": 3286064, "step": 36 }, { "epoch": 0.15416666666666667, "grad_norm": 12.230342812593133, "learning_rate": 5e-05, "loss": 0.2361, "num_input_tokens_seen": 3377544, "step": 37 }, { "epoch": 0.15416666666666667, "loss": 0.2773832082748413, "loss_ce": 0.016152730211615562, "loss_iou": 0.4140625, "loss_num": 0.051025390625, "loss_xval": 0.26171875, "num_input_tokens_seen": 3377544, "step": 37 }, { "epoch": 0.15833333333333333, "grad_norm": 22.44920054627549, "learning_rate": 5e-05, "loss": 0.3588, "num_input_tokens_seen": 3468760, "step": 38 }, { "epoch": 0.15833333333333333, "loss": 0.338986337184906, "loss_ce": 0.011349605396389961, "loss_iou": 0.384765625, "loss_num": 0.064453125, "loss_xval": 0.328125, "num_input_tokens_seen": 3468760, "step": 38 }, { "epoch": 0.1625, "grad_norm": 45.16270859992362, "learning_rate": 5e-05, "loss": 0.3721, "num_input_tokens_seen": 3560220, "step": 39 }, { "epoch": 0.1625, "loss": 0.4003419578075409, "loss_ce": 0.00947281252592802, "loss_iou": 0.640625, "loss_num": 0.076171875, "loss_xval": 0.390625, "num_input_tokens_seen": 3560220, "step": 39 }, { "epoch": 0.16666666666666666, "grad_norm": 8.84677489699275, "learning_rate": 5e-05, "loss": 0.2993, "num_input_tokens_seen": 3652116, "step": 40 }, { "epoch": 0.16666666666666666, "loss": 0.3454613983631134, "loss_ce": 0.00494627607986331, "loss_iou": 0.447265625, "loss_num": 0.06689453125, "loss_xval": 0.33984375, "num_input_tokens_seen": 3652116, "step": 40 }, { "epoch": 0.17083333333333334, "grad_norm": 19.296762031984866, "learning_rate": 5e-05, "loss": 0.3231, "num_input_tokens_seen": 3743564, "step": 41 }, { "epoch": 0.17083333333333334, "loss": 0.33203789591789246, "loss_ce": 0.010382615029811859, "loss_iou": 0.408203125, "loss_num": 0.06298828125, "loss_xval": 0.322265625, "num_input_tokens_seen": 3743564, "step": 41 }, { "epoch": 0.175, "grad_norm": 9.611147368567874, "learning_rate": 5e-05, "loss": 0.4113, "num_input_tokens_seen": 3833652, "step": 42 }, { "epoch": 0.175, "loss": 0.3624129593372345, "loss_ce": 0.005113149061799049, "loss_iou": 0.29296875, "loss_num": 0.0703125, "loss_xval": 0.357421875, "num_input_tokens_seen": 3833652, "step": 42 }, { "epoch": 0.17916666666666667, "grad_norm": 14.443376364235421, "learning_rate": 5e-05, "loss": 0.2944, "num_input_tokens_seen": 3923920, "step": 43 }, { "epoch": 0.17916666666666667, "loss": 0.25886574387550354, "loss_ce": 0.007675546687096357, "loss_iou": 0.20703125, "loss_num": 0.049560546875, "loss_xval": 0.251953125, "num_input_tokens_seen": 3923920, "step": 43 }, { "epoch": 0.18333333333333332, "grad_norm": 9.688109757625302, "learning_rate": 5e-05, "loss": 0.317, "num_input_tokens_seen": 4015400, "step": 44 }, { "epoch": 0.18333333333333332, "loss": 0.3496725559234619, "loss_ce": 0.007753598503768444, "loss_iou": 0.54296875, "loss_num": 0.06640625, "loss_xval": 0.341796875, "num_input_tokens_seen": 4015400, "step": 44 }, { "epoch": 0.1875, "grad_norm": 14.737713800416389, "learning_rate": 5e-05, "loss": 0.3431, "num_input_tokens_seen": 4107036, "step": 45 }, { "epoch": 0.1875, "loss": 0.28952276706695557, "loss_ce": 0.010653123259544373, "loss_iou": 0.484375, "loss_num": 0.053955078125, "loss_xval": 0.279296875, "num_input_tokens_seen": 4107036, "step": 45 }, { "epoch": 0.19166666666666668, "grad_norm": 12.460730457125793, "learning_rate": 5e-05, "loss": 0.3115, "num_input_tokens_seen": 4198220, "step": 46 }, { "epoch": 0.19166666666666668, "loss": 0.35599175095558167, "loss_ce": 0.007236876059323549, "loss_iou": 0.2216796875, "loss_num": 0.06884765625, "loss_xval": 0.349609375, "num_input_tokens_seen": 4198220, "step": 46 }, { "epoch": 0.19583333333333333, "grad_norm": 8.898392444091169, "learning_rate": 5e-05, "loss": 0.2926, "num_input_tokens_seen": 4289704, "step": 47 }, { "epoch": 0.19583333333333333, "loss": 0.318065345287323, "loss_ce": 0.0007435796433128417, "loss_iou": 0.462890625, "loss_num": 0.061767578125, "loss_xval": 0.31640625, "num_input_tokens_seen": 4289704, "step": 47 }, { "epoch": 0.2, "grad_norm": 67.97750606769903, "learning_rate": 5e-05, "loss": 0.3416, "num_input_tokens_seen": 4380952, "step": 48 }, { "epoch": 0.2, "loss": 0.3091282844543457, "loss_ce": 0.014999864622950554, "loss_iou": 0.38671875, "loss_num": 0.057373046875, "loss_xval": 0.294921875, "num_input_tokens_seen": 4380952, "step": 48 }, { "epoch": 0.20416666666666666, "grad_norm": 6.486228680019228, "learning_rate": 5e-05, "loss": 0.3388, "num_input_tokens_seen": 4471980, "step": 49 }, { "epoch": 0.20416666666666666, "loss": 0.370783269405365, "loss_ce": 0.0007881773635745049, "loss_iou": 0.44921875, "loss_num": 0.072265625, "loss_xval": 0.369140625, "num_input_tokens_seen": 4471980, "step": 49 }, { "epoch": 0.20833333333333334, "grad_norm": 8.955989103250289, "learning_rate": 5e-05, "loss": 0.2479, "num_input_tokens_seen": 4563032, "step": 50 }, { "epoch": 0.20833333333333334, "loss": 0.2502215504646301, "loss_ce": 0.024360958486795425, "loss_iou": 0.23828125, "loss_num": 0.044189453125, "loss_xval": 0.2255859375, "num_input_tokens_seen": 4563032, "step": 50 }, { "epoch": 0.2125, "grad_norm": 31.650556935603895, "learning_rate": 5e-05, "loss": 0.3706, "num_input_tokens_seen": 4654648, "step": 51 }, { "epoch": 0.2125, "loss": 0.33480727672576904, "loss_ce": 0.018278930336236954, "loss_iou": 0.1875, "loss_num": 0.0625, "loss_xval": 0.31640625, "num_input_tokens_seen": 4654648, "step": 51 }, { "epoch": 0.21666666666666667, "grad_norm": 7.122796481536201, "learning_rate": 5e-05, "loss": 0.2971, "num_input_tokens_seen": 4746260, "step": 52 }, { "epoch": 0.21666666666666667, "loss": 0.2732027769088745, "loss_ce": 0.012216457165777683, "loss_iou": 0.47265625, "loss_num": 0.05029296875, "loss_xval": 0.26171875, "num_input_tokens_seen": 4746260, "step": 52 }, { "epoch": 0.22083333333333333, "grad_norm": 6.577564970207067, "learning_rate": 5e-05, "loss": 0.2967, "num_input_tokens_seen": 4837624, "step": 53 }, { "epoch": 0.22083333333333333, "loss": 0.29663506150245667, "loss_ce": 0.011112616397440434, "loss_iou": 0.47265625, "loss_num": 0.05517578125, "loss_xval": 0.28515625, "num_input_tokens_seen": 4837624, "step": 53 }, { "epoch": 0.225, "grad_norm": 4.981634336121052, "learning_rate": 5e-05, "loss": 0.3019, "num_input_tokens_seen": 4928564, "step": 54 }, { "epoch": 0.225, "loss": 0.2648267447948456, "loss_ce": 0.002497634617611766, "loss_iou": 0.466796875, "loss_num": 0.05029296875, "loss_xval": 0.26171875, "num_input_tokens_seen": 4928564, "step": 54 }, { "epoch": 0.22916666666666666, "grad_norm": 13.548163897961462, "learning_rate": 5e-05, "loss": 0.3469, "num_input_tokens_seen": 5018920, "step": 55 }, { "epoch": 0.22916666666666666, "loss": 0.37160640954971313, "loss_ce": 0.031274404376745224, "loss_iou": 0.384765625, "loss_num": 0.06640625, "loss_xval": 0.33984375, "num_input_tokens_seen": 5018920, "step": 55 }, { "epoch": 0.23333333333333334, "grad_norm": 8.432532350358397, "learning_rate": 5e-05, "loss": 0.298, "num_input_tokens_seen": 5110704, "step": 56 }, { "epoch": 0.23333333333333334, "loss": 0.2524632215499878, "loss_ce": 0.00435529975220561, "loss_iou": 0.302734375, "loss_num": 0.04833984375, "loss_xval": 0.248046875, "num_input_tokens_seen": 5110704, "step": 56 }, { "epoch": 0.2375, "grad_norm": 8.823900463968759, "learning_rate": 5e-05, "loss": 0.2824, "num_input_tokens_seen": 5201996, "step": 57 }, { "epoch": 0.2375, "loss": 0.25943028926849365, "loss_ce": 0.012115844525396824, "loss_iou": 0.458984375, "loss_num": 0.04736328125, "loss_xval": 0.2470703125, "num_input_tokens_seen": 5201996, "step": 57 }, { "epoch": 0.24166666666666667, "grad_norm": 4.634353168268041, "learning_rate": 5e-05, "loss": 0.3658, "num_input_tokens_seen": 5292808, "step": 58 }, { "epoch": 0.24166666666666667, "loss": 0.4157818555831909, "loss_ce": 0.023814085870981216, "loss_iou": 0.3046875, "loss_num": 0.0771484375, "loss_xval": 0.392578125, "num_input_tokens_seen": 5292808, "step": 58 }, { "epoch": 0.24583333333333332, "grad_norm": 5.143584787871478, "learning_rate": 5e-05, "loss": 0.2389, "num_input_tokens_seen": 5384260, "step": 59 }, { "epoch": 0.24583333333333332, "loss": 0.2749660611152649, "loss_ce": 0.005678959656506777, "loss_iou": 0.5, "loss_num": 0.051513671875, "loss_xval": 0.26953125, "num_input_tokens_seen": 5384260, "step": 59 }, { "epoch": 0.25, "grad_norm": 10.50448792162475, "learning_rate": 5e-05, "loss": 0.3283, "num_input_tokens_seen": 5475680, "step": 60 }, { "epoch": 0.25, "loss": 0.3407435119152069, "loss_ce": 0.005263775587081909, "loss_iou": 0.455078125, "loss_num": 0.06494140625, "loss_xval": 0.3359375, "num_input_tokens_seen": 5475680, "step": 60 }, { "epoch": 0.25416666666666665, "grad_norm": 11.771930179404578, "learning_rate": 5e-05, "loss": 0.2479, "num_input_tokens_seen": 5567168, "step": 61 }, { "epoch": 0.25416666666666665, "loss": 0.3167330026626587, "loss_ce": 0.010031351819634438, "loss_iou": 0.376953125, "loss_num": 0.0595703125, "loss_xval": 0.306640625, "num_input_tokens_seen": 5567168, "step": 61 }, { "epoch": 0.25833333333333336, "grad_norm": 22.159889524888786, "learning_rate": 5e-05, "loss": 0.3364, "num_input_tokens_seen": 5657968, "step": 62 }, { "epoch": 0.25833333333333336, "loss": 0.34341248869895935, "loss_ce": 0.011869520880281925, "loss_iou": 0.49609375, "loss_num": 0.06396484375, "loss_xval": 0.33203125, "num_input_tokens_seen": 5657968, "step": 62 }, { "epoch": 0.2625, "grad_norm": 16.96247501163317, "learning_rate": 5e-05, "loss": 0.2797, "num_input_tokens_seen": 5749784, "step": 63 }, { "epoch": 0.2625, "loss": 0.19430118799209595, "loss_ce": 0.0032611587084829807, "loss_iou": 0.5546875, "loss_num": 0.035400390625, "loss_xval": 0.19140625, "num_input_tokens_seen": 5749784, "step": 63 }, { "epoch": 0.26666666666666666, "grad_norm": 46.41347542383105, "learning_rate": 5e-05, "loss": 0.3089, "num_input_tokens_seen": 5841148, "step": 64 }, { "epoch": 0.26666666666666666, "loss": 0.34790486097335815, "loss_ce": 0.005558639299124479, "loss_iou": 0.408203125, "loss_num": 0.06640625, "loss_xval": 0.341796875, "num_input_tokens_seen": 5841148, "step": 64 }, { "epoch": 0.2708333333333333, "grad_norm": 6.935369905975971, "learning_rate": 5e-05, "loss": 0.237, "num_input_tokens_seen": 5932988, "step": 65 }, { "epoch": 0.2708333333333333, "loss": 0.24167031049728394, "loss_ce": 0.001985268434509635, "loss_iou": 0.453125, "loss_num": 0.045654296875, "loss_xval": 0.2392578125, "num_input_tokens_seen": 5932988, "step": 65 }, { "epoch": 0.275, "grad_norm": 4.4304114101387375, "learning_rate": 5e-05, "loss": 0.3343, "num_input_tokens_seen": 6023916, "step": 66 }, { "epoch": 0.275, "loss": 0.3570048213005066, "loss_ce": 0.003122997935861349, "loss_iou": 0.53515625, "loss_num": 0.06787109375, "loss_xval": 0.353515625, "num_input_tokens_seen": 6023916, "step": 66 }, { "epoch": 0.2791666666666667, "grad_norm": 26.0353832503157, "learning_rate": 5e-05, "loss": 0.3269, "num_input_tokens_seen": 6115132, "step": 67 }, { "epoch": 0.2791666666666667, "loss": 0.34636855125427246, "loss_ce": 0.007257209159433842, "loss_iou": 0.1767578125, "loss_num": 0.06689453125, "loss_xval": 0.33984375, "num_input_tokens_seen": 6115132, "step": 67 }, { "epoch": 0.2833333333333333, "grad_norm": 16.533984696631137, "learning_rate": 5e-05, "loss": 0.2739, "num_input_tokens_seen": 6206608, "step": 68 }, { "epoch": 0.2833333333333333, "loss": 0.2337610274553299, "loss_ce": 0.018154341727495193, "loss_iou": 0.46484375, "loss_num": 0.04052734375, "loss_xval": 0.2158203125, "num_input_tokens_seen": 6206608, "step": 68 }, { "epoch": 0.2875, "grad_norm": 9.690989332225378, "learning_rate": 5e-05, "loss": 0.2708, "num_input_tokens_seen": 6297836, "step": 69 }, { "epoch": 0.2875, "loss": 0.26258543133735657, "loss_ce": 0.0014770347625017166, "loss_iou": 0.32421875, "loss_num": 0.05029296875, "loss_xval": 0.26171875, "num_input_tokens_seen": 6297836, "step": 69 }, { "epoch": 0.2916666666666667, "grad_norm": 6.900383133357548, "learning_rate": 5e-05, "loss": 0.2778, "num_input_tokens_seen": 6388956, "step": 70 }, { "epoch": 0.2916666666666667, "loss": 0.3020828366279602, "loss_ce": 0.01985626295208931, "loss_iou": 0.46484375, "loss_num": 0.053955078125, "loss_xval": 0.28125, "num_input_tokens_seen": 6388956, "step": 70 }, { "epoch": 0.29583333333333334, "grad_norm": 8.989876897160364, "learning_rate": 5e-05, "loss": 0.1958, "num_input_tokens_seen": 6480796, "step": 71 }, { "epoch": 0.29583333333333334, "loss": 0.2226022183895111, "loss_ce": 0.004340487997978926, "loss_iou": 0.34765625, "loss_num": 0.041748046875, "loss_xval": 0.21875, "num_input_tokens_seen": 6480796, "step": 71 }, { "epoch": 0.3, "grad_norm": 10.580237135943857, "learning_rate": 5e-05, "loss": 0.249, "num_input_tokens_seen": 6571884, "step": 72 }, { "epoch": 0.3, "loss": 0.24384717643260956, "loss_ce": 0.004589363466948271, "loss_iou": 0.546875, "loss_num": 0.044921875, "loss_xval": 0.2392578125, "num_input_tokens_seen": 6571884, "step": 72 }, { "epoch": 0.30416666666666664, "grad_norm": 17.721014973302793, "learning_rate": 5e-05, "loss": 0.2367, "num_input_tokens_seen": 6663112, "step": 73 }, { "epoch": 0.30416666666666664, "loss": 0.28261661529541016, "loss_ce": 0.0018548790831118822, "loss_iou": 0.46875, "loss_num": 0.053466796875, "loss_xval": 0.28125, "num_input_tokens_seen": 6663112, "step": 73 }, { "epoch": 0.30833333333333335, "grad_norm": 10.30157162097328, "learning_rate": 5e-05, "loss": 0.2444, "num_input_tokens_seen": 6755296, "step": 74 }, { "epoch": 0.30833333333333335, "loss": 0.2022906094789505, "loss_ce": 0.00679500587284565, "loss_iou": 0.55859375, "loss_num": 0.035888671875, "loss_xval": 0.1953125, "num_input_tokens_seen": 6755296, "step": 74 }, { "epoch": 0.3125, "grad_norm": 5.447215047810138, "learning_rate": 5e-05, "loss": 0.2813, "num_input_tokens_seen": 6846452, "step": 75 }, { "epoch": 0.3125, "loss": 0.2468206286430359, "loss_ce": 0.0020086378790438175, "loss_iou": 0.384765625, "loss_num": 0.046630859375, "loss_xval": 0.2451171875, "num_input_tokens_seen": 6846452, "step": 75 }, { "epoch": 0.31666666666666665, "grad_norm": 9.019464644230485, "learning_rate": 5e-05, "loss": 0.3132, "num_input_tokens_seen": 6938392, "step": 76 }, { "epoch": 0.31666666666666665, "loss": 0.2754860520362854, "loss_ce": 0.006504114717245102, "loss_iou": 0.384765625, "loss_num": 0.051513671875, "loss_xval": 0.26953125, "num_input_tokens_seen": 6938392, "step": 76 }, { "epoch": 0.32083333333333336, "grad_norm": 9.476201368391846, "learning_rate": 5e-05, "loss": 0.3307, "num_input_tokens_seen": 7030100, "step": 77 }, { "epoch": 0.32083333333333336, "loss": 0.27541017532348633, "loss_ce": 0.014820555225014687, "loss_iou": 0.27734375, "loss_num": 0.050537109375, "loss_xval": 0.259765625, "num_input_tokens_seen": 7030100, "step": 77 }, { "epoch": 0.325, "grad_norm": 11.219509778544023, "learning_rate": 5e-05, "loss": 0.213, "num_input_tokens_seen": 7120872, "step": 78 }, { "epoch": 0.325, "loss": 0.2377692461013794, "loss_ce": 0.0004035182937514037, "loss_iou": 0.267578125, "loss_num": 0.0458984375, "loss_xval": 0.2373046875, "num_input_tokens_seen": 7120872, "step": 78 }, { "epoch": 0.32916666666666666, "grad_norm": 5.808669094612053, "learning_rate": 5e-05, "loss": 0.2224, "num_input_tokens_seen": 7212292, "step": 79 }, { "epoch": 0.32916666666666666, "loss": 0.21997323632240295, "loss_ce": 0.0014368824195116758, "loss_iou": 0.265625, "loss_num": 0.0419921875, "loss_xval": 0.21875, "num_input_tokens_seen": 7212292, "step": 79 }, { "epoch": 0.3333333333333333, "grad_norm": 10.864936156293977, "learning_rate": 5e-05, "loss": 0.3307, "num_input_tokens_seen": 7303588, "step": 80 }, { "epoch": 0.3333333333333333, "loss": 0.30561554431915283, "loss_ce": 0.0013247651513665915, "loss_iou": 0.28125, "loss_num": 0.05908203125, "loss_xval": 0.3046875, "num_input_tokens_seen": 7303588, "step": 80 }, { "epoch": 0.3375, "grad_norm": 5.325260315463099, "learning_rate": 5e-05, "loss": 0.3184, "num_input_tokens_seen": 7394048, "step": 81 }, { "epoch": 0.3375, "loss": 0.23910076916217804, "loss_ce": 0.02486737072467804, "loss_iou": 0.30859375, "loss_num": 0.041015625, "loss_xval": 0.2138671875, "num_input_tokens_seen": 7394048, "step": 81 }, { "epoch": 0.3416666666666667, "grad_norm": 7.976433511080435, "learning_rate": 5e-05, "loss": 0.2626, "num_input_tokens_seen": 7485128, "step": 82 }, { "epoch": 0.3416666666666667, "loss": 0.2837026119232178, "loss_ce": 0.0012929437216371298, "loss_iou": 0.314453125, "loss_num": 0.054443359375, "loss_xval": 0.283203125, "num_input_tokens_seen": 7485128, "step": 82 }, { "epoch": 0.3458333333333333, "grad_norm": 16.49736325407084, "learning_rate": 5e-05, "loss": 0.2772, "num_input_tokens_seen": 7576344, "step": 83 }, { "epoch": 0.3458333333333333, "loss": 0.22794455289840698, "loss_ce": 0.004677943419665098, "loss_iou": 0.337890625, "loss_num": 0.04248046875, "loss_xval": 0.2236328125, "num_input_tokens_seen": 7576344, "step": 83 }, { "epoch": 0.35, "grad_norm": 15.249594869388494, "learning_rate": 5e-05, "loss": 0.2613, "num_input_tokens_seen": 7668016, "step": 84 }, { "epoch": 0.35, "loss": 0.37888282537460327, "loss_ce": 0.004035438410937786, "loss_iou": 0.30859375, "loss_num": 0.0732421875, "loss_xval": 0.375, "num_input_tokens_seen": 7668016, "step": 84 }, { "epoch": 0.3541666666666667, "grad_norm": 7.649439612726913, "learning_rate": 5e-05, "loss": 0.2568, "num_input_tokens_seen": 7758804, "step": 85 }, { "epoch": 0.3541666666666667, "loss": 0.2840992212295532, "loss_ce": 0.002971289912238717, "loss_iou": 0.44921875, "loss_num": 0.05322265625, "loss_xval": 0.28125, "num_input_tokens_seen": 7758804, "step": 85 }, { "epoch": 0.35833333333333334, "grad_norm": 16.462632446510703, "learning_rate": 5e-05, "loss": 0.3259, "num_input_tokens_seen": 7850368, "step": 86 }, { "epoch": 0.35833333333333334, "loss": 0.30999892950057983, "loss_ce": 0.007264568004757166, "loss_iou": 0.328125, "loss_num": 0.058349609375, "loss_xval": 0.302734375, "num_input_tokens_seen": 7850368, "step": 86 }, { "epoch": 0.3625, "grad_norm": 9.740030239140937, "learning_rate": 5e-05, "loss": 0.2168, "num_input_tokens_seen": 7941692, "step": 87 }, { "epoch": 0.3625, "loss": 0.22348150610923767, "loss_ce": 0.0030530274379998446, "loss_iou": 0.443359375, "loss_num": 0.041015625, "loss_xval": 0.220703125, "num_input_tokens_seen": 7941692, "step": 87 }, { "epoch": 0.36666666666666664, "grad_norm": 6.63686532350692, "learning_rate": 5e-05, "loss": 0.2207, "num_input_tokens_seen": 8032592, "step": 88 }, { "epoch": 0.36666666666666664, "loss": 0.22159643471240997, "loss_ce": 0.0024802093394100666, "loss_iou": 0.365234375, "loss_num": 0.041259765625, "loss_xval": 0.21875, "num_input_tokens_seen": 8032592, "step": 88 }, { "epoch": 0.37083333333333335, "grad_norm": 37.485183748386525, "learning_rate": 5e-05, "loss": 0.2739, "num_input_tokens_seen": 8124148, "step": 89 }, { "epoch": 0.37083333333333335, "loss": 0.2878793478012085, "loss_ce": 0.0028451611287891865, "loss_iou": 0.404296875, "loss_num": 0.05419921875, "loss_xval": 0.28515625, "num_input_tokens_seen": 8124148, "step": 89 }, { "epoch": 0.375, "grad_norm": 17.34743136643758, "learning_rate": 5e-05, "loss": 0.322, "num_input_tokens_seen": 8215068, "step": 90 }, { "epoch": 0.375, "loss": 0.3442806005477905, "loss_ce": 0.003948563244193792, "loss_iou": 0.41796875, "loss_num": 0.06494140625, "loss_xval": 0.33984375, "num_input_tokens_seen": 8215068, "step": 90 }, { "epoch": 0.37916666666666665, "grad_norm": 9.224857697957768, "learning_rate": 5e-05, "loss": 0.475, "num_input_tokens_seen": 8306212, "step": 91 }, { "epoch": 0.37916666666666665, "loss": 0.6492692828178406, "loss_ce": 0.2845231592655182, "loss_iou": 0.408203125, "loss_num": 0.0703125, "loss_xval": 0.365234375, "num_input_tokens_seen": 8306212, "step": 91 }, { "epoch": 0.38333333333333336, "grad_norm": 5.8131095835936035, "learning_rate": 5e-05, "loss": 0.2281, "num_input_tokens_seen": 8397524, "step": 92 }, { "epoch": 0.38333333333333336, "loss": 0.21828222274780273, "loss_ce": 0.0027060469146817923, "loss_iou": 0.1572265625, "loss_num": 0.0419921875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 8397524, "step": 92 }, { "epoch": 0.3875, "grad_norm": 5.37773321348737, "learning_rate": 5e-05, "loss": 0.2631, "num_input_tokens_seen": 8488852, "step": 93 }, { "epoch": 0.3875, "loss": 0.3609490692615509, "loss_ce": 0.0015740722883492708, "loss_iou": 0.318359375, "loss_num": 0.0693359375, "loss_xval": 0.359375, "num_input_tokens_seen": 8488852, "step": 93 }, { "epoch": 0.39166666666666666, "grad_norm": 11.54814011667921, "learning_rate": 5e-05, "loss": 0.2231, "num_input_tokens_seen": 8580496, "step": 94 }, { "epoch": 0.39166666666666666, "loss": 0.2142496556043625, "loss_ce": 0.0024576662108302116, "loss_iou": 0.439453125, "loss_num": 0.0390625, "loss_xval": 0.2119140625, "num_input_tokens_seen": 8580496, "step": 94 }, { "epoch": 0.3958333333333333, "grad_norm": 4.732196078472765, "learning_rate": 5e-05, "loss": 0.268, "num_input_tokens_seen": 8671772, "step": 95 }, { "epoch": 0.3958333333333333, "loss": 0.23772427439689636, "loss_ce": 0.003501851111650467, "loss_iou": 0.283203125, "loss_num": 0.044677734375, "loss_xval": 0.234375, "num_input_tokens_seen": 8671772, "step": 95 }, { "epoch": 0.4, "grad_norm": 11.019497865376769, "learning_rate": 5e-05, "loss": 0.1872, "num_input_tokens_seen": 8763588, "step": 96 }, { "epoch": 0.4, "loss": 0.18473270535469055, "loss_ce": 0.008768357336521149, "loss_iou": 0.400390625, "loss_num": 0.0322265625, "loss_xval": 0.17578125, "num_input_tokens_seen": 8763588, "step": 96 }, { "epoch": 0.4041666666666667, "grad_norm": 21.95480025427789, "learning_rate": 5e-05, "loss": 0.4284, "num_input_tokens_seen": 8854600, "step": 97 }, { "epoch": 0.4041666666666667, "loss": 0.43076610565185547, "loss_ce": 0.0046186321415007114, "loss_iou": 0.244140625, "loss_num": 0.08349609375, "loss_xval": 0.42578125, "num_input_tokens_seen": 8854600, "step": 97 }, { "epoch": 0.4083333333333333, "grad_norm": 18.314886493997594, "learning_rate": 5e-05, "loss": 0.259, "num_input_tokens_seen": 8945532, "step": 98 }, { "epoch": 0.4083333333333333, "loss": 0.28518539667129517, "loss_ce": 0.001432962715625763, "loss_iou": 0.37109375, "loss_num": 0.053955078125, "loss_xval": 0.283203125, "num_input_tokens_seen": 8945532, "step": 98 }, { "epoch": 0.4125, "grad_norm": 11.880511271847071, "learning_rate": 5e-05, "loss": 0.2449, "num_input_tokens_seen": 9037100, "step": 99 }, { "epoch": 0.4125, "loss": 0.2502959072589874, "loss_ce": 0.0009062608005478978, "loss_iou": 0.388671875, "loss_num": 0.046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 9037100, "step": 99 }, { "epoch": 0.4166666666666667, "grad_norm": 8.111545719884933, "learning_rate": 5e-05, "loss": 0.2111, "num_input_tokens_seen": 9128664, "step": 100 }, { "epoch": 0.4166666666666667, "loss": 0.1966070681810379, "loss_ce": 0.004254776053130627, "loss_iou": 0.38671875, "loss_num": 0.035400390625, "loss_xval": 0.1923828125, "num_input_tokens_seen": 9128664, "step": 100 }, { "epoch": 0.42083333333333334, "grad_norm": 26.817760455564922, "learning_rate": 5e-05, "loss": 0.2578, "num_input_tokens_seen": 9220284, "step": 101 }, { "epoch": 0.42083333333333334, "loss": 0.17894884943962097, "loss_ce": 0.00060411257436499, "loss_iou": 0.400390625, "loss_num": 0.032470703125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 9220284, "step": 101 }, { "epoch": 0.425, "grad_norm": 34.603333402240615, "learning_rate": 5e-05, "loss": 0.3548, "num_input_tokens_seen": 9311456, "step": 102 }, { "epoch": 0.425, "loss": 0.4787541925907135, "loss_ce": 0.0017644375329837203, "loss_iou": 0.40625, "loss_num": 0.09228515625, "loss_xval": 0.4765625, "num_input_tokens_seen": 9311456, "step": 102 }, { "epoch": 0.42916666666666664, "grad_norm": 14.937731018546275, "learning_rate": 5e-05, "loss": 0.3112, "num_input_tokens_seen": 9402384, "step": 103 }, { "epoch": 0.42916666666666664, "loss": 0.3006994128227234, "loss_ce": 0.002115420065820217, "loss_iou": 0.322265625, "loss_num": 0.05712890625, "loss_xval": 0.298828125, "num_input_tokens_seen": 9402384, "step": 103 }, { "epoch": 0.43333333333333335, "grad_norm": 43.31661854600968, "learning_rate": 5e-05, "loss": 0.2905, "num_input_tokens_seen": 9491964, "step": 104 }, { "epoch": 0.43333333333333335, "loss": 0.26289206743240356, "loss_ce": 0.002882319502532482, "loss_iou": 0.58984375, "loss_num": 0.047119140625, "loss_xval": 0.259765625, "num_input_tokens_seen": 9491964, "step": 104 }, { "epoch": 0.4375, "grad_norm": 11.993910734445342, "learning_rate": 5e-05, "loss": 0.2776, "num_input_tokens_seen": 9583456, "step": 105 }, { "epoch": 0.4375, "loss": 0.2879137396812439, "loss_ce": 0.0011705834185704589, "loss_iou": 0.43359375, "loss_num": 0.0537109375, "loss_xval": 0.287109375, "num_input_tokens_seen": 9583456, "step": 105 }, { "epoch": 0.44166666666666665, "grad_norm": 16.616379534353655, "learning_rate": 5e-05, "loss": 0.2202, "num_input_tokens_seen": 9674304, "step": 106 }, { "epoch": 0.44166666666666665, "loss": 0.23460063338279724, "loss_ce": 0.003094287123531103, "loss_iou": 0.40625, "loss_num": 0.04296875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 9674304, "step": 106 }, { "epoch": 0.44583333333333336, "grad_norm": 6.232161255971668, "learning_rate": 5e-05, "loss": 0.2685, "num_input_tokens_seen": 9765824, "step": 107 }, { "epoch": 0.44583333333333336, "loss": 0.22564728558063507, "loss_ce": 0.00476106209680438, "loss_iou": 0.330078125, "loss_num": 0.04150390625, "loss_xval": 0.220703125, "num_input_tokens_seen": 9765824, "step": 107 }, { "epoch": 0.45, "grad_norm": 69.52339064652675, "learning_rate": 5e-05, "loss": 0.288, "num_input_tokens_seen": 9857156, "step": 108 }, { "epoch": 0.45, "loss": 0.27105996012687683, "loss_ce": 0.0035123610869050026, "loss_iou": 0.33203125, "loss_num": 0.05078125, "loss_xval": 0.267578125, "num_input_tokens_seen": 9857156, "step": 108 }, { "epoch": 0.45416666666666666, "grad_norm": 18.49210837706306, "learning_rate": 5e-05, "loss": 0.2633, "num_input_tokens_seen": 9948644, "step": 109 }, { "epoch": 0.45416666666666666, "loss": 0.2351188361644745, "loss_ce": 0.001476275036111474, "loss_iou": 0.330078125, "loss_num": 0.0439453125, "loss_xval": 0.2333984375, "num_input_tokens_seen": 9948644, "step": 109 }, { "epoch": 0.4583333333333333, "grad_norm": 17.00180692584306, "learning_rate": 5e-05, "loss": 0.2762, "num_input_tokens_seen": 10040404, "step": 110 }, { "epoch": 0.4583333333333333, "loss": 0.2623690068721771, "loss_ce": 0.004007187206298113, "loss_iou": 0.453125, "loss_num": 0.0478515625, "loss_xval": 0.2578125, "num_input_tokens_seen": 10040404, "step": 110 }, { "epoch": 0.4625, "grad_norm": 7.786667888006078, "learning_rate": 5e-05, "loss": 0.2906, "num_input_tokens_seen": 10131492, "step": 111 }, { "epoch": 0.4625, "loss": 0.1874142587184906, "loss_ce": 0.0004025435191579163, "loss_iou": 0.51171875, "loss_num": 0.032958984375, "loss_xval": 0.1875, "num_input_tokens_seen": 10131492, "step": 111 }, { "epoch": 0.4666666666666667, "grad_norm": 10.089216726276307, "learning_rate": 5e-05, "loss": 0.3346, "num_input_tokens_seen": 10222468, "step": 112 }, { "epoch": 0.4666666666666667, "loss": 0.37050771713256836, "loss_ce": 0.009789920412003994, "loss_iou": 0.5859375, "loss_num": 0.06689453125, "loss_xval": 0.361328125, "num_input_tokens_seen": 10222468, "step": 112 }, { "epoch": 0.4708333333333333, "grad_norm": 13.538809032933797, "learning_rate": 5e-05, "loss": 0.1975, "num_input_tokens_seen": 10313368, "step": 113 }, { "epoch": 0.4708333333333333, "loss": 0.1806737631559372, "loss_ce": 0.0014745458029210567, "loss_iou": 0.388671875, "loss_num": 0.032470703125, "loss_xval": 0.1796875, "num_input_tokens_seen": 10313368, "step": 113 }, { "epoch": 0.475, "grad_norm": 9.844448685947137, "learning_rate": 5e-05, "loss": 0.2637, "num_input_tokens_seen": 10404232, "step": 114 }, { "epoch": 0.475, "loss": 0.20554538071155548, "loss_ce": 0.0026645271573215723, "loss_iou": 0.474609375, "loss_num": 0.036376953125, "loss_xval": 0.203125, "num_input_tokens_seen": 10404232, "step": 114 }, { "epoch": 0.4791666666666667, "grad_norm": 29.180353821250876, "learning_rate": 5e-05, "loss": 0.3316, "num_input_tokens_seen": 10495580, "step": 115 }, { "epoch": 0.4791666666666667, "loss": 0.2651398777961731, "loss_ce": 0.0038788975216448307, "loss_iou": 0.490234375, "loss_num": 0.0478515625, "loss_xval": 0.26171875, "num_input_tokens_seen": 10495580, "step": 115 }, { "epoch": 0.48333333333333334, "grad_norm": 9.586548205484243, "learning_rate": 5e-05, "loss": 0.3117, "num_input_tokens_seen": 10586396, "step": 116 }, { "epoch": 0.48333333333333334, "loss": 0.34220072627067566, "loss_ce": 0.01053567323833704, "loss_iou": 0.26171875, "loss_num": 0.06396484375, "loss_xval": 0.33203125, "num_input_tokens_seen": 10586396, "step": 116 }, { "epoch": 0.4875, "grad_norm": 9.509270620685877, "learning_rate": 5e-05, "loss": 0.3585, "num_input_tokens_seen": 10678060, "step": 117 }, { "epoch": 0.4875, "loss": 0.3074171543121338, "loss_ce": 0.020185697823762894, "loss_iou": 0.357421875, "loss_num": 0.053955078125, "loss_xval": 0.287109375, "num_input_tokens_seen": 10678060, "step": 117 }, { "epoch": 0.49166666666666664, "grad_norm": 5.073874129263811, "learning_rate": 5e-05, "loss": 0.2701, "num_input_tokens_seen": 10769996, "step": 118 }, { "epoch": 0.49166666666666664, "loss": 0.27782976627349854, "loss_ce": 0.00036397005897015333, "loss_iou": 0.5625, "loss_num": 0.050048828125, "loss_xval": 0.27734375, "num_input_tokens_seen": 10769996, "step": 118 }, { "epoch": 0.49583333333333335, "grad_norm": 8.65526030944302, "learning_rate": 5e-05, "loss": 0.2471, "num_input_tokens_seen": 10860812, "step": 119 }, { "epoch": 0.49583333333333335, "loss": 0.26075470447540283, "loss_ce": 0.0018435618840157986, "loss_iou": 0.5234375, "loss_num": 0.046875, "loss_xval": 0.259765625, "num_input_tokens_seen": 10860812, "step": 119 }, { "epoch": 0.5, "grad_norm": 18.74547743651069, "learning_rate": 5e-05, "loss": 0.3201, "num_input_tokens_seen": 10952028, "step": 120 }, { "epoch": 0.5, "loss": 0.35629406571388245, "loss_ce": 0.017671015113592148, "loss_iou": 0.373046875, "loss_num": 0.064453125, "loss_xval": 0.337890625, "num_input_tokens_seen": 10952028, "step": 120 }, { "epoch": 0.5041666666666667, "grad_norm": 12.242796749612552, "learning_rate": 5e-05, "loss": 0.2829, "num_input_tokens_seen": 11042748, "step": 121 }, { "epoch": 0.5041666666666667, "loss": 0.29572194814682007, "loss_ce": 0.0006780114490538836, "loss_iou": 0.35546875, "loss_num": 0.0556640625, "loss_xval": 0.294921875, "num_input_tokens_seen": 11042748, "step": 121 }, { "epoch": 0.5083333333333333, "grad_norm": 80.74118621393487, "learning_rate": 5e-05, "loss": 0.323, "num_input_tokens_seen": 11134452, "step": 122 }, { "epoch": 0.5083333333333333, "loss": 0.3073047995567322, "loss_ce": 0.0019458993338048458, "loss_iou": 0.458984375, "loss_num": 0.056640625, "loss_xval": 0.3046875, "num_input_tokens_seen": 11134452, "step": 122 }, { "epoch": 0.5125, "grad_norm": 19.969269792064445, "learning_rate": 5e-05, "loss": 0.3026, "num_input_tokens_seen": 11226092, "step": 123 }, { "epoch": 0.5125, "loss": 0.37382322549819946, "loss_ce": 0.0026074047200381756, "loss_iou": 0.400390625, "loss_num": 0.0703125, "loss_xval": 0.37109375, "num_input_tokens_seen": 11226092, "step": 123 }, { "epoch": 0.5166666666666667, "grad_norm": 7.3966393387659926, "learning_rate": 5e-05, "loss": 0.2249, "num_input_tokens_seen": 11317360, "step": 124 }, { "epoch": 0.5166666666666667, "loss": 0.22373417019844055, "loss_ce": 0.00443485751748085, "loss_iou": 0.462890625, "loss_num": 0.039306640625, "loss_xval": 0.2197265625, "num_input_tokens_seen": 11317360, "step": 124 }, { "epoch": 0.5208333333333334, "grad_norm": 27.827753563904736, "learning_rate": 5e-05, "loss": 0.2454, "num_input_tokens_seen": 11408524, "step": 125 }, { "epoch": 0.5208333333333334, "loss": 0.23156914114952087, "loss_ce": 0.027467573061585426, "loss_iou": 0.271484375, "loss_num": 0.0380859375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 11408524, "step": 125 }, { "epoch": 0.525, "grad_norm": 16.137469248679036, "learning_rate": 5e-05, "loss": 0.249, "num_input_tokens_seen": 11499824, "step": 126 }, { "epoch": 0.525, "loss": 0.21419081091880798, "loss_ce": 0.0033753756433725357, "loss_iou": 0.470703125, "loss_num": 0.03759765625, "loss_xval": 0.2109375, "num_input_tokens_seen": 11499824, "step": 126 }, { "epoch": 0.5291666666666667, "grad_norm": 31.700797289386937, "learning_rate": 5e-05, "loss": 0.3223, "num_input_tokens_seen": 11591604, "step": 127 }, { "epoch": 0.5291666666666667, "loss": 0.2671835422515869, "loss_ce": 0.004915494006127119, "loss_iou": 0.435546875, "loss_num": 0.048095703125, "loss_xval": 0.26171875, "num_input_tokens_seen": 11591604, "step": 127 }, { "epoch": 0.5333333333333333, "grad_norm": 9.023404502399261, "learning_rate": 5e-05, "loss": 0.2629, "num_input_tokens_seen": 11683324, "step": 128 }, { "epoch": 0.5333333333333333, "loss": 0.21468223631381989, "loss_ce": 0.014853136613965034, "loss_iou": 0.1669921875, "loss_num": 0.038330078125, "loss_xval": 0.2001953125, "num_input_tokens_seen": 11683324, "step": 128 }, { "epoch": 0.5375, "grad_norm": 25.693519570004373, "learning_rate": 5e-05, "loss": 0.3072, "num_input_tokens_seen": 11774320, "step": 129 }, { "epoch": 0.5375, "loss": 0.39914512634277344, "loss_ce": 0.000707621977198869, "loss_iou": 0.3984375, "loss_num": 0.07568359375, "loss_xval": 0.3984375, "num_input_tokens_seen": 11774320, "step": 129 }, { "epoch": 0.5416666666666666, "grad_norm": 9.866756200521515, "learning_rate": 5e-05, "loss": 0.2577, "num_input_tokens_seen": 11865392, "step": 130 }, { "epoch": 0.5416666666666666, "loss": 0.29488706588745117, "loss_ce": 0.01870298571884632, "loss_iou": 0.37109375, "loss_num": 0.05126953125, "loss_xval": 0.275390625, "num_input_tokens_seen": 11865392, "step": 130 }, { "epoch": 0.5458333333333333, "grad_norm": 10.436733246035443, "learning_rate": 5e-05, "loss": 0.3341, "num_input_tokens_seen": 11957140, "step": 131 }, { "epoch": 0.5458333333333333, "loss": 0.32612884044647217, "loss_ce": 0.0023983772844076157, "loss_iou": 0.412109375, "loss_num": 0.060546875, "loss_xval": 0.32421875, "num_input_tokens_seen": 11957140, "step": 131 }, { "epoch": 0.55, "grad_norm": 10.922744020062677, "learning_rate": 5e-05, "loss": 0.2328, "num_input_tokens_seen": 12048596, "step": 132 }, { "epoch": 0.55, "loss": 0.1844669133424759, "loss_ce": 0.002460076939314604, "loss_iou": 0.37109375, "loss_num": 0.032470703125, "loss_xval": 0.181640625, "num_input_tokens_seen": 12048596, "step": 132 }, { "epoch": 0.5541666666666667, "grad_norm": 23.953360994914252, "learning_rate": 5e-05, "loss": 0.2737, "num_input_tokens_seen": 12140068, "step": 133 }, { "epoch": 0.5541666666666667, "loss": 0.24452215433120728, "loss_ce": 0.0019684485159814358, "loss_iou": 0.3515625, "loss_num": 0.044677734375, "loss_xval": 0.2421875, "num_input_tokens_seen": 12140068, "step": 133 }, { "epoch": 0.5583333333333333, "grad_norm": 4.560719803742324, "learning_rate": 5e-05, "loss": 0.2217, "num_input_tokens_seen": 12231244, "step": 134 }, { "epoch": 0.5583333333333333, "loss": 0.2742983102798462, "loss_ce": 0.0020815201569348574, "loss_iou": 0.515625, "loss_num": 0.048828125, "loss_xval": 0.271484375, "num_input_tokens_seen": 12231244, "step": 134 }, { "epoch": 0.5625, "grad_norm": 7.269681166884061, "learning_rate": 5e-05, "loss": 0.2293, "num_input_tokens_seen": 12322624, "step": 135 }, { "epoch": 0.5625, "loss": 0.16657213866710663, "loss_ce": 0.006843137554824352, "loss_iou": 0.4140625, "loss_num": 0.0274658203125, "loss_xval": 0.16015625, "num_input_tokens_seen": 12322624, "step": 135 }, { "epoch": 0.5666666666666667, "grad_norm": 7.655933198469534, "learning_rate": 5e-05, "loss": 0.3303, "num_input_tokens_seen": 12413664, "step": 136 }, { "epoch": 0.5666666666666667, "loss": 0.3964250683784485, "loss_ce": 0.08453544229269028, "loss_iou": 0.1513671875, "loss_num": 0.060791015625, "loss_xval": 0.3125, "num_input_tokens_seen": 12413664, "step": 136 }, { "epoch": 0.5708333333333333, "grad_norm": 7.870782926065455, "learning_rate": 5e-05, "loss": 0.3437, "num_input_tokens_seen": 12505268, "step": 137 }, { "epoch": 0.5708333333333333, "loss": 0.3565272092819214, "loss_ce": 0.0037439956795424223, "loss_iou": 0.302734375, "loss_num": 0.0673828125, "loss_xval": 0.353515625, "num_input_tokens_seen": 12505268, "step": 137 }, { "epoch": 0.575, "grad_norm": 4.491010759014802, "learning_rate": 5e-05, "loss": 0.1927, "num_input_tokens_seen": 12596560, "step": 138 }, { "epoch": 0.575, "loss": 0.18377013504505157, "loss_ce": 0.001732780598104, "loss_iou": 0.36328125, "loss_num": 0.032470703125, "loss_xval": 0.181640625, "num_input_tokens_seen": 12596560, "step": 138 }, { "epoch": 0.5791666666666667, "grad_norm": 6.588859235712896, "learning_rate": 5e-05, "loss": 0.351, "num_input_tokens_seen": 12687640, "step": 139 }, { "epoch": 0.5791666666666667, "loss": 0.3708999454975128, "loss_ce": 0.012837199494242668, "loss_iou": 0.416015625, "loss_num": 0.06689453125, "loss_xval": 0.357421875, "num_input_tokens_seen": 12687640, "step": 139 }, { "epoch": 0.5833333333333334, "grad_norm": 11.319551129339365, "learning_rate": 5e-05, "loss": 0.2907, "num_input_tokens_seen": 12778776, "step": 140 }, { "epoch": 0.5833333333333334, "loss": 0.21045735478401184, "loss_ce": 0.0015340123791247606, "loss_iou": 0.388671875, "loss_num": 0.03759765625, "loss_xval": 0.208984375, "num_input_tokens_seen": 12778776, "step": 140 }, { "epoch": 0.5875, "grad_norm": 9.707420160182311, "learning_rate": 5e-05, "loss": 0.3175, "num_input_tokens_seen": 12869816, "step": 141 }, { "epoch": 0.5875, "loss": 0.3253687620162964, "loss_ce": 0.004812105558812618, "loss_iou": 0.419921875, "loss_num": 0.0595703125, "loss_xval": 0.3203125, "num_input_tokens_seen": 12869816, "step": 141 }, { "epoch": 0.5916666666666667, "grad_norm": 4.292805483536467, "learning_rate": 5e-05, "loss": 0.2064, "num_input_tokens_seen": 12960888, "step": 142 }, { "epoch": 0.5916666666666667, "loss": 0.25991156697273254, "loss_ce": 0.00014595442917197943, "loss_iou": 0.404296875, "loss_num": 0.04736328125, "loss_xval": 0.259765625, "num_input_tokens_seen": 12960888, "step": 142 }, { "epoch": 0.5958333333333333, "grad_norm": 21.379545153312392, "learning_rate": 5e-05, "loss": 0.2521, "num_input_tokens_seen": 13052452, "step": 143 }, { "epoch": 0.5958333333333333, "loss": 0.23024097084999084, "loss_ce": 0.002030514180660248, "loss_iou": 0.34765625, "loss_num": 0.041748046875, "loss_xval": 0.228515625, "num_input_tokens_seen": 13052452, "step": 143 }, { "epoch": 0.6, "grad_norm": 8.227914536382473, "learning_rate": 5e-05, "loss": 0.248, "num_input_tokens_seen": 13143192, "step": 144 }, { "epoch": 0.6, "loss": 0.23771356046199799, "loss_ce": 0.0054137492552399635, "loss_iou": 0.328125, "loss_num": 0.042724609375, "loss_xval": 0.232421875, "num_input_tokens_seen": 13143192, "step": 144 }, { "epoch": 0.6041666666666666, "grad_norm": 8.446517974381681, "learning_rate": 5e-05, "loss": 0.2465, "num_input_tokens_seen": 13234824, "step": 145 }, { "epoch": 0.6041666666666666, "loss": 0.20083218812942505, "loss_ce": 0.0016134518664330244, "loss_iou": 0.2578125, "loss_num": 0.036865234375, "loss_xval": 0.19921875, "num_input_tokens_seen": 13234824, "step": 145 }, { "epoch": 0.6083333333333333, "grad_norm": 8.30597746700804, "learning_rate": 5e-05, "loss": 0.2549, "num_input_tokens_seen": 13325684, "step": 146 }, { "epoch": 0.6083333333333333, "loss": 0.19978776574134827, "loss_ce": 0.0020948995370417833, "loss_iou": 0.310546875, "loss_num": 0.035888671875, "loss_xval": 0.197265625, "num_input_tokens_seen": 13325684, "step": 146 }, { "epoch": 0.6125, "grad_norm": 5.005823165635506, "learning_rate": 5e-05, "loss": 0.2255, "num_input_tokens_seen": 13417384, "step": 147 }, { "epoch": 0.6125, "loss": 0.22988131642341614, "loss_ce": 0.008873018436133862, "loss_iou": 0.4609375, "loss_num": 0.038818359375, "loss_xval": 0.220703125, "num_input_tokens_seen": 13417384, "step": 147 }, { "epoch": 0.6166666666666667, "grad_norm": 7.873135529753248, "learning_rate": 5e-05, "loss": 0.2545, "num_input_tokens_seen": 13508672, "step": 148 }, { "epoch": 0.6166666666666667, "loss": 0.2839931845664978, "loss_ce": 0.0019497520988807082, "loss_iou": 0.404296875, "loss_num": 0.0517578125, "loss_xval": 0.28125, "num_input_tokens_seen": 13508672, "step": 148 }, { "epoch": 0.6208333333333333, "grad_norm": 4.554201262753171, "learning_rate": 5e-05, "loss": 0.1965, "num_input_tokens_seen": 13600320, "step": 149 }, { "epoch": 0.6208333333333333, "loss": 0.17919516563415527, "loss_ce": 0.0008504376164637506, "loss_iou": 0.34765625, "loss_num": 0.031494140625, "loss_xval": 0.1787109375, "num_input_tokens_seen": 13600320, "step": 149 }, { "epoch": 0.625, "grad_norm": 9.670541579455165, "learning_rate": 5e-05, "loss": 0.3536, "num_input_tokens_seen": 13689296, "step": 150 }, { "epoch": 0.625, "loss": 0.39481455087661743, "loss_ce": 0.043099481612443924, "loss_iou": 0.1962890625, "loss_num": 0.06787109375, "loss_xval": 0.3515625, "num_input_tokens_seen": 13689296, "step": 150 }, { "epoch": 0.6291666666666667, "grad_norm": 19.88548591475147, "learning_rate": 5e-05, "loss": 0.2459, "num_input_tokens_seen": 13780820, "step": 151 }, { "epoch": 0.6291666666666667, "loss": 0.22300229966640472, "loss_ce": 0.0016888338141143322, "loss_iou": 0.33203125, "loss_num": 0.040283203125, "loss_xval": 0.2216796875, "num_input_tokens_seen": 13780820, "step": 151 }, { "epoch": 0.6333333333333333, "grad_norm": 5.031981077375081, "learning_rate": 5e-05, "loss": 0.1604, "num_input_tokens_seen": 13872392, "step": 152 }, { "epoch": 0.6333333333333333, "loss": 0.19695362448692322, "loss_ce": 0.0008171653607860208, "loss_iou": 0.353515625, "loss_num": 0.034912109375, "loss_xval": 0.1962890625, "num_input_tokens_seen": 13872392, "step": 152 }, { "epoch": 0.6375, "grad_norm": 8.194026676883864, "learning_rate": 5e-05, "loss": 0.2498, "num_input_tokens_seen": 13963840, "step": 153 }, { "epoch": 0.6375, "loss": 0.1730082929134369, "loss_ce": 0.009373043663799763, "loss_iou": 0.2216796875, "loss_num": 0.030029296875, "loss_xval": 0.1640625, "num_input_tokens_seen": 13963840, "step": 153 }, { "epoch": 0.6416666666666667, "grad_norm": 7.232097422901255, "learning_rate": 5e-05, "loss": 0.1843, "num_input_tokens_seen": 14054860, "step": 154 }, { "epoch": 0.6416666666666667, "loss": 0.23983432352542877, "loss_ce": 0.0012784129939973354, "loss_iou": 0.29296875, "loss_num": 0.044189453125, "loss_xval": 0.23828125, "num_input_tokens_seen": 14054860, "step": 154 }, { "epoch": 0.6458333333333334, "grad_norm": 10.002820581844796, "learning_rate": 5e-05, "loss": 0.2336, "num_input_tokens_seen": 14146504, "step": 155 }, { "epoch": 0.6458333333333334, "loss": 0.24934682250022888, "loss_ce": 0.0051146335899829865, "loss_iou": 0.408203125, "loss_num": 0.043701171875, "loss_xval": 0.244140625, "num_input_tokens_seen": 14146504, "step": 155 }, { "epoch": 0.65, "grad_norm": 12.315543047257076, "learning_rate": 5e-05, "loss": 0.2067, "num_input_tokens_seen": 14237576, "step": 156 }, { "epoch": 0.65, "loss": 0.21194952726364136, "loss_ce": 0.003270330373197794, "loss_iou": 0.283203125, "loss_num": 0.038330078125, "loss_xval": 0.208984375, "num_input_tokens_seen": 14237576, "step": 156 }, { "epoch": 0.6541666666666667, "grad_norm": 2.8928526807905732, "learning_rate": 5e-05, "loss": 0.2503, "num_input_tokens_seen": 14329332, "step": 157 }, { "epoch": 0.6541666666666667, "loss": 0.32788723707199097, "loss_ce": 0.012030322104692459, "loss_iou": 0.25390625, "loss_num": 0.06005859375, "loss_xval": 0.31640625, "num_input_tokens_seen": 14329332, "step": 157 }, { "epoch": 0.6583333333333333, "grad_norm": 5.933111792460177, "learning_rate": 5e-05, "loss": 0.3143, "num_input_tokens_seen": 14420520, "step": 158 }, { "epoch": 0.6583333333333333, "loss": 0.3221921920776367, "loss_ce": 0.002001758897677064, "loss_iou": 0.185546875, "loss_num": 0.061767578125, "loss_xval": 0.3203125, "num_input_tokens_seen": 14420520, "step": 158 }, { "epoch": 0.6625, "grad_norm": 9.471262618082035, "learning_rate": 5e-05, "loss": 0.233, "num_input_tokens_seen": 14511276, "step": 159 }, { "epoch": 0.6625, "loss": 0.19342216849327087, "loss_ce": 0.0015886790351942182, "loss_iou": 0.267578125, "loss_num": 0.034912109375, "loss_xval": 0.19140625, "num_input_tokens_seen": 14511276, "step": 159 }, { "epoch": 0.6666666666666666, "grad_norm": 7.9125575547617535, "learning_rate": 5e-05, "loss": 0.346, "num_input_tokens_seen": 14602924, "step": 160 }, { "epoch": 0.6666666666666666, "loss": 0.23464325070381165, "loss_ce": 0.0049679577350616455, "loss_iou": 0.33203125, "loss_num": 0.041748046875, "loss_xval": 0.2294921875, "num_input_tokens_seen": 14602924, "step": 160 }, { "epoch": 0.6708333333333333, "grad_norm": 3.765087310549006, "learning_rate": 5e-05, "loss": 0.2288, "num_input_tokens_seen": 14694336, "step": 161 }, { "epoch": 0.6708333333333333, "loss": 0.23900958895683289, "loss_ce": 0.0012776643270626664, "loss_iou": 0.322265625, "loss_num": 0.04345703125, "loss_xval": 0.2373046875, "num_input_tokens_seen": 14694336, "step": 161 }, { "epoch": 0.675, "grad_norm": 3.2411568645640965, "learning_rate": 5e-05, "loss": 0.2227, "num_input_tokens_seen": 14786132, "step": 162 }, { "epoch": 0.675, "loss": 0.13679921627044678, "loss_ce": 0.002338771941140294, "loss_iou": 0.298828125, "loss_num": 0.02294921875, "loss_xval": 0.134765625, "num_input_tokens_seen": 14786132, "step": 162 }, { "epoch": 0.6791666666666667, "grad_norm": 6.949492710171496, "learning_rate": 5e-05, "loss": 0.321, "num_input_tokens_seen": 14877432, "step": 163 }, { "epoch": 0.6791666666666667, "loss": 0.2304680496454239, "loss_ce": 0.0015862042782828212, "loss_iou": 0.361328125, "loss_num": 0.041015625, "loss_xval": 0.228515625, "num_input_tokens_seen": 14877432, "step": 163 }, { "epoch": 0.6833333333333333, "grad_norm": 8.767037215884402, "learning_rate": 5e-05, "loss": 0.2281, "num_input_tokens_seen": 14968476, "step": 164 }, { "epoch": 0.6833333333333333, "loss": 0.24121038615703583, "loss_ce": 0.000365669431630522, "loss_iou": 0.490234375, "loss_num": 0.041748046875, "loss_xval": 0.2412109375, "num_input_tokens_seen": 14968476, "step": 164 }, { "epoch": 0.6875, "grad_norm": 3.3738702456098504, "learning_rate": 5e-05, "loss": 0.2412, "num_input_tokens_seen": 15059764, "step": 165 }, { "epoch": 0.6875, "loss": 0.1965644657611847, "loss_ce": 0.0044258031994104385, "loss_iou": 0.330078125, "loss_num": 0.0341796875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 15059764, "step": 165 }, { "epoch": 0.6916666666666667, "grad_norm": 7.237334795990486, "learning_rate": 5e-05, "loss": 0.272, "num_input_tokens_seen": 15151340, "step": 166 }, { "epoch": 0.6916666666666667, "loss": 0.2698882818222046, "loss_ce": 0.0034087959211319685, "loss_iou": 0.341796875, "loss_num": 0.048828125, "loss_xval": 0.265625, "num_input_tokens_seen": 15151340, "step": 166 }, { "epoch": 0.6958333333333333, "grad_norm": 4.492972449650082, "learning_rate": 5e-05, "loss": 0.2617, "num_input_tokens_seen": 15243280, "step": 167 }, { "epoch": 0.6958333333333333, "loss": 0.3138772249221802, "loss_ce": 0.0039406768046319485, "loss_iou": 0.2431640625, "loss_num": 0.05859375, "loss_xval": 0.310546875, "num_input_tokens_seen": 15243280, "step": 167 }, { "epoch": 0.7, "grad_norm": 5.065317522783334, "learning_rate": 5e-05, "loss": 0.2395, "num_input_tokens_seen": 15333408, "step": 168 }, { "epoch": 0.7, "loss": 0.24651506543159485, "loss_ce": 0.0019166837446391582, "loss_iou": 0.275390625, "loss_num": 0.04541015625, "loss_xval": 0.244140625, "num_input_tokens_seen": 15333408, "step": 168 }, { "epoch": 0.7041666666666667, "grad_norm": 37.52703552517235, "learning_rate": 5e-05, "loss": 0.2407, "num_input_tokens_seen": 15425096, "step": 169 }, { "epoch": 0.7041666666666667, "loss": 0.2536095082759857, "loss_ce": 0.0022667348384857178, "loss_iou": 0.33203125, "loss_num": 0.0458984375, "loss_xval": 0.251953125, "num_input_tokens_seen": 15425096, "step": 169 }, { "epoch": 0.7083333333333334, "grad_norm": 11.029881651516087, "learning_rate": 5e-05, "loss": 0.2446, "num_input_tokens_seen": 15516556, "step": 170 }, { "epoch": 0.7083333333333334, "loss": 0.2343108355998993, "loss_ce": 0.013241507112979889, "loss_iou": 0.412109375, "loss_num": 0.03857421875, "loss_xval": 0.220703125, "num_input_tokens_seen": 15516556, "step": 170 }, { "epoch": 0.7125, "grad_norm": 4.589534025223679, "learning_rate": 5e-05, "loss": 0.2029, "num_input_tokens_seen": 15607732, "step": 171 }, { "epoch": 0.7125, "loss": 0.16617685556411743, "loss_ce": 0.0017481537070125341, "loss_iou": 0.26171875, "loss_num": 0.029296875, "loss_xval": 0.1640625, "num_input_tokens_seen": 15607732, "step": 171 }, { "epoch": 0.7166666666666667, "grad_norm": 5.615815550066399, "learning_rate": 5e-05, "loss": 0.2673, "num_input_tokens_seen": 15699344, "step": 172 }, { "epoch": 0.7166666666666667, "loss": 0.3418896198272705, "loss_ce": 0.0038158849347382784, "loss_iou": 0.271484375, "loss_num": 0.06396484375, "loss_xval": 0.337890625, "num_input_tokens_seen": 15699344, "step": 172 }, { "epoch": 0.7208333333333333, "grad_norm": 25.13652914203415, "learning_rate": 5e-05, "loss": 0.276, "num_input_tokens_seen": 15791220, "step": 173 }, { "epoch": 0.7208333333333333, "loss": 0.26045161485671997, "loss_ce": 0.0012353132478892803, "loss_iou": 0.267578125, "loss_num": 0.048095703125, "loss_xval": 0.259765625, "num_input_tokens_seen": 15791220, "step": 173 }, { "epoch": 0.725, "grad_norm": 13.090432791261154, "learning_rate": 5e-05, "loss": 0.2685, "num_input_tokens_seen": 15882808, "step": 174 }, { "epoch": 0.725, "loss": 0.19032147526741028, "loss_ce": 0.004835621453821659, "loss_iou": 0.462890625, "loss_num": 0.0306396484375, "loss_xval": 0.185546875, "num_input_tokens_seen": 15882808, "step": 174 }, { "epoch": 0.7291666666666666, "grad_norm": 7.67733189232213, "learning_rate": 5e-05, "loss": 0.1935, "num_input_tokens_seen": 15973956, "step": 175 }, { "epoch": 0.7291666666666666, "loss": 0.16638563573360443, "loss_ce": 0.0017127819592133164, "loss_iou": 0.388671875, "loss_num": 0.027587890625, "loss_xval": 0.1650390625, "num_input_tokens_seen": 15973956, "step": 175 }, { "epoch": 0.7333333333333333, "grad_norm": 6.305357125357973, "learning_rate": 5e-05, "loss": 0.2835, "num_input_tokens_seen": 16065444, "step": 176 }, { "epoch": 0.7333333333333333, "loss": 0.22557082772254944, "loss_ce": 0.017501965165138245, "loss_iou": 0.40625, "loss_num": 0.035888671875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 16065444, "step": 176 }, { "epoch": 0.7375, "grad_norm": 8.085000629433312, "learning_rate": 5e-05, "loss": 0.2616, "num_input_tokens_seen": 16156380, "step": 177 }, { "epoch": 0.7375, "loss": 0.29290682077407837, "loss_ce": 0.0012808414176106453, "loss_iou": 0.396484375, "loss_num": 0.052734375, "loss_xval": 0.291015625, "num_input_tokens_seen": 16156380, "step": 177 }, { "epoch": 0.7416666666666667, "grad_norm": 13.57474615095706, "learning_rate": 5e-05, "loss": 0.2052, "num_input_tokens_seen": 16247840, "step": 178 }, { "epoch": 0.7416666666666667, "loss": 0.2054433822631836, "loss_ce": 0.026518816128373146, "loss_iou": 0.265625, "loss_num": 0.031982421875, "loss_xval": 0.1787109375, "num_input_tokens_seen": 16247840, "step": 178 }, { "epoch": 0.7458333333333333, "grad_norm": 14.721342043506576, "learning_rate": 5e-05, "loss": 0.2228, "num_input_tokens_seen": 16338660, "step": 179 }, { "epoch": 0.7458333333333333, "loss": 0.21274816989898682, "loss_ce": 0.009684196673333645, "loss_iou": 0.25, "loss_num": 0.037109375, "loss_xval": 0.203125, "num_input_tokens_seen": 16338660, "step": 179 }, { "epoch": 0.75, "grad_norm": 8.959153814889348, "learning_rate": 5e-05, "loss": 0.2944, "num_input_tokens_seen": 16430176, "step": 180 }, { "epoch": 0.75, "loss": 0.3967931270599365, "loss_ce": 0.0061376336961984634, "loss_iou": 0.25390625, "loss_num": 0.07470703125, "loss_xval": 0.390625, "num_input_tokens_seen": 16430176, "step": 180 }, { "epoch": 0.7541666666666667, "grad_norm": 4.907715439269668, "learning_rate": 5e-05, "loss": 0.2148, "num_input_tokens_seen": 16521752, "step": 181 }, { "epoch": 0.7541666666666667, "loss": 0.2014847695827484, "loss_ce": 0.004097080789506435, "loss_iou": 0.28125, "loss_num": 0.035400390625, "loss_xval": 0.197265625, "num_input_tokens_seen": 16521752, "step": 181 }, { "epoch": 0.7583333333333333, "grad_norm": 12.251478551523167, "learning_rate": 5e-05, "loss": 0.2387, "num_input_tokens_seen": 16613096, "step": 182 }, { "epoch": 0.7583333333333333, "loss": 0.20524071156978607, "loss_ce": 0.00028466549701988697, "loss_iou": 0.259765625, "loss_num": 0.037353515625, "loss_xval": 0.205078125, "num_input_tokens_seen": 16613096, "step": 182 }, { "epoch": 0.7625, "grad_norm": 14.719844192142459, "learning_rate": 5e-05, "loss": 0.1968, "num_input_tokens_seen": 16703780, "step": 183 }, { "epoch": 0.7625, "loss": 0.21019019186496735, "loss_ce": 0.005417247768491507, "loss_iou": 0.306640625, "loss_num": 0.03662109375, "loss_xval": 0.205078125, "num_input_tokens_seen": 16703780, "step": 183 }, { "epoch": 0.7666666666666667, "grad_norm": 13.62691494879007, "learning_rate": 5e-05, "loss": 0.2891, "num_input_tokens_seen": 16794776, "step": 184 }, { "epoch": 0.7666666666666667, "loss": 0.27239733934402466, "loss_ce": 0.000912959803827107, "loss_iou": 0.62890625, "loss_num": 0.045166015625, "loss_xval": 0.271484375, "num_input_tokens_seen": 16794776, "step": 184 }, { "epoch": 0.7708333333333334, "grad_norm": 24.418623447127, "learning_rate": 5e-05, "loss": 0.2182, "num_input_tokens_seen": 16886236, "step": 185 }, { "epoch": 0.7708333333333334, "loss": 0.22815991938114166, "loss_ce": 0.022166268900036812, "loss_iou": 0.404296875, "loss_num": 0.03515625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 16886236, "step": 185 }, { "epoch": 0.775, "grad_norm": 9.956108878177691, "learning_rate": 5e-05, "loss": 0.2518, "num_input_tokens_seen": 16977808, "step": 186 }, { "epoch": 0.775, "loss": 0.22952379286289215, "loss_ce": 0.007844101637601852, "loss_iou": 0.310546875, "loss_num": 0.039794921875, "loss_xval": 0.2216796875, "num_input_tokens_seen": 16977808, "step": 186 }, { "epoch": 0.7791666666666667, "grad_norm": 10.905812593604734, "learning_rate": 5e-05, "loss": 0.19, "num_input_tokens_seen": 17069052, "step": 187 }, { "epoch": 0.7791666666666667, "loss": 0.21624162793159485, "loss_ce": 0.009637624025344849, "loss_iou": 0.25, "loss_num": 0.03759765625, "loss_xval": 0.20703125, "num_input_tokens_seen": 17069052, "step": 187 }, { "epoch": 0.7833333333333333, "grad_norm": 7.466897830018128, "learning_rate": 5e-05, "loss": 0.3065, "num_input_tokens_seen": 17158888, "step": 188 }, { "epoch": 0.7833333333333333, "loss": 0.3236009478569031, "loss_ce": 0.0020066953729838133, "loss_iou": 0.40234375, "loss_num": 0.05810546875, "loss_xval": 0.322265625, "num_input_tokens_seen": 17158888, "step": 188 }, { "epoch": 0.7875, "grad_norm": 11.43031761199009, "learning_rate": 5e-05, "loss": 0.2721, "num_input_tokens_seen": 17250224, "step": 189 }, { "epoch": 0.7875, "loss": 0.27115926146507263, "loss_ce": 0.002482494106516242, "loss_iou": 0.419921875, "loss_num": 0.04736328125, "loss_xval": 0.26953125, "num_input_tokens_seen": 17250224, "step": 189 }, { "epoch": 0.7916666666666666, "grad_norm": 6.376115694685188, "learning_rate": 5e-05, "loss": 0.3104, "num_input_tokens_seen": 17341208, "step": 190 }, { "epoch": 0.7916666666666666, "loss": 0.3440701961517334, "loss_ce": 0.0027616131119430065, "loss_iou": 0.330078125, "loss_num": 0.0634765625, "loss_xval": 0.341796875, "num_input_tokens_seen": 17341208, "step": 190 }, { "epoch": 0.7958333333333333, "grad_norm": 7.893330863623168, "learning_rate": 5e-05, "loss": 0.2149, "num_input_tokens_seen": 17432612, "step": 191 }, { "epoch": 0.7958333333333333, "loss": 0.23782208561897278, "loss_ce": 0.0010667047463357449, "loss_iou": 0.2177734375, "loss_num": 0.044189453125, "loss_xval": 0.236328125, "num_input_tokens_seen": 17432612, "step": 191 }, { "epoch": 0.8, "grad_norm": 22.18997521206718, "learning_rate": 5e-05, "loss": 0.3162, "num_input_tokens_seen": 17523780, "step": 192 }, { "epoch": 0.8, "loss": 0.34398892521858215, "loss_ce": 0.002802408766001463, "loss_iou": 0.388671875, "loss_num": 0.062255859375, "loss_xval": 0.341796875, "num_input_tokens_seen": 17523780, "step": 192 }, { "epoch": 0.8041666666666667, "grad_norm": 7.048614109115301, "learning_rate": 5e-05, "loss": 0.2152, "num_input_tokens_seen": 17615380, "step": 193 }, { "epoch": 0.8041666666666667, "loss": 0.21223366260528564, "loss_ce": 0.0012351283803582191, "loss_iou": 0.373046875, "loss_num": 0.03662109375, "loss_xval": 0.2109375, "num_input_tokens_seen": 17615380, "step": 193 }, { "epoch": 0.8083333333333333, "grad_norm": 7.8711229313668305, "learning_rate": 5e-05, "loss": 0.2219, "num_input_tokens_seen": 17706776, "step": 194 }, { "epoch": 0.8083333333333333, "loss": 0.24827273190021515, "loss_ce": 0.004498313646763563, "loss_iou": 0.30078125, "loss_num": 0.044189453125, "loss_xval": 0.244140625, "num_input_tokens_seen": 17706776, "step": 194 }, { "epoch": 0.8125, "grad_norm": 11.585751160130735, "learning_rate": 5e-05, "loss": 0.2551, "num_input_tokens_seen": 17798216, "step": 195 }, { "epoch": 0.8125, "loss": 0.2494007796049118, "loss_ce": 0.0014149582711979747, "loss_iou": 0.494140625, "loss_num": 0.0419921875, "loss_xval": 0.248046875, "num_input_tokens_seen": 17798216, "step": 195 }, { "epoch": 0.8166666666666667, "grad_norm": 11.481203844670077, "learning_rate": 5e-05, "loss": 0.2607, "num_input_tokens_seen": 17889460, "step": 196 }, { "epoch": 0.8166666666666667, "loss": 0.303374707698822, "loss_ce": 0.003509008791297674, "loss_iou": 0.392578125, "loss_num": 0.053955078125, "loss_xval": 0.30078125, "num_input_tokens_seen": 17889460, "step": 196 }, { "epoch": 0.8208333333333333, "grad_norm": 12.774468104359062, "learning_rate": 5e-05, "loss": 0.2782, "num_input_tokens_seen": 17981152, "step": 197 }, { "epoch": 0.8208333333333333, "loss": 0.27713096141815186, "loss_ce": 0.001740352949127555, "loss_iou": 0.302734375, "loss_num": 0.05029296875, "loss_xval": 0.275390625, "num_input_tokens_seen": 17981152, "step": 197 }, { "epoch": 0.825, "grad_norm": 6.114818970612895, "learning_rate": 5e-05, "loss": 0.285, "num_input_tokens_seen": 18072828, "step": 198 }, { "epoch": 0.825, "loss": 0.22632259130477905, "loss_ce": 0.0010418322635814548, "loss_iou": 0.296875, "loss_num": 0.040283203125, "loss_xval": 0.2255859375, "num_input_tokens_seen": 18072828, "step": 198 }, { "epoch": 0.8291666666666667, "grad_norm": 18.157425733012563, "learning_rate": 5e-05, "loss": 0.2555, "num_input_tokens_seen": 18164472, "step": 199 }, { "epoch": 0.8291666666666667, "loss": 0.28996580839157104, "loss_ce": 0.002185048069804907, "loss_iou": 0.3359375, "loss_num": 0.05224609375, "loss_xval": 0.287109375, "num_input_tokens_seen": 18164472, "step": 199 }, { "epoch": 0.8333333333333334, "grad_norm": 7.129196337506752, "learning_rate": 5e-05, "loss": 0.2419, "num_input_tokens_seen": 18254864, "step": 200 }, { "epoch": 0.8333333333333334, "loss": 0.2802783250808716, "loss_ce": 0.01379882637411356, "loss_iou": 0.1826171875, "loss_num": 0.05029296875, "loss_xval": 0.265625, "num_input_tokens_seen": 18254864, "step": 200 }, { "epoch": 0.8375, "grad_norm": 12.797975605688038, "learning_rate": 5e-05, "loss": 0.2356, "num_input_tokens_seen": 18346400, "step": 201 }, { "epoch": 0.8375, "loss": 0.24703559279441833, "loss_ce": 0.0029560080729424953, "loss_iou": 0.43359375, "loss_num": 0.0419921875, "loss_xval": 0.244140625, "num_input_tokens_seen": 18346400, "step": 201 }, { "epoch": 0.8416666666666667, "grad_norm": 16.933843013817935, "learning_rate": 5e-05, "loss": 0.3267, "num_input_tokens_seen": 18437604, "step": 202 }, { "epoch": 0.8416666666666667, "loss": 0.2825471758842468, "loss_ce": 0.0008394101168960333, "loss_iou": 0.36328125, "loss_num": 0.050537109375, "loss_xval": 0.28125, "num_input_tokens_seen": 18437604, "step": 202 }, { "epoch": 0.8458333333333333, "grad_norm": 5.550749003705444, "learning_rate": 5e-05, "loss": 0.2229, "num_input_tokens_seen": 18528984, "step": 203 }, { "epoch": 0.8458333333333333, "loss": 0.22848908603191376, "loss_ce": 0.0011941741686314344, "loss_iou": 0.396484375, "loss_num": 0.0390625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 18528984, "step": 203 }, { "epoch": 0.85, "grad_norm": 3.87991186767316, "learning_rate": 5e-05, "loss": 0.2451, "num_input_tokens_seen": 18620812, "step": 204 }, { "epoch": 0.85, "loss": 0.21404461562633514, "loss_ce": 0.0022221102844923735, "loss_iou": 0.2734375, "loss_num": 0.037841796875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 18620812, "step": 204 }, { "epoch": 0.8541666666666666, "grad_norm": 13.040976616283691, "learning_rate": 5e-05, "loss": 0.2431, "num_input_tokens_seen": 18711628, "step": 205 }, { "epoch": 0.8541666666666666, "loss": 0.2216428518295288, "loss_ce": 0.003564231563359499, "loss_iou": 0.291015625, "loss_num": 0.038818359375, "loss_xval": 0.2177734375, "num_input_tokens_seen": 18711628, "step": 205 }, { "epoch": 0.8583333333333333, "grad_norm": 16.947379452066215, "learning_rate": 5e-05, "loss": 0.1924, "num_input_tokens_seen": 18803292, "step": 206 }, { "epoch": 0.8583333333333333, "loss": 0.1999918520450592, "loss_ce": 0.004557272419333458, "loss_iou": 0.359375, "loss_num": 0.033203125, "loss_xval": 0.1953125, "num_input_tokens_seen": 18803292, "step": 206 }, { "epoch": 0.8625, "grad_norm": 6.14659998334949, "learning_rate": 5e-05, "loss": 0.2451, "num_input_tokens_seen": 18894160, "step": 207 }, { "epoch": 0.8625, "loss": 0.3386986255645752, "loss_ce": 0.0001366123033221811, "loss_iou": 0.310546875, "loss_num": 0.0625, "loss_xval": 0.337890625, "num_input_tokens_seen": 18894160, "step": 207 }, { "epoch": 0.8666666666666667, "grad_norm": 11.278150709662043, "learning_rate": 5e-05, "loss": 0.2223, "num_input_tokens_seen": 18985776, "step": 208 }, { "epoch": 0.8666666666666667, "loss": 0.22064605355262756, "loss_ce": 0.0021401969715952873, "loss_iou": 0.46875, "loss_num": 0.035888671875, "loss_xval": 0.21875, "num_input_tokens_seen": 18985776, "step": 208 }, { "epoch": 0.8708333333333333, "grad_norm": 10.346683453020976, "learning_rate": 5e-05, "loss": 0.1693, "num_input_tokens_seen": 19076880, "step": 209 }, { "epoch": 0.8708333333333333, "loss": 0.15130558609962463, "loss_ce": 0.016723062843084335, "loss_iou": 0.25390625, "loss_num": 0.022705078125, "loss_xval": 0.134765625, "num_input_tokens_seen": 19076880, "step": 209 }, { "epoch": 0.875, "grad_norm": 8.402730925321864, "learning_rate": 5e-05, "loss": 0.2744, "num_input_tokens_seen": 19167716, "step": 210 }, { "epoch": 0.875, "loss": 0.31717830896377563, "loss_ce": 0.0007720459252595901, "loss_iou": 0.353515625, "loss_num": 0.057373046875, "loss_xval": 0.31640625, "num_input_tokens_seen": 19167716, "step": 210 }, { "epoch": 0.8791666666666667, "grad_norm": 18.201038700412692, "learning_rate": 5e-05, "loss": 0.2495, "num_input_tokens_seen": 19259032, "step": 211 }, { "epoch": 0.8791666666666667, "loss": 0.2656664550304413, "loss_ce": 0.0011400955263525248, "loss_iou": 0.3984375, "loss_num": 0.04638671875, "loss_xval": 0.263671875, "num_input_tokens_seen": 19259032, "step": 211 }, { "epoch": 0.8833333333333333, "grad_norm": 4.753155681213643, "learning_rate": 5e-05, "loss": 0.2433, "num_input_tokens_seen": 19349292, "step": 212 }, { "epoch": 0.8833333333333333, "loss": 0.2522953152656555, "loss_ce": 0.0007084023673087358, "loss_iou": 0.41015625, "loss_num": 0.04345703125, "loss_xval": 0.251953125, "num_input_tokens_seen": 19349292, "step": 212 }, { "epoch": 0.8875, "grad_norm": 24.512003053945374, "learning_rate": 5e-05, "loss": 0.2135, "num_input_tokens_seen": 19441488, "step": 213 }, { "epoch": 0.8875, "loss": 0.22530657052993774, "loss_ce": 0.003199649043381214, "loss_iou": 0.41796875, "loss_num": 0.037353515625, "loss_xval": 0.2216796875, "num_input_tokens_seen": 19441488, "step": 213 }, { "epoch": 0.8916666666666667, "grad_norm": 17.0384982817077, "learning_rate": 5e-05, "loss": 0.2938, "num_input_tokens_seen": 19532868, "step": 214 }, { "epoch": 0.8916666666666667, "loss": 0.37303754687309265, "loss_ce": 0.0029203486628830433, "loss_iou": 0.220703125, "loss_num": 0.0703125, "loss_xval": 0.37109375, "num_input_tokens_seen": 19532868, "step": 214 }, { "epoch": 0.8958333333333334, "grad_norm": 29.921832394338654, "learning_rate": 5e-05, "loss": 0.2486, "num_input_tokens_seen": 19624120, "step": 215 }, { "epoch": 0.8958333333333334, "loss": 0.22709499299526215, "loss_ce": 0.005018561612814665, "loss_iou": 0.326171875, "loss_num": 0.038818359375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 19624120, "step": 215 }, { "epoch": 0.9, "grad_norm": 9.333427646818315, "learning_rate": 5e-05, "loss": 0.2147, "num_input_tokens_seen": 19715496, "step": 216 }, { "epoch": 0.9, "loss": 0.2823033928871155, "loss_ce": 0.003311683889478445, "loss_iou": 0.314453125, "loss_num": 0.050537109375, "loss_xval": 0.279296875, "num_input_tokens_seen": 19715496, "step": 216 }, { "epoch": 0.9041666666666667, "grad_norm": 4.019548375654141, "learning_rate": 5e-05, "loss": 0.2971, "num_input_tokens_seen": 19806812, "step": 217 }, { "epoch": 0.9041666666666667, "loss": 0.30291223526000977, "loss_ce": 0.0006051263189874589, "loss_iou": 0.474609375, "loss_num": 0.05224609375, "loss_xval": 0.302734375, "num_input_tokens_seen": 19806812, "step": 217 }, { "epoch": 0.9083333333333333, "grad_norm": 4.067634067480343, "learning_rate": 5e-05, "loss": 0.2725, "num_input_tokens_seen": 19898440, "step": 218 }, { "epoch": 0.9083333333333333, "loss": 0.20470158755779266, "loss_ce": 0.007558043580502272, "loss_iou": 0.5, "loss_num": 0.03076171875, "loss_xval": 0.197265625, "num_input_tokens_seen": 19898440, "step": 218 }, { "epoch": 0.9125, "grad_norm": 4.292723853828602, "learning_rate": 5e-05, "loss": 0.2351, "num_input_tokens_seen": 19989852, "step": 219 }, { "epoch": 0.9125, "loss": 0.29990026354789734, "loss_ce": 0.00043127487879246473, "loss_iou": 0.27734375, "loss_num": 0.05517578125, "loss_xval": 0.298828125, "num_input_tokens_seen": 19989852, "step": 219 }, { "epoch": 0.9166666666666666, "grad_norm": 8.376505004890973, "learning_rate": 5e-05, "loss": 0.2903, "num_input_tokens_seen": 20081004, "step": 220 }, { "epoch": 0.9166666666666666, "loss": 0.2845813035964966, "loss_ce": 0.0008288907120004296, "loss_iou": 0.255859375, "loss_num": 0.05224609375, "loss_xval": 0.283203125, "num_input_tokens_seen": 20081004, "step": 220 }, { "epoch": 0.9208333333333333, "grad_norm": 15.542808906125297, "learning_rate": 5e-05, "loss": 0.2976, "num_input_tokens_seen": 20172584, "step": 221 }, { "epoch": 0.9208333333333333, "loss": 0.33047717809677124, "loss_ce": 0.003969602286815643, "loss_iou": 0.275390625, "loss_num": 0.060546875, "loss_xval": 0.326171875, "num_input_tokens_seen": 20172584, "step": 221 }, { "epoch": 0.925, "grad_norm": 12.923561463971636, "learning_rate": 5e-05, "loss": 0.2815, "num_input_tokens_seen": 20264016, "step": 222 }, { "epoch": 0.925, "loss": 0.3422977030277252, "loss_ce": 0.0036746645346283913, "loss_iou": 0.421875, "loss_num": 0.060302734375, "loss_xval": 0.337890625, "num_input_tokens_seen": 20264016, "step": 222 }, { "epoch": 0.9291666666666667, "grad_norm": 6.655322104784028, "learning_rate": 5e-05, "loss": 0.2127, "num_input_tokens_seen": 20355796, "step": 223 }, { "epoch": 0.9291666666666667, "loss": 0.19036522507667542, "loss_ce": 0.0031093659345060587, "loss_iou": 0.259765625, "loss_num": 0.032958984375, "loss_xval": 0.1875, "num_input_tokens_seen": 20355796, "step": 223 }, { "epoch": 0.9333333333333333, "grad_norm": 37.85533531919503, "learning_rate": 5e-05, "loss": 0.2133, "num_input_tokens_seen": 20447404, "step": 224 }, { "epoch": 0.9333333333333333, "loss": 0.257813036441803, "loss_ce": 0.004028834868222475, "loss_iou": 0.373046875, "loss_num": 0.044189453125, "loss_xval": 0.25390625, "num_input_tokens_seen": 20447404, "step": 224 }, { "epoch": 0.9375, "grad_norm": 30.416837635863704, "learning_rate": 5e-05, "loss": 0.2211, "num_input_tokens_seen": 20538856, "step": 225 }, { "epoch": 0.9375, "loss": 0.24632111191749573, "loss_ce": 0.002546711126342416, "loss_iou": 0.435546875, "loss_num": 0.041015625, "loss_xval": 0.244140625, "num_input_tokens_seen": 20538856, "step": 225 }, { "epoch": 0.9416666666666667, "grad_norm": 11.051058935071033, "learning_rate": 5e-05, "loss": 0.1935, "num_input_tokens_seen": 20629960, "step": 226 }, { "epoch": 0.9416666666666667, "loss": 0.17319779098033905, "loss_ce": 0.003947307821363211, "loss_iou": 0.388671875, "loss_num": 0.02685546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 20629960, "step": 226 }, { "epoch": 0.9458333333333333, "grad_norm": 48.233469576133025, "learning_rate": 5e-05, "loss": 0.2862, "num_input_tokens_seen": 20721456, "step": 227 }, { "epoch": 0.9458333333333333, "loss": 0.29020804166793823, "loss_ce": 0.002122091129422188, "loss_iou": 0.3984375, "loss_num": 0.05029296875, "loss_xval": 0.2890625, "num_input_tokens_seen": 20721456, "step": 227 }, { "epoch": 0.95, "grad_norm": 8.113741595409435, "learning_rate": 5e-05, "loss": 0.2437, "num_input_tokens_seen": 20812236, "step": 228 }, { "epoch": 0.95, "loss": 0.2340792417526245, "loss_ce": 0.004526016302406788, "loss_iou": 0.392578125, "loss_num": 0.038818359375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 20812236, "step": 228 }, { "epoch": 0.9541666666666667, "grad_norm": 4.566364595359815, "learning_rate": 5e-05, "loss": 0.2271, "num_input_tokens_seen": 20903888, "step": 229 }, { "epoch": 0.9541666666666667, "loss": 0.19538497924804688, "loss_ce": 0.0030632000416517258, "loss_iou": 0.34765625, "loss_num": 0.0322265625, "loss_xval": 0.1923828125, "num_input_tokens_seen": 20903888, "step": 229 }, { "epoch": 0.9583333333333334, "grad_norm": 7.209407509206219, "learning_rate": 5e-05, "loss": 0.2768, "num_input_tokens_seen": 20995620, "step": 230 }, { "epoch": 0.9583333333333334, "loss": 0.30961310863494873, "loss_ce": 0.00449835229665041, "loss_iou": 0.251953125, "loss_num": 0.056396484375, "loss_xval": 0.3046875, "num_input_tokens_seen": 20995620, "step": 230 }, { "epoch": 0.9625, "grad_norm": 5.104687256344693, "learning_rate": 5e-05, "loss": 0.255, "num_input_tokens_seen": 21086992, "step": 231 }, { "epoch": 0.9625, "loss": 0.252264142036438, "loss_ce": 0.010686978697776794, "loss_iou": 0.2421875, "loss_num": 0.0439453125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 21086992, "step": 231 }, { "epoch": 0.9666666666666667, "grad_norm": 7.374332772342045, "learning_rate": 5e-05, "loss": 0.2872, "num_input_tokens_seen": 21178204, "step": 232 }, { "epoch": 0.9666666666666667, "loss": 0.29594823718070984, "loss_ce": 0.00017185957403853536, "loss_iou": 0.1591796875, "loss_num": 0.05615234375, "loss_xval": 0.294921875, "num_input_tokens_seen": 21178204, "step": 232 }, { "epoch": 0.9708333333333333, "grad_norm": 18.00709127423236, "learning_rate": 5e-05, "loss": 0.2467, "num_input_tokens_seen": 21269452, "step": 233 }, { "epoch": 0.9708333333333333, "loss": 0.25176990032196045, "loss_ce": 0.005004778038710356, "loss_iou": 0.4140625, "loss_num": 0.04150390625, "loss_xval": 0.2470703125, "num_input_tokens_seen": 21269452, "step": 233 }, { "epoch": 0.975, "grad_norm": 5.732409812390045, "learning_rate": 5e-05, "loss": 0.3674, "num_input_tokens_seen": 21360916, "step": 234 }, { "epoch": 0.975, "loss": 0.4744373559951782, "loss_ce": 7.211390766315162e-05, "loss_iou": 0.474609375, "loss_num": 0.0859375, "loss_xval": 0.474609375, "num_input_tokens_seen": 21360916, "step": 234 }, { "epoch": 0.9791666666666666, "grad_norm": 36.35128373431882, "learning_rate": 5e-05, "loss": 0.2559, "num_input_tokens_seen": 21452632, "step": 235 }, { "epoch": 0.9791666666666666, "loss": 0.2777550220489502, "loss_ce": 0.0019981854129582644, "loss_iou": 0.423828125, "loss_num": 0.047119140625, "loss_xval": 0.275390625, "num_input_tokens_seen": 21452632, "step": 235 }, { "epoch": 0.9833333333333333, "grad_norm": 11.575343055187652, "learning_rate": 5e-05, "loss": 0.2949, "num_input_tokens_seen": 21544024, "step": 236 }, { "epoch": 0.9833333333333333, "loss": 0.2095259130001068, "loss_ce": 0.003837417345494032, "loss_iou": 0.1787109375, "loss_num": 0.037841796875, "loss_xval": 0.2060546875, "num_input_tokens_seen": 21544024, "step": 236 }, { "epoch": 0.9875, "grad_norm": 10.772901233226547, "learning_rate": 5e-05, "loss": 0.2122, "num_input_tokens_seen": 21635772, "step": 237 }, { "epoch": 0.9875, "loss": 0.25139501690864563, "loss_ce": 0.0066440412774682045, "loss_iou": 0.25390625, "loss_num": 0.044189453125, "loss_xval": 0.2451171875, "num_input_tokens_seen": 21635772, "step": 237 }, { "epoch": 0.9916666666666667, "grad_norm": 13.725125356732619, "learning_rate": 5e-05, "loss": 0.2273, "num_input_tokens_seen": 21727184, "step": 238 }, { "epoch": 0.9916666666666667, "loss": 0.22478605806827545, "loss_ce": 0.0029842983931303024, "loss_iou": 0.314453125, "loss_num": 0.038330078125, "loss_xval": 0.2216796875, "num_input_tokens_seen": 21727184, "step": 238 }, { "epoch": 0.9958333333333333, "grad_norm": 8.603437946176337, "learning_rate": 5e-05, "loss": 0.2233, "num_input_tokens_seen": 21818536, "step": 239 }, { "epoch": 0.9958333333333333, "loss": 0.20272454619407654, "loss_ce": 0.011501399800181389, "loss_iou": 0.24609375, "loss_num": 0.033447265625, "loss_xval": 0.19140625, "num_input_tokens_seen": 21818536, "step": 239 }, { "epoch": 1.0, "grad_norm": 5.559071328596137, "learning_rate": 5e-05, "loss": 0.3042, "num_input_tokens_seen": 21910184, "step": 240 }, { "epoch": 1.0, "loss": 0.33313196897506714, "loss_ce": 0.0036031443160027266, "loss_iou": 0.271484375, "loss_num": 0.060546875, "loss_xval": 0.330078125, "num_input_tokens_seen": 21910184, "step": 240 }, { "epoch": 1.0041666666666667, "grad_norm": 16.35994363459648, "learning_rate": 5e-05, "loss": 0.2529, "num_input_tokens_seen": 22001376, "step": 241 }, { "epoch": 1.0041666666666667, "loss": 0.2235061228275299, "loss_ce": 0.0001784780470188707, "loss_iou": 0.34765625, "loss_num": 0.0380859375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 22001376, "step": 241 }, { "epoch": 1.0083333333333333, "grad_norm": 17.81522470048974, "learning_rate": 5e-05, "loss": 0.3148, "num_input_tokens_seen": 22092840, "step": 242 }, { "epoch": 1.0083333333333333, "loss": 0.3183177709579468, "loss_ce": 0.0004466568643692881, "loss_iou": 0.44140625, "loss_num": 0.05517578125, "loss_xval": 0.318359375, "num_input_tokens_seen": 22092840, "step": 242 }, { "epoch": 1.0125, "grad_norm": 23.345640800716485, "learning_rate": 5e-05, "loss": 0.3031, "num_input_tokens_seen": 22184588, "step": 243 }, { "epoch": 1.0125, "loss": 0.20814365148544312, "loss_ce": 0.0030655222944915295, "loss_iou": 0.36328125, "loss_num": 0.033935546875, "loss_xval": 0.205078125, "num_input_tokens_seen": 22184588, "step": 243 }, { "epoch": 1.0166666666666666, "grad_norm": 6.665116879017802, "learning_rate": 5e-05, "loss": 0.2888, "num_input_tokens_seen": 22275524, "step": 244 }, { "epoch": 1.0166666666666666, "loss": 0.21397538483142853, "loss_ce": 0.00033707439433783293, "loss_iou": 0.337890625, "loss_num": 0.0361328125, "loss_xval": 0.2138671875, "num_input_tokens_seen": 22275524, "step": 244 }, { "epoch": 1.0208333333333333, "grad_norm": 6.587733942465594, "learning_rate": 5e-05, "loss": 0.2162, "num_input_tokens_seen": 22366976, "step": 245 }, { "epoch": 1.0208333333333333, "loss": 0.2064395546913147, "loss_ce": 0.0006289951270446181, "loss_iou": 0.32421875, "loss_num": 0.034912109375, "loss_xval": 0.2060546875, "num_input_tokens_seen": 22366976, "step": 245 }, { "epoch": 1.025, "grad_norm": 3.399496317951607, "learning_rate": 5e-05, "loss": 0.1985, "num_input_tokens_seen": 22458628, "step": 246 }, { "epoch": 1.025, "loss": 0.18037378787994385, "loss_ce": 0.001357677741907537, "loss_iou": 0.01239013671875, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 22458628, "step": 246 }, { "epoch": 1.0291666666666666, "grad_norm": 7.5768226569415775, "learning_rate": 5e-05, "loss": 0.2429, "num_input_tokens_seen": 22549536, "step": 247 }, { "epoch": 1.0291666666666666, "loss": 0.22366707026958466, "loss_ce": 0.0014991069911047816, "loss_iou": 0.3984375, "loss_num": 0.03662109375, "loss_xval": 0.22265625, "num_input_tokens_seen": 22549536, "step": 247 }, { "epoch": 1.0333333333333334, "grad_norm": 6.386220702582793, "learning_rate": 5e-05, "loss": 0.2185, "num_input_tokens_seen": 22641084, "step": 248 }, { "epoch": 1.0333333333333334, "loss": 0.2533068358898163, "loss_ce": 0.002147157210856676, "loss_iou": 0.37109375, "loss_num": 0.04296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 22641084, "step": 248 }, { "epoch": 1.0375, "grad_norm": 10.756252270916605, "learning_rate": 5e-05, "loss": 0.2297, "num_input_tokens_seen": 22731980, "step": 249 }, { "epoch": 1.0375, "loss": 0.20865212380886078, "loss_ce": 0.001681906171143055, "loss_iou": 0.3046875, "loss_num": 0.035400390625, "loss_xval": 0.20703125, "num_input_tokens_seen": 22731980, "step": 249 }, { "epoch": 1.0416666666666667, "grad_norm": 11.368459723856441, "learning_rate": 5e-05, "loss": 0.2095, "num_input_tokens_seen": 22823056, "step": 250 }, { "epoch": 1.0416666666666667, "eval_seeclick_CIoU": 0.21376945078372955, "eval_seeclick_GIoU": 0.1773088276386261, "eval_seeclick_IoU": 0.3156193494796753, "eval_seeclick_MAE_all": 0.09457048028707504, "eval_seeclick_MAE_h": 0.05827994458377361, "eval_seeclick_MAE_w": 0.1870652362704277, "eval_seeclick_MAE_x_boxes": 0.21573207527399063, "eval_seeclick_MAE_y_boxes": 0.059349725022912025, "eval_seeclick_NUM_probability": 0.9999997615814209, "eval_seeclick_inside_bbox": 0.5994318127632141, "eval_seeclick_loss": 0.5629642605781555, "eval_seeclick_loss_ce": 0.06879188492894173, "eval_seeclick_loss_iou": 0.4404296875, "eval_seeclick_loss_num": 0.0899200439453125, "eval_seeclick_loss_xval": 0.4935302734375, "eval_seeclick_runtime": 76.372, "eval_seeclick_samples_per_second": 0.563, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 22823056, "step": 250 }, { "epoch": 1.0416666666666667, "eval_icons_CIoU": 0.30915556102991104, "eval_icons_GIoU": 0.32306139916181564, "eval_icons_IoU": 0.39342615008354187, "eval_icons_MAE_all": 0.0679849199950695, "eval_icons_MAE_h": 0.1593536138534546, "eval_icons_MAE_w": 0.08073913678526878, "eval_icons_MAE_x_boxes": 0.08451684936881065, "eval_icons_MAE_y_boxes": 0.1595774106681347, "eval_icons_NUM_probability": 1.0, "eval_icons_inside_bbox": 0.5104166716337204, "eval_icons_loss": 0.34727242588996887, "eval_icons_loss_ce": 0.00019893267926818226, "eval_icons_loss_iou": 0.283203125, "eval_icons_loss_num": 0.0634307861328125, "eval_icons_loss_xval": 0.3458251953125, "eval_icons_runtime": 95.8043, "eval_icons_samples_per_second": 0.522, "eval_icons_steps_per_second": 0.021, "num_input_tokens_seen": 22823056, "step": 250 }, { "epoch": 1.0416666666666667, "eval_screenspot_CIoU": 0.29506024221579236, "eval_screenspot_GIoU": 0.2745039314031601, "eval_screenspot_IoU": 0.37262112895647687, "eval_screenspot_MAE_all": 0.10671550035476685, "eval_screenspot_MAE_h": 0.11584235727787018, "eval_screenspot_MAE_w": 0.21633888532718024, "eval_screenspot_MAE_x_boxes": 0.18261671562989554, "eval_screenspot_MAE_y_boxes": 0.10873916993538539, "eval_screenspot_NUM_probability": 0.9998256166776022, "eval_screenspot_inside_bbox": 0.6329166690508524, "eval_screenspot_loss": 0.566607654094696, "eval_screenspot_loss_ce": 6.591878506393793e-05, "eval_screenspot_loss_iou": 0.3468831380208333, "eval_screenspot_loss_num": 0.10788981119791667, "eval_screenspot_loss_xval": 0.5740559895833334, "eval_screenspot_runtime": 158.8771, "eval_screenspot_samples_per_second": 0.56, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 22823056, "step": 250 }, { "epoch": 1.0416666666666667, "eval_compot_CIoU": 0.3294719457626343, "eval_compot_GIoU": 0.31741394102573395, "eval_compot_IoU": 0.42581257224082947, "eval_compot_MAE_all": 0.07543003186583519, "eval_compot_MAE_h": 0.12245305627584457, "eval_compot_MAE_w": 0.1439114511013031, "eval_compot_MAE_x_boxes": 0.14288334921002388, "eval_compot_MAE_y_boxes": 0.12545089423656464, "eval_compot_NUM_probability": 0.9999739527702332, "eval_compot_inside_bbox": 0.5277777910232544, "eval_compot_loss": 0.42674627900123596, "eval_compot_loss_ce": 0.016201181337237358, "eval_compot_loss_iou": 0.3455810546875, "eval_compot_loss_num": 0.0708465576171875, "eval_compot_loss_xval": 0.388427734375, "eval_compot_runtime": 88.5242, "eval_compot_samples_per_second": 0.565, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 22823056, "step": 250 }, { "epoch": 1.0416666666666667, "loss": 0.3685532510280609, "loss_ce": 0.016990739852190018, "loss_iou": 0.380859375, "loss_num": 0.0625, "loss_xval": 0.3515625, "num_input_tokens_seen": 22823056, "step": 250 }, { "epoch": 1.0458333333333334, "grad_norm": 13.750598628368111, "learning_rate": 5e-05, "loss": 0.2017, "num_input_tokens_seen": 22914132, "step": 251 }, { "epoch": 1.0458333333333334, "loss": 0.18371494114398956, "loss_ce": 0.0029288064688444138, "loss_iou": 0.36328125, "loss_num": 0.0289306640625, "loss_xval": 0.1806640625, "num_input_tokens_seen": 22914132, "step": 251 }, { "epoch": 1.05, "grad_norm": 7.245198716742816, "learning_rate": 5e-05, "loss": 0.2539, "num_input_tokens_seen": 23004908, "step": 252 }, { "epoch": 1.05, "loss": 0.2888379693031311, "loss_ce": 0.0006299512460827827, "loss_iou": 0.34375, "loss_num": 0.05078125, "loss_xval": 0.2890625, "num_input_tokens_seen": 23004908, "step": 252 }, { "epoch": 1.0541666666666667, "grad_norm": 10.747899233785548, "learning_rate": 5e-05, "loss": 0.2191, "num_input_tokens_seen": 23096088, "step": 253 }, { "epoch": 1.0541666666666667, "loss": 0.22767138481140137, "loss_ce": 7.128823199309409e-05, "loss_iou": 0.44921875, "loss_num": 0.03662109375, "loss_xval": 0.2275390625, "num_input_tokens_seen": 23096088, "step": 253 }, { "epoch": 1.0583333333333333, "grad_norm": 12.232746097702254, "learning_rate": 5e-05, "loss": 0.2383, "num_input_tokens_seen": 23187160, "step": 254 }, { "epoch": 1.0583333333333333, "loss": 0.2036757469177246, "loss_ce": 0.0005507570458576083, "loss_iou": 0.330078125, "loss_num": 0.033935546875, "loss_xval": 0.203125, "num_input_tokens_seen": 23187160, "step": 254 }, { "epoch": 1.0625, "grad_norm": 7.885073097843605, "learning_rate": 5e-05, "loss": 0.1814, "num_input_tokens_seen": 23278440, "step": 255 }, { "epoch": 1.0625, "loss": 0.20344702899456024, "loss_ce": 0.0006882417947053909, "loss_iou": 0.369140625, "loss_num": 0.032958984375, "loss_xval": 0.203125, "num_input_tokens_seen": 23278440, "step": 255 }, { "epoch": 1.0666666666666667, "grad_norm": 16.40812586396314, "learning_rate": 5e-05, "loss": 0.236, "num_input_tokens_seen": 23370636, "step": 256 }, { "epoch": 1.0666666666666667, "loss": 0.26500198245048523, "loss_ce": 0.0009028694476000965, "loss_iou": 0.2890625, "loss_num": 0.047119140625, "loss_xval": 0.263671875, "num_input_tokens_seen": 23370636, "step": 256 }, { "epoch": 1.0708333333333333, "grad_norm": 11.215786680034467, "learning_rate": 5e-05, "loss": 0.2144, "num_input_tokens_seen": 23462460, "step": 257 }, { "epoch": 1.0708333333333333, "loss": 0.19554069638252258, "loss_ce": 0.001387865049764514, "loss_iou": 0.44921875, "loss_num": 0.029541015625, "loss_xval": 0.1943359375, "num_input_tokens_seen": 23462460, "step": 257 }, { "epoch": 1.075, "grad_norm": 12.60165243155499, "learning_rate": 5e-05, "loss": 0.2056, "num_input_tokens_seen": 23553788, "step": 258 }, { "epoch": 1.075, "loss": 0.17294445633888245, "loss_ce": 0.0011457615764811635, "loss_iou": 0.158203125, "loss_num": 0.0311279296875, "loss_xval": 0.171875, "num_input_tokens_seen": 23553788, "step": 258 }, { "epoch": 1.0791666666666666, "grad_norm": 28.365998182995593, "learning_rate": 5e-05, "loss": 0.2485, "num_input_tokens_seen": 23645260, "step": 259 }, { "epoch": 1.0791666666666666, "loss": 0.22533473372459412, "loss_ce": 0.00267848395742476, "loss_iou": 0.224609375, "loss_num": 0.0400390625, "loss_xval": 0.22265625, "num_input_tokens_seen": 23645260, "step": 259 }, { "epoch": 1.0833333333333333, "grad_norm": 3.6123675014465024, "learning_rate": 5e-05, "loss": 0.2137, "num_input_tokens_seen": 23736488, "step": 260 }, { "epoch": 1.0833333333333333, "loss": 0.2302786111831665, "loss_ce": 0.00432644784450531, "loss_iou": 0.27734375, "loss_num": 0.03955078125, "loss_xval": 0.2255859375, "num_input_tokens_seen": 23736488, "step": 260 }, { "epoch": 1.0875, "grad_norm": 6.263666981437489, "learning_rate": 5e-05, "loss": 0.2006, "num_input_tokens_seen": 23827712, "step": 261 }, { "epoch": 1.0875, "loss": 0.1470654010772705, "loss_ce": 0.0011608521454036236, "loss_iou": 0.2734375, "loss_num": 0.0234375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 23827712, "step": 261 }, { "epoch": 1.0916666666666666, "grad_norm": 17.564574153957086, "learning_rate": 5e-05, "loss": 0.2446, "num_input_tokens_seen": 23918900, "step": 262 }, { "epoch": 1.0916666666666666, "loss": 0.23478296399116516, "loss_ce": 0.0009572738781571388, "loss_iou": 0.34375, "loss_num": 0.03955078125, "loss_xval": 0.2333984375, "num_input_tokens_seen": 23918900, "step": 262 }, { "epoch": 1.0958333333333334, "grad_norm": 6.90548226071317, "learning_rate": 5e-05, "loss": 0.1946, "num_input_tokens_seen": 24010364, "step": 263 }, { "epoch": 1.0958333333333334, "loss": 0.23993369936943054, "loss_ce": 0.0012862500734627247, "loss_iou": 0.240234375, "loss_num": 0.042724609375, "loss_xval": 0.23828125, "num_input_tokens_seen": 24010364, "step": 263 }, { "epoch": 1.1, "grad_norm": 10.53808713151041, "learning_rate": 5e-05, "loss": 0.1977, "num_input_tokens_seen": 24102292, "step": 264 }, { "epoch": 1.1, "loss": 0.14225786924362183, "loss_ce": 0.0004732094530481845, "loss_iou": 0.353515625, "loss_num": 0.02099609375, "loss_xval": 0.1416015625, "num_input_tokens_seen": 24102292, "step": 264 }, { "epoch": 1.1041666666666667, "grad_norm": 7.929983801483684, "learning_rate": 5e-05, "loss": 0.2208, "num_input_tokens_seen": 24193944, "step": 265 }, { "epoch": 1.1041666666666667, "loss": 0.22911550104618073, "loss_ce": 0.0102434316650033, "loss_iou": 0.51171875, "loss_num": 0.032958984375, "loss_xval": 0.21875, "num_input_tokens_seen": 24193944, "step": 265 }, { "epoch": 1.1083333333333334, "grad_norm": 15.646854153164862, "learning_rate": 5e-05, "loss": 0.2539, "num_input_tokens_seen": 24283588, "step": 266 }, { "epoch": 1.1083333333333334, "loss": 0.24303734302520752, "loss_ce": 5.6382563343504444e-05, "loss_iou": 0.51953125, "loss_num": 0.03759765625, "loss_xval": 0.2431640625, "num_input_tokens_seen": 24283588, "step": 266 }, { "epoch": 1.1125, "grad_norm": 18.931589220962483, "learning_rate": 5e-05, "loss": 0.2833, "num_input_tokens_seen": 24374832, "step": 267 }, { "epoch": 1.1125, "loss": 0.33671608567237854, "loss_ce": 0.0007785820635035634, "loss_iou": 0.263671875, "loss_num": 0.0615234375, "loss_xval": 0.3359375, "num_input_tokens_seen": 24374832, "step": 267 }, { "epoch": 1.1166666666666667, "grad_norm": 4.215656324565556, "learning_rate": 5e-05, "loss": 0.2252, "num_input_tokens_seen": 24466228, "step": 268 }, { "epoch": 1.1166666666666667, "loss": 0.1903807669878006, "loss_ce": 0.0022093746811151505, "loss_iou": 0.30078125, "loss_num": 0.03125, "loss_xval": 0.1884765625, "num_input_tokens_seen": 24466228, "step": 268 }, { "epoch": 1.1208333333333333, "grad_norm": 8.300731681675398, "learning_rate": 5e-05, "loss": 0.2449, "num_input_tokens_seen": 24557400, "step": 269 }, { "epoch": 1.1208333333333333, "loss": 0.21225546300411224, "loss_ce": 0.0002803659299388528, "loss_iou": 0.357421875, "loss_num": 0.03466796875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 24557400, "step": 269 }, { "epoch": 1.125, "grad_norm": 5.119351160147199, "learning_rate": 5e-05, "loss": 0.2128, "num_input_tokens_seen": 24648884, "step": 270 }, { "epoch": 1.125, "loss": 0.21190392971038818, "loss_ce": 0.007558222860097885, "loss_iou": 0.34375, "loss_num": 0.033447265625, "loss_xval": 0.2041015625, "num_input_tokens_seen": 24648884, "step": 270 }, { "epoch": 1.1291666666666667, "grad_norm": 4.529424949583247, "learning_rate": 5e-05, "loss": 0.1556, "num_input_tokens_seen": 24740556, "step": 271 }, { "epoch": 1.1291666666666667, "loss": 0.13350823521614075, "loss_ce": 0.0006652114097960293, "loss_iou": 0.2490234375, "loss_num": 0.021240234375, "loss_xval": 0.1328125, "num_input_tokens_seen": 24740556, "step": 271 }, { "epoch": 1.1333333333333333, "grad_norm": 6.937498607854174, "learning_rate": 5e-05, "loss": 0.2454, "num_input_tokens_seen": 24831980, "step": 272 }, { "epoch": 1.1333333333333333, "loss": 0.32967180013656616, "loss_ce": 0.0028895826544612646, "loss_iou": 0.380859375, "loss_num": 0.05712890625, "loss_xval": 0.326171875, "num_input_tokens_seen": 24831980, "step": 272 }, { "epoch": 1.1375, "grad_norm": 2.920099778327708, "learning_rate": 5e-05, "loss": 0.1839, "num_input_tokens_seen": 24923548, "step": 273 }, { "epoch": 1.1375, "loss": 0.2199542224407196, "loss_ce": 0.001448356662876904, "loss_iou": 0.115234375, "loss_num": 0.041259765625, "loss_xval": 0.21875, "num_input_tokens_seen": 24923548, "step": 273 }, { "epoch": 1.1416666666666666, "grad_norm": 13.780157485919663, "learning_rate": 5e-05, "loss": 0.2412, "num_input_tokens_seen": 25014488, "step": 274 }, { "epoch": 1.1416666666666666, "loss": 0.32479098439216614, "loss_ce": 0.00014497421216219664, "loss_iou": 0.369140625, "loss_num": 0.056884765625, "loss_xval": 0.32421875, "num_input_tokens_seen": 25014488, "step": 274 }, { "epoch": 1.1458333333333333, "grad_norm": 9.083108705402712, "learning_rate": 5e-05, "loss": 0.2343, "num_input_tokens_seen": 25106400, "step": 275 }, { "epoch": 1.1458333333333333, "loss": 0.2582840025424957, "loss_ce": 0.0035232664085924625, "loss_iou": 0.255859375, "loss_num": 0.04541015625, "loss_xval": 0.25390625, "num_input_tokens_seen": 25106400, "step": 275 }, { "epoch": 1.15, "grad_norm": 12.211370049752565, "learning_rate": 5e-05, "loss": 0.196, "num_input_tokens_seen": 25197264, "step": 276 }, { "epoch": 1.15, "loss": 0.18650269508361816, "loss_ce": 0.0001013166838674806, "loss_iou": 0.26171875, "loss_num": 0.031494140625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 25197264, "step": 276 }, { "epoch": 1.1541666666666668, "grad_norm": 9.56433072906577, "learning_rate": 5e-05, "loss": 0.1665, "num_input_tokens_seen": 25288508, "step": 277 }, { "epoch": 1.1541666666666668, "loss": 0.20603136718273163, "loss_ce": 0.00034288677852600813, "loss_iou": 0.439453125, "loss_num": 0.031494140625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 25288508, "step": 277 }, { "epoch": 1.1583333333333332, "grad_norm": 6.93267755197553, "learning_rate": 5e-05, "loss": 0.2313, "num_input_tokens_seen": 25379688, "step": 278 }, { "epoch": 1.1583333333333332, "loss": 0.2501569390296936, "loss_ce": 0.00015694127068854868, "loss_iou": 0.40625, "loss_num": 0.041015625, "loss_xval": 0.25, "num_input_tokens_seen": 25379688, "step": 278 }, { "epoch": 1.1625, "grad_norm": 6.723997876952268, "learning_rate": 5e-05, "loss": 0.1984, "num_input_tokens_seen": 25471164, "step": 279 }, { "epoch": 1.1625, "loss": 0.25789207220077515, "loss_ce": 7.95528685557656e-05, "loss_iou": 0.34375, "loss_num": 0.0439453125, "loss_xval": 0.2578125, "num_input_tokens_seen": 25471164, "step": 279 }, { "epoch": 1.1666666666666667, "grad_norm": 4.1449049890656795, "learning_rate": 5e-05, "loss": 0.2235, "num_input_tokens_seen": 25562844, "step": 280 }, { "epoch": 1.1666666666666667, "loss": 0.2036113440990448, "loss_ce": 0.002988772466778755, "loss_iou": 0.3671875, "loss_num": 0.031982421875, "loss_xval": 0.2001953125, "num_input_tokens_seen": 25562844, "step": 280 }, { "epoch": 1.1708333333333334, "grad_norm": 12.380592979305964, "learning_rate": 5e-05, "loss": 0.2817, "num_input_tokens_seen": 25653916, "step": 281 }, { "epoch": 1.1708333333333334, "loss": 0.3497365713119507, "loss_ce": 0.041875243186950684, "loss_iou": 0.1904296875, "loss_num": 0.057373046875, "loss_xval": 0.30859375, "num_input_tokens_seen": 25653916, "step": 281 }, { "epoch": 1.175, "grad_norm": 13.97477290146763, "learning_rate": 5e-05, "loss": 0.2029, "num_input_tokens_seen": 25745660, "step": 282 }, { "epoch": 1.175, "loss": 0.16901439428329468, "loss_ce": 0.002144268713891506, "loss_iou": 0.296875, "loss_num": 0.0267333984375, "loss_xval": 0.1669921875, "num_input_tokens_seen": 25745660, "step": 282 }, { "epoch": 1.1791666666666667, "grad_norm": 13.599248755250013, "learning_rate": 5e-05, "loss": 0.2368, "num_input_tokens_seen": 25837200, "step": 283 }, { "epoch": 1.1791666666666667, "loss": 0.2460094392299652, "loss_ce": 0.001746740541420877, "loss_iou": 0.380859375, "loss_num": 0.040283203125, "loss_xval": 0.244140625, "num_input_tokens_seen": 25837200, "step": 283 }, { "epoch": 1.1833333333333333, "grad_norm": 9.55427246297203, "learning_rate": 5e-05, "loss": 0.1973, "num_input_tokens_seen": 25928152, "step": 284 }, { "epoch": 1.1833333333333333, "loss": 0.2068982720375061, "loss_ce": 0.0030408508609980345, "loss_iou": 0.259765625, "loss_num": 0.034912109375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 25928152, "step": 284 }, { "epoch": 1.1875, "grad_norm": 28.158141134403348, "learning_rate": 5e-05, "loss": 0.2553, "num_input_tokens_seen": 26019552, "step": 285 }, { "epoch": 1.1875, "loss": 0.25668060779571533, "loss_ce": 0.003934011794626713, "loss_iou": 0.376953125, "loss_num": 0.0419921875, "loss_xval": 0.251953125, "num_input_tokens_seen": 26019552, "step": 285 }, { "epoch": 1.1916666666666667, "grad_norm": 28.306902581620154, "learning_rate": 5e-05, "loss": 0.2502, "num_input_tokens_seen": 26109036, "step": 286 }, { "epoch": 1.1916666666666667, "loss": 0.27036812901496887, "loss_ce": 0.0004096394404768944, "loss_iou": 0.408203125, "loss_num": 0.044677734375, "loss_xval": 0.26953125, "num_input_tokens_seen": 26109036, "step": 286 }, { "epoch": 1.1958333333333333, "grad_norm": 4.381139671677197, "learning_rate": 5e-05, "loss": 0.3646, "num_input_tokens_seen": 26200192, "step": 287 }, { "epoch": 1.1958333333333333, "loss": 0.33801934123039246, "loss_ce": 0.0015935404226183891, "loss_iou": 0.32421875, "loss_num": 0.06005859375, "loss_xval": 0.3359375, "num_input_tokens_seen": 26200192, "step": 287 }, { "epoch": 1.2, "grad_norm": 8.970869508879645, "learning_rate": 5e-05, "loss": 0.2122, "num_input_tokens_seen": 26291316, "step": 288 }, { "epoch": 1.2, "loss": 0.17160077393054962, "loss_ce": 0.0007023363141342998, "loss_iou": 0.33984375, "loss_num": 0.0263671875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 26291316, "step": 288 }, { "epoch": 1.2041666666666666, "grad_norm": 5.9025805154361795, "learning_rate": 5e-05, "loss": 0.2123, "num_input_tokens_seen": 26382640, "step": 289 }, { "epoch": 1.2041666666666666, "loss": 0.1856706142425537, "loss_ce": 0.0011613458627834916, "loss_iou": 0.17578125, "loss_num": 0.032958984375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 26382640, "step": 289 }, { "epoch": 1.2083333333333333, "grad_norm": 3.034340467333528, "learning_rate": 5e-05, "loss": 0.1987, "num_input_tokens_seen": 26473560, "step": 290 }, { "epoch": 1.2083333333333333, "loss": 0.20361942052841187, "loss_ce": 0.0009216800681315362, "loss_iou": 0.310546875, "loss_num": 0.033447265625, "loss_xval": 0.203125, "num_input_tokens_seen": 26473560, "step": 290 }, { "epoch": 1.2125, "grad_norm": 5.07195649570692, "learning_rate": 5e-05, "loss": 0.2551, "num_input_tokens_seen": 26564448, "step": 291 }, { "epoch": 1.2125, "loss": 0.3689958453178406, "loss_ce": 0.000587630202062428, "loss_iou": 0.46484375, "loss_num": 0.06298828125, "loss_xval": 0.369140625, "num_input_tokens_seen": 26564448, "step": 291 }, { "epoch": 1.2166666666666668, "grad_norm": 6.2466441370427, "learning_rate": 5e-05, "loss": 0.2091, "num_input_tokens_seen": 26655164, "step": 292 }, { "epoch": 1.2166666666666668, "loss": 0.2707892656326294, "loss_ce": 0.0007697429973632097, "loss_iou": 0.44921875, "loss_num": 0.04345703125, "loss_xval": 0.26953125, "num_input_tokens_seen": 26655164, "step": 292 }, { "epoch": 1.2208333333333332, "grad_norm": 8.151889617477465, "learning_rate": 5e-05, "loss": 0.2068, "num_input_tokens_seen": 26746876, "step": 293 }, { "epoch": 1.2208333333333332, "loss": 0.22343799471855164, "loss_ce": 0.0002934715012088418, "loss_iou": 0.376953125, "loss_num": 0.035888671875, "loss_xval": 0.22265625, "num_input_tokens_seen": 26746876, "step": 293 }, { "epoch": 1.225, "grad_norm": 4.124519965046716, "learning_rate": 5e-05, "loss": 0.2132, "num_input_tokens_seen": 26838780, "step": 294 }, { "epoch": 1.225, "loss": 0.16030102968215942, "loss_ce": 0.0009382428834214807, "loss_iou": 0.330078125, "loss_num": 0.024169921875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 26838780, "step": 294 }, { "epoch": 1.2291666666666667, "grad_norm": 6.533995886459005, "learning_rate": 5e-05, "loss": 0.2646, "num_input_tokens_seen": 26929904, "step": 295 }, { "epoch": 1.2291666666666667, "loss": 0.2608182430267334, "loss_ce": 0.0008084540022537112, "loss_iou": 0.431640625, "loss_num": 0.0419921875, "loss_xval": 0.259765625, "num_input_tokens_seen": 26929904, "step": 295 }, { "epoch": 1.2333333333333334, "grad_norm": 18.092046203643505, "learning_rate": 5e-05, "loss": 0.2806, "num_input_tokens_seen": 27021036, "step": 296 }, { "epoch": 1.2333333333333334, "loss": 0.2797492742538452, "loss_ce": 0.0020393121521919966, "loss_iou": 0.30859375, "loss_num": 0.04833984375, "loss_xval": 0.27734375, "num_input_tokens_seen": 27021036, "step": 296 }, { "epoch": 1.2375, "grad_norm": 7.343572686469682, "learning_rate": 5e-05, "loss": 0.3192, "num_input_tokens_seen": 27112136, "step": 297 }, { "epoch": 1.2375, "loss": 0.2837493419647217, "loss_ce": 0.0007903836667537689, "loss_iou": 0.5078125, "loss_num": 0.04443359375, "loss_xval": 0.283203125, "num_input_tokens_seen": 27112136, "step": 297 }, { "epoch": 1.2416666666666667, "grad_norm": 7.332784386468288, "learning_rate": 5e-05, "loss": 0.2304, "num_input_tokens_seen": 27203900, "step": 298 }, { "epoch": 1.2416666666666667, "loss": 0.2656678259372711, "loss_ce": 0.005841171368956566, "loss_iou": 0.427734375, "loss_num": 0.041748046875, "loss_xval": 0.259765625, "num_input_tokens_seen": 27203900, "step": 298 }, { "epoch": 1.2458333333333333, "grad_norm": 7.39725694818393, "learning_rate": 5e-05, "loss": 0.2118, "num_input_tokens_seen": 27293496, "step": 299 }, { "epoch": 1.2458333333333333, "loss": 0.20304223895072937, "loss_ce": 0.00028345605824142694, "loss_iou": 0.349609375, "loss_num": 0.0322265625, "loss_xval": 0.203125, "num_input_tokens_seen": 27293496, "step": 299 }, { "epoch": 1.25, "grad_norm": 10.010729098569774, "learning_rate": 5e-05, "loss": 0.1894, "num_input_tokens_seen": 27384992, "step": 300 }, { "epoch": 1.25, "loss": 0.13992911577224731, "loss_ce": 0.003088284283876419, "loss_iou": 0.318359375, "loss_num": 0.019775390625, "loss_xval": 0.13671875, "num_input_tokens_seen": 27384992, "step": 300 }, { "epoch": 1.2541666666666667, "grad_norm": 7.37177077354589, "learning_rate": 5e-05, "loss": 0.2112, "num_input_tokens_seen": 27476184, "step": 301 }, { "epoch": 1.2541666666666667, "loss": 0.230925515294075, "loss_ce": 0.002226787619292736, "loss_iou": 0.439453125, "loss_num": 0.03515625, "loss_xval": 0.228515625, "num_input_tokens_seen": 27476184, "step": 301 }, { "epoch": 1.2583333333333333, "grad_norm": 6.352901220947042, "learning_rate": 5e-05, "loss": 0.2913, "num_input_tokens_seen": 27567100, "step": 302 }, { "epoch": 1.2583333333333333, "loss": 0.2581457197666168, "loss_ce": 0.0015539309242740273, "loss_iou": 0.279296875, "loss_num": 0.044677734375, "loss_xval": 0.255859375, "num_input_tokens_seen": 27567100, "step": 302 }, { "epoch": 1.2625, "grad_norm": 8.923033195819553, "learning_rate": 5e-05, "loss": 0.1702, "num_input_tokens_seen": 27658556, "step": 303 }, { "epoch": 1.2625, "loss": 0.1497621238231659, "loss_ce": 0.00034806010080501437, "loss_iou": 0.2421875, "loss_num": 0.0240478515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 27658556, "step": 303 }, { "epoch": 1.2666666666666666, "grad_norm": 25.031495809517857, "learning_rate": 5e-05, "loss": 0.2271, "num_input_tokens_seen": 27749608, "step": 304 }, { "epoch": 1.2666666666666666, "loss": 0.18870463967323303, "loss_ce": 0.00028909966931678355, "loss_iou": 0.3515625, "loss_num": 0.0291748046875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 27749608, "step": 304 }, { "epoch": 1.2708333333333333, "grad_norm": 12.884676627528473, "learning_rate": 5e-05, "loss": 0.2223, "num_input_tokens_seen": 27840920, "step": 305 }, { "epoch": 1.2708333333333333, "loss": 0.22538474202156067, "loss_ce": 0.02140524610877037, "loss_iou": 0.232421875, "loss_num": 0.03515625, "loss_xval": 0.2041015625, "num_input_tokens_seen": 27840920, "step": 305 }, { "epoch": 1.275, "grad_norm": 3.0296539839659946, "learning_rate": 5e-05, "loss": 0.1609, "num_input_tokens_seen": 27932776, "step": 306 }, { "epoch": 1.275, "loss": 0.13345500826835632, "loss_ce": 0.0054032509215176105, "loss_iou": 0.3125, "loss_num": 0.0179443359375, "loss_xval": 0.1279296875, "num_input_tokens_seen": 27932776, "step": 306 }, { "epoch": 1.2791666666666668, "grad_norm": 9.317356311054386, "learning_rate": 5e-05, "loss": 0.2375, "num_input_tokens_seen": 28023864, "step": 307 }, { "epoch": 1.2791666666666668, "loss": 0.18880173563957214, "loss_ce": 0.004658671095967293, "loss_iou": 0.35546875, "loss_num": 0.028076171875, "loss_xval": 0.1845703125, "num_input_tokens_seen": 28023864, "step": 307 }, { "epoch": 1.2833333333333332, "grad_norm": 10.675024473336514, "learning_rate": 5e-05, "loss": 0.2169, "num_input_tokens_seen": 28114932, "step": 308 }, { "epoch": 1.2833333333333332, "loss": 0.24450092017650604, "loss_ce": 0.00011613914102781564, "loss_iou": 0.470703125, "loss_num": 0.037353515625, "loss_xval": 0.244140625, "num_input_tokens_seen": 28114932, "step": 308 }, { "epoch": 1.2875, "grad_norm": 51.09535546619375, "learning_rate": 5e-05, "loss": 0.2699, "num_input_tokens_seen": 28205920, "step": 309 }, { "epoch": 1.2875, "loss": 0.19558678567409515, "loss_ce": 0.005096061155200005, "loss_iou": 0.369140625, "loss_num": 0.029052734375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 28205920, "step": 309 }, { "epoch": 1.2916666666666667, "grad_norm": 17.307347393595503, "learning_rate": 5e-05, "loss": 0.2243, "num_input_tokens_seen": 28297280, "step": 310 }, { "epoch": 1.2916666666666667, "loss": 0.23106957972049713, "loss_ce": 5.150996003067121e-05, "loss_iou": 0.396484375, "loss_num": 0.036376953125, "loss_xval": 0.2314453125, "num_input_tokens_seen": 28297280, "step": 310 }, { "epoch": 1.2958333333333334, "grad_norm": 5.621660380808616, "learning_rate": 5e-05, "loss": 0.2527, "num_input_tokens_seen": 28388768, "step": 311 }, { "epoch": 1.2958333333333334, "loss": 0.23239217698574066, "loss_ce": 0.00253378227353096, "loss_iou": 0.291015625, "loss_num": 0.038818359375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 28388768, "step": 311 }, { "epoch": 1.3, "grad_norm": 3.7105957761308814, "learning_rate": 5e-05, "loss": 0.1835, "num_input_tokens_seen": 28479672, "step": 312 }, { "epoch": 1.3, "loss": 0.15457478165626526, "loss_ce": 0.0021394838113337755, "loss_iou": 0.35546875, "loss_num": 0.0216064453125, "loss_xval": 0.15234375, "num_input_tokens_seen": 28479672, "step": 312 }, { "epoch": 1.3041666666666667, "grad_norm": 9.313963201622089, "learning_rate": 5e-05, "loss": 0.2508, "num_input_tokens_seen": 28571164, "step": 313 }, { "epoch": 1.3041666666666667, "loss": 0.2276507318019867, "loss_ce": 0.0029192741494625807, "loss_iou": 0.365234375, "loss_num": 0.03564453125, "loss_xval": 0.224609375, "num_input_tokens_seen": 28571164, "step": 313 }, { "epoch": 1.3083333333333333, "grad_norm": 7.631332001679039, "learning_rate": 5e-05, "loss": 0.2597, "num_input_tokens_seen": 28661232, "step": 314 }, { "epoch": 1.3083333333333333, "loss": 0.20918793976306915, "loss_ce": 0.0005087353638373315, "loss_iou": 0.296875, "loss_num": 0.0341796875, "loss_xval": 0.208984375, "num_input_tokens_seen": 28661232, "step": 314 }, { "epoch": 1.3125, "grad_norm": 8.850758042334006, "learning_rate": 5e-05, "loss": 0.2298, "num_input_tokens_seen": 28753364, "step": 315 }, { "epoch": 1.3125, "loss": 0.2625318169593811, "loss_ce": 0.0017285854555666447, "loss_iou": 0.3046875, "loss_num": 0.04443359375, "loss_xval": 0.26171875, "num_input_tokens_seen": 28753364, "step": 315 }, { "epoch": 1.3166666666666667, "grad_norm": 6.859930907832721, "learning_rate": 5e-05, "loss": 0.2106, "num_input_tokens_seen": 28844244, "step": 316 }, { "epoch": 1.3166666666666667, "loss": 0.1807193160057068, "loss_ce": 0.0006350984331220388, "loss_iou": 0.115234375, "loss_num": 0.033203125, "loss_xval": 0.1796875, "num_input_tokens_seen": 28844244, "step": 316 }, { "epoch": 1.3208333333333333, "grad_norm": 12.939751788005317, "learning_rate": 5e-05, "loss": 0.2817, "num_input_tokens_seen": 28935828, "step": 317 }, { "epoch": 1.3208333333333333, "loss": 0.2727729082107544, "loss_ce": 0.00202095415443182, "loss_iou": 0.365234375, "loss_num": 0.044921875, "loss_xval": 0.271484375, "num_input_tokens_seen": 28935828, "step": 317 }, { "epoch": 1.325, "grad_norm": 6.5877122224806115, "learning_rate": 5e-05, "loss": 0.2098, "num_input_tokens_seen": 29027540, "step": 318 }, { "epoch": 1.325, "loss": 0.18838170170783997, "loss_ce": 0.003750366624444723, "loss_iou": 0.322265625, "loss_num": 0.02880859375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 29027540, "step": 318 }, { "epoch": 1.3291666666666666, "grad_norm": 5.688954810244233, "learning_rate": 5e-05, "loss": 0.1954, "num_input_tokens_seen": 29118992, "step": 319 }, { "epoch": 1.3291666666666666, "loss": 0.19413119554519653, "loss_ce": 0.00016146828420460224, "loss_iou": 0.35546875, "loss_num": 0.02978515625, "loss_xval": 0.1943359375, "num_input_tokens_seen": 29118992, "step": 319 }, { "epoch": 1.3333333333333333, "grad_norm": 26.71855142634084, "learning_rate": 5e-05, "loss": 0.199, "num_input_tokens_seen": 29210064, "step": 320 }, { "epoch": 1.3333333333333333, "loss": 0.17129886150360107, "loss_ce": 0.001392227946780622, "loss_iou": 0.31640625, "loss_num": 0.02587890625, "loss_xval": 0.169921875, "num_input_tokens_seen": 29210064, "step": 320 }, { "epoch": 1.3375, "grad_norm": 9.486748776798056, "learning_rate": 5e-05, "loss": 0.1816, "num_input_tokens_seen": 29301120, "step": 321 }, { "epoch": 1.3375, "loss": 0.21057261526584625, "loss_ce": 0.002961541526019573, "loss_iou": 0.26953125, "loss_num": 0.03466796875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 29301120, "step": 321 }, { "epoch": 1.3416666666666668, "grad_norm": 3.174802850022777, "learning_rate": 5e-05, "loss": 0.2765, "num_input_tokens_seen": 29391332, "step": 322 }, { "epoch": 1.3416666666666668, "loss": 0.3159889280796051, "loss_ce": 0.0038856619503349066, "loss_iou": 0.24609375, "loss_num": 0.05615234375, "loss_xval": 0.3125, "num_input_tokens_seen": 29391332, "step": 322 }, { "epoch": 1.3458333333333332, "grad_norm": 8.782382534885203, "learning_rate": 5e-05, "loss": 0.2103, "num_input_tokens_seen": 29483488, "step": 323 }, { "epoch": 1.3458333333333332, "loss": 0.21020320057868958, "loss_ce": 0.0057964809238910675, "loss_iou": 0.408203125, "loss_num": 0.0303955078125, "loss_xval": 0.2041015625, "num_input_tokens_seen": 29483488, "step": 323 }, { "epoch": 1.35, "grad_norm": 11.461747008609803, "learning_rate": 5e-05, "loss": 0.2401, "num_input_tokens_seen": 29575216, "step": 324 }, { "epoch": 1.35, "loss": 0.22908198833465576, "loss_ce": 0.004472623113542795, "loss_iou": 0.2021484375, "loss_num": 0.039794921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 29575216, "step": 324 }, { "epoch": 1.3541666666666667, "grad_norm": 6.9226387606348885, "learning_rate": 5e-05, "loss": 0.1771, "num_input_tokens_seen": 29666440, "step": 325 }, { "epoch": 1.3541666666666667, "loss": 0.1849173903465271, "loss_ce": 0.0015983112389221787, "loss_iou": 0.1943359375, "loss_num": 0.03173828125, "loss_xval": 0.18359375, "num_input_tokens_seen": 29666440, "step": 325 }, { "epoch": 1.3583333333333334, "grad_norm": 7.252189463288932, "learning_rate": 5e-05, "loss": 0.2452, "num_input_tokens_seen": 29757144, "step": 326 }, { "epoch": 1.3583333333333334, "loss": 0.2724003493785858, "loss_ce": 0.0028080574702471495, "loss_iou": 0.416015625, "loss_num": 0.04296875, "loss_xval": 0.26953125, "num_input_tokens_seen": 29757144, "step": 326 }, { "epoch": 1.3625, "grad_norm": 30.468996079722707, "learning_rate": 5e-05, "loss": 0.2274, "num_input_tokens_seen": 29848332, "step": 327 }, { "epoch": 1.3625, "loss": 0.2479363977909088, "loss_ce": 0.0006219286005944014, "loss_iou": 0.55859375, "loss_num": 0.034912109375, "loss_xval": 0.2470703125, "num_input_tokens_seen": 29848332, "step": 327 }, { "epoch": 1.3666666666666667, "grad_norm": 13.758287459279725, "learning_rate": 5e-05, "loss": 0.2675, "num_input_tokens_seen": 29939644, "step": 328 }, { "epoch": 1.3666666666666667, "loss": 0.29800552129745483, "loss_ce": 0.00955338403582573, "loss_iou": 0.240234375, "loss_num": 0.051513671875, "loss_xval": 0.2890625, "num_input_tokens_seen": 29939644, "step": 328 }, { "epoch": 1.3708333333333333, "grad_norm": 4.239781836095859, "learning_rate": 5e-05, "loss": 0.2141, "num_input_tokens_seen": 30031300, "step": 329 }, { "epoch": 1.3708333333333333, "loss": 0.230190247297287, "loss_ce": 0.004787414334714413, "loss_iou": 0.3125, "loss_num": 0.036865234375, "loss_xval": 0.2255859375, "num_input_tokens_seen": 30031300, "step": 329 }, { "epoch": 1.375, "grad_norm": 10.217216745472696, "learning_rate": 5e-05, "loss": 0.2542, "num_input_tokens_seen": 30123104, "step": 330 }, { "epoch": 1.375, "loss": 0.2735680043697357, "loss_ce": 0.0028770905919373035, "loss_iou": 0.248046875, "loss_num": 0.047607421875, "loss_xval": 0.271484375, "num_input_tokens_seen": 30123104, "step": 330 }, { "epoch": 1.3791666666666667, "grad_norm": 8.133878191487339, "learning_rate": 5e-05, "loss": 0.2429, "num_input_tokens_seen": 30214872, "step": 331 }, { "epoch": 1.3791666666666667, "loss": 0.2163221538066864, "loss_ce": 0.0047743008472025394, "loss_iou": 0.34765625, "loss_num": 0.033203125, "loss_xval": 0.2119140625, "num_input_tokens_seen": 30214872, "step": 331 }, { "epoch": 1.3833333333333333, "grad_norm": 12.464149511832298, "learning_rate": 5e-05, "loss": 0.2662, "num_input_tokens_seen": 30305752, "step": 332 }, { "epoch": 1.3833333333333333, "loss": 0.19596882164478302, "loss_ce": 0.00016803990001790226, "loss_iou": 0.291015625, "loss_num": 0.031494140625, "loss_xval": 0.1953125, "num_input_tokens_seen": 30305752, "step": 332 }, { "epoch": 1.3875, "grad_norm": 7.075216555936379, "learning_rate": 5e-05, "loss": 0.2613, "num_input_tokens_seen": 30397216, "step": 333 }, { "epoch": 1.3875, "loss": 0.3156003952026367, "loss_ce": 0.004443188663572073, "loss_iou": 0.373046875, "loss_num": 0.05224609375, "loss_xval": 0.310546875, "num_input_tokens_seen": 30397216, "step": 333 }, { "epoch": 1.3916666666666666, "grad_norm": 13.164973320959898, "learning_rate": 5e-05, "loss": 0.1839, "num_input_tokens_seen": 30488728, "step": 334 }, { "epoch": 1.3916666666666666, "loss": 0.1610163450241089, "loss_ce": 0.00028026686049997807, "loss_iou": 0.349609375, "loss_num": 0.0228271484375, "loss_xval": 0.1611328125, "num_input_tokens_seen": 30488728, "step": 334 }, { "epoch": 1.3958333333333333, "grad_norm": 16.539594407020676, "learning_rate": 5e-05, "loss": 0.3598, "num_input_tokens_seen": 30579524, "step": 335 }, { "epoch": 1.3958333333333333, "loss": 0.28894931077957153, "loss_ce": 0.0024503041058778763, "loss_iou": 0.4375, "loss_num": 0.045654296875, "loss_xval": 0.287109375, "num_input_tokens_seen": 30579524, "step": 335 }, { "epoch": 1.4, "grad_norm": 4.990172501920475, "learning_rate": 5e-05, "loss": 0.2102, "num_input_tokens_seen": 30668872, "step": 336 }, { "epoch": 1.4, "loss": 0.15133045613765717, "loss_ce": 0.00014637643471360207, "loss_iou": 0.2216796875, "loss_num": 0.0242919921875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 30668872, "step": 336 }, { "epoch": 1.4041666666666668, "grad_norm": 5.22741505793893, "learning_rate": 5e-05, "loss": 0.2785, "num_input_tokens_seen": 30760380, "step": 337 }, { "epoch": 1.4041666666666668, "loss": 0.23793606460094452, "loss_ce": 0.0017300141043961048, "loss_iou": 0.359375, "loss_num": 0.03759765625, "loss_xval": 0.236328125, "num_input_tokens_seen": 30760380, "step": 337 }, { "epoch": 1.4083333333333332, "grad_norm": 12.380202022294139, "learning_rate": 5e-05, "loss": 0.216, "num_input_tokens_seen": 30851520, "step": 338 }, { "epoch": 1.4083333333333332, "loss": 0.22748208045959473, "loss_ce": 0.0022013087291270494, "loss_iou": 0.435546875, "loss_num": 0.033203125, "loss_xval": 0.2255859375, "num_input_tokens_seen": 30851520, "step": 338 }, { "epoch": 1.4125, "grad_norm": 13.422377024212908, "learning_rate": 5e-05, "loss": 0.2825, "num_input_tokens_seen": 30942864, "step": 339 }, { "epoch": 1.4125, "loss": 0.31169962882995605, "loss_ce": 0.0024955125991255045, "loss_iou": 0.34765625, "loss_num": 0.052490234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 30942864, "step": 339 }, { "epoch": 1.4166666666666667, "grad_norm": 3.790921840710961, "learning_rate": 5e-05, "loss": 0.2376, "num_input_tokens_seen": 31033868, "step": 340 }, { "epoch": 1.4166666666666667, "loss": 0.15136732161045074, "loss_ce": 0.0005799724021926522, "loss_iou": 0.259765625, "loss_num": 0.0230712890625, "loss_xval": 0.150390625, "num_input_tokens_seen": 31033868, "step": 340 }, { "epoch": 1.4208333333333334, "grad_norm": 7.052725036547459, "learning_rate": 5e-05, "loss": 0.2162, "num_input_tokens_seen": 31125672, "step": 341 }, { "epoch": 1.4208333333333334, "loss": 0.2005731761455536, "loss_ce": 0.0010492515284568071, "loss_iou": 0.28125, "loss_num": 0.0322265625, "loss_xval": 0.19921875, "num_input_tokens_seen": 31125672, "step": 341 }, { "epoch": 1.425, "grad_norm": 10.400646348384424, "learning_rate": 5e-05, "loss": 0.1949, "num_input_tokens_seen": 31217040, "step": 342 }, { "epoch": 1.425, "loss": 0.15420687198638916, "loss_ce": 0.0019852002151310444, "loss_iou": 0.298828125, "loss_num": 0.022216796875, "loss_xval": 0.15234375, "num_input_tokens_seen": 31217040, "step": 342 }, { "epoch": 1.4291666666666667, "grad_norm": 17.131715673792243, "learning_rate": 5e-05, "loss": 0.2112, "num_input_tokens_seen": 31308424, "step": 343 }, { "epoch": 1.4291666666666667, "loss": 0.20999836921691895, "loss_ce": 0.0018684857059270144, "loss_iou": 0.34765625, "loss_num": 0.0322265625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 31308424, "step": 343 }, { "epoch": 1.4333333333333333, "grad_norm": 12.977675520043995, "learning_rate": 5e-05, "loss": 0.2513, "num_input_tokens_seen": 31400176, "step": 344 }, { "epoch": 1.4333333333333333, "loss": 0.2407962679862976, "loss_ce": 0.0017520927358418703, "loss_iou": 0.369140625, "loss_num": 0.03759765625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 31400176, "step": 344 }, { "epoch": 1.4375, "grad_norm": 9.949923126654774, "learning_rate": 5e-05, "loss": 0.2511, "num_input_tokens_seen": 31491676, "step": 345 }, { "epoch": 1.4375, "loss": 0.2417999505996704, "loss_ce": 0.0037628610152751207, "loss_iou": 0.3125, "loss_num": 0.0390625, "loss_xval": 0.23828125, "num_input_tokens_seen": 31491676, "step": 345 }, { "epoch": 1.4416666666666667, "grad_norm": 11.53433640909963, "learning_rate": 5e-05, "loss": 0.1946, "num_input_tokens_seen": 31582808, "step": 346 }, { "epoch": 1.4416666666666667, "loss": 0.20713722705841064, "loss_ce": 0.0035239539574831724, "loss_iou": 0.314453125, "loss_num": 0.031982421875, "loss_xval": 0.203125, "num_input_tokens_seen": 31582808, "step": 346 }, { "epoch": 1.4458333333333333, "grad_norm": 4.357137716774205, "learning_rate": 5e-05, "loss": 0.2112, "num_input_tokens_seen": 31673744, "step": 347 }, { "epoch": 1.4458333333333333, "loss": 0.2205427587032318, "loss_ce": 0.011802521534264088, "loss_iou": 0.3671875, "loss_num": 0.031494140625, "loss_xval": 0.208984375, "num_input_tokens_seen": 31673744, "step": 347 }, { "epoch": 1.45, "grad_norm": 7.492972058482306, "learning_rate": 5e-05, "loss": 0.1947, "num_input_tokens_seen": 31765152, "step": 348 }, { "epoch": 1.45, "loss": 0.27522969245910645, "loss_ce": 0.00069356121821329, "loss_iou": 0.3984375, "loss_num": 0.0439453125, "loss_xval": 0.275390625, "num_input_tokens_seen": 31765152, "step": 348 }, { "epoch": 1.4541666666666666, "grad_norm": 15.91631131469505, "learning_rate": 5e-05, "loss": 0.236, "num_input_tokens_seen": 31856836, "step": 349 }, { "epoch": 1.4541666666666666, "loss": 0.1890929788351059, "loss_ce": 0.0017150461208075285, "loss_iou": 0.3671875, "loss_num": 0.0272216796875, "loss_xval": 0.1875, "num_input_tokens_seen": 31856836, "step": 349 }, { "epoch": 1.4583333333333333, "grad_norm": 7.869322709371291, "learning_rate": 5e-05, "loss": 0.2321, "num_input_tokens_seen": 31948068, "step": 350 }, { "epoch": 1.4583333333333333, "loss": 0.21434611082077026, "loss_ce": 0.0029813670553267, "loss_iou": 0.2734375, "loss_num": 0.03466796875, "loss_xval": 0.2109375, "num_input_tokens_seen": 31948068, "step": 350 }, { "epoch": 1.4625, "grad_norm": 4.358120861649633, "learning_rate": 5e-05, "loss": 0.2187, "num_input_tokens_seen": 32039364, "step": 351 }, { "epoch": 1.4625, "loss": 0.18133623898029327, "loss_ce": 0.000550109485629946, "loss_iou": 0.361328125, "loss_num": 0.026123046875, "loss_xval": 0.1806640625, "num_input_tokens_seen": 32039364, "step": 351 }, { "epoch": 1.4666666666666668, "grad_norm": 14.366288785351877, "learning_rate": 5e-05, "loss": 0.2674, "num_input_tokens_seen": 32131012, "step": 352 }, { "epoch": 1.4666666666666668, "loss": 0.19408850371837616, "loss_ce": 0.00524573540315032, "loss_iou": 0.41015625, "loss_num": 0.0262451171875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 32131012, "step": 352 }, { "epoch": 1.4708333333333332, "grad_norm": 7.794105388849188, "learning_rate": 5e-05, "loss": 0.1846, "num_input_tokens_seen": 32222392, "step": 353 }, { "epoch": 1.4708333333333332, "loss": 0.1904703974723816, "loss_ce": 0.0001322595780948177, "loss_iou": 0.23828125, "loss_num": 0.031494140625, "loss_xval": 0.1904296875, "num_input_tokens_seen": 32222392, "step": 353 }, { "epoch": 1.475, "grad_norm": 8.569126832794671, "learning_rate": 5e-05, "loss": 0.2441, "num_input_tokens_seen": 32313028, "step": 354 }, { "epoch": 1.475, "loss": 0.19572490453720093, "loss_ce": 0.003525196108967066, "loss_iou": 0.328125, "loss_num": 0.0291748046875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 32313028, "step": 354 }, { "epoch": 1.4791666666666667, "grad_norm": 9.900711758184192, "learning_rate": 5e-05, "loss": 0.3003, "num_input_tokens_seen": 32405116, "step": 355 }, { "epoch": 1.4791666666666667, "loss": 0.2578551173210144, "loss_ce": 0.0013548820279538631, "loss_iou": 0.369140625, "loss_num": 0.040771484375, "loss_xval": 0.255859375, "num_input_tokens_seen": 32405116, "step": 355 }, { "epoch": 1.4833333333333334, "grad_norm": 12.792461653462786, "learning_rate": 5e-05, "loss": 0.2204, "num_input_tokens_seen": 32497380, "step": 356 }, { "epoch": 1.4833333333333334, "loss": 0.1747611165046692, "loss_ce": 0.003984738141298294, "loss_iou": 0.365234375, "loss_num": 0.0238037109375, "loss_xval": 0.1708984375, "num_input_tokens_seen": 32497380, "step": 356 }, { "epoch": 1.4875, "grad_norm": 22.40811815246497, "learning_rate": 5e-05, "loss": 0.2485, "num_input_tokens_seen": 32588164, "step": 357 }, { "epoch": 1.4875, "loss": 0.20770668983459473, "loss_ce": 6.508764636237174e-05, "loss_iou": 0.44921875, "loss_num": 0.0286865234375, "loss_xval": 0.2080078125, "num_input_tokens_seen": 32588164, "step": 357 }, { "epoch": 1.4916666666666667, "grad_norm": 22.32086680673658, "learning_rate": 5e-05, "loss": 0.248, "num_input_tokens_seen": 32679184, "step": 358 }, { "epoch": 1.4916666666666667, "loss": 0.2456224262714386, "loss_ce": 1.6946230971370824e-05, "loss_iou": 0.49609375, "loss_num": 0.034912109375, "loss_xval": 0.24609375, "num_input_tokens_seen": 32679184, "step": 358 }, { "epoch": 1.4958333333333333, "grad_norm": 6.409542662760085, "learning_rate": 5e-05, "loss": 0.3606, "num_input_tokens_seen": 32770212, "step": 359 }, { "epoch": 1.4958333333333333, "loss": 0.38848453760147095, "loss_ce": 5.6810757087077945e-05, "loss_iou": 0.404296875, "loss_num": 0.06591796875, "loss_xval": 0.388671875, "num_input_tokens_seen": 32770212, "step": 359 }, { "epoch": 1.5, "grad_norm": 46.11948684374341, "learning_rate": 5e-05, "loss": 0.266, "num_input_tokens_seen": 32861928, "step": 360 }, { "epoch": 1.5, "loss": 0.1256561428308487, "loss_ce": 0.0014190769288688898, "loss_iou": 0.3359375, "loss_num": 0.01519775390625, "loss_xval": 0.1240234375, "num_input_tokens_seen": 32861928, "step": 360 }, { "epoch": 1.5041666666666667, "grad_norm": 3.2817747199724736, "learning_rate": 5e-05, "loss": 0.1962, "num_input_tokens_seen": 32953268, "step": 361 }, { "epoch": 1.5041666666666667, "loss": 0.2756691873073578, "loss_ce": 0.0021706530824303627, "loss_iou": 0.26171875, "loss_num": 0.047119140625, "loss_xval": 0.2734375, "num_input_tokens_seen": 32953268, "step": 361 }, { "epoch": 1.5083333333333333, "grad_norm": 13.928454356141433, "learning_rate": 5e-05, "loss": 0.2496, "num_input_tokens_seen": 33044524, "step": 362 }, { "epoch": 1.5083333333333333, "loss": 0.2404671013355255, "loss_ce": 0.0009651454747654498, "loss_iou": 0.1708984375, "loss_num": 0.04296875, "loss_xval": 0.2392578125, "num_input_tokens_seen": 33044524, "step": 362 }, { "epoch": 1.5125, "grad_norm": 30.534739055714443, "learning_rate": 5e-05, "loss": 0.1796, "num_input_tokens_seen": 33135836, "step": 363 }, { "epoch": 1.5125, "loss": 0.15238480269908905, "loss_ce": 0.0007124289986677468, "loss_iou": 0.4140625, "loss_num": 0.018310546875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 33135836, "step": 363 }, { "epoch": 1.5166666666666666, "grad_norm": 9.815702000404304, "learning_rate": 5e-05, "loss": 0.3049, "num_input_tokens_seen": 33227504, "step": 364 }, { "epoch": 1.5166666666666666, "loss": 0.37344616651535034, "loss_ce": 0.0002772384032141417, "loss_iou": 0.1416015625, "loss_num": 0.07080078125, "loss_xval": 0.373046875, "num_input_tokens_seen": 33227504, "step": 364 }, { "epoch": 1.5208333333333335, "grad_norm": 6.0881351836137565, "learning_rate": 5e-05, "loss": 0.1999, "num_input_tokens_seen": 33319136, "step": 365 }, { "epoch": 1.5208333333333335, "loss": 0.21963077783584595, "loss_ce": 0.0015826758462935686, "loss_iou": 0.322265625, "loss_num": 0.0341796875, "loss_xval": 0.2177734375, "num_input_tokens_seen": 33319136, "step": 365 }, { "epoch": 1.525, "grad_norm": 45.387457295986835, "learning_rate": 5e-05, "loss": 0.294, "num_input_tokens_seen": 33409984, "step": 366 }, { "epoch": 1.525, "loss": 0.33772012591362, "loss_ce": 0.0009281392558477819, "loss_iou": 0.3125, "loss_num": 0.05810546875, "loss_xval": 0.3359375, "num_input_tokens_seen": 33409984, "step": 366 }, { "epoch": 1.5291666666666668, "grad_norm": 6.155663902094734, "learning_rate": 5e-05, "loss": 0.2236, "num_input_tokens_seen": 33501112, "step": 367 }, { "epoch": 1.5291666666666668, "loss": 0.22664915025234222, "loss_ce": 0.0006970040267333388, "loss_iou": 0.1630859375, "loss_num": 0.04052734375, "loss_xval": 0.2255859375, "num_input_tokens_seen": 33501112, "step": 367 }, { "epoch": 1.5333333333333332, "grad_norm": 4.6118762946783205, "learning_rate": 5e-05, "loss": 0.1876, "num_input_tokens_seen": 33592508, "step": 368 }, { "epoch": 1.5333333333333332, "loss": 0.1918344497680664, "loss_ce": 0.0008249252568930387, "loss_iou": 0.275390625, "loss_num": 0.0301513671875, "loss_xval": 0.19140625, "num_input_tokens_seen": 33592508, "step": 368 }, { "epoch": 1.5375, "grad_norm": 5.626777974326843, "learning_rate": 5e-05, "loss": 0.2292, "num_input_tokens_seen": 33682856, "step": 369 }, { "epoch": 1.5375, "loss": 0.24069613218307495, "loss_ce": 0.0019876514561474323, "loss_iou": 0.279296875, "loss_num": 0.03955078125, "loss_xval": 0.23828125, "num_input_tokens_seen": 33682856, "step": 369 }, { "epoch": 1.5416666666666665, "grad_norm": 9.044519890425953, "learning_rate": 5e-05, "loss": 0.1977, "num_input_tokens_seen": 33774808, "step": 370 }, { "epoch": 1.5416666666666665, "loss": 0.21389149129390717, "loss_ce": 0.006127822212874889, "loss_iou": 0.37109375, "loss_num": 0.030517578125, "loss_xval": 0.2080078125, "num_input_tokens_seen": 33774808, "step": 370 }, { "epoch": 1.5458333333333334, "grad_norm": 21.294508979140122, "learning_rate": 5e-05, "loss": 0.1979, "num_input_tokens_seen": 33866240, "step": 371 }, { "epoch": 1.5458333333333334, "loss": 0.16506054997444153, "loss_ce": 0.0013032348360866308, "loss_iou": 0.380859375, "loss_num": 0.021484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 33866240, "step": 371 }, { "epoch": 1.55, "grad_norm": 23.744916776588756, "learning_rate": 5e-05, "loss": 0.2869, "num_input_tokens_seen": 33957488, "step": 372 }, { "epoch": 1.55, "loss": 0.3346654772758484, "loss_ce": 0.0010778360301628709, "loss_iou": 0.41015625, "loss_num": 0.0546875, "loss_xval": 0.333984375, "num_input_tokens_seen": 33957488, "step": 372 }, { "epoch": 1.5541666666666667, "grad_norm": 5.0626894260783954, "learning_rate": 5e-05, "loss": 0.269, "num_input_tokens_seen": 34049076, "step": 373 }, { "epoch": 1.5541666666666667, "loss": 0.2526951730251312, "loss_ce": 0.0027562116738408804, "loss_iou": 0.26953125, "loss_num": 0.0419921875, "loss_xval": 0.25, "num_input_tokens_seen": 34049076, "step": 373 }, { "epoch": 1.5583333333333333, "grad_norm": 8.974395840489713, "learning_rate": 5e-05, "loss": 0.2534, "num_input_tokens_seen": 34140908, "step": 374 }, { "epoch": 1.5583333333333333, "loss": 0.22455793619155884, "loss_ce": 0.002328932285308838, "loss_iou": 0.28125, "loss_num": 0.0361328125, "loss_xval": 0.22265625, "num_input_tokens_seen": 34140908, "step": 374 }, { "epoch": 1.5625, "grad_norm": 7.5526802682001986, "learning_rate": 5e-05, "loss": 0.1763, "num_input_tokens_seen": 34231624, "step": 375 }, { "epoch": 1.5625, "loss": 0.17288470268249512, "loss_ce": 0.003206963185220957, "loss_iou": 0.1787109375, "loss_num": 0.028564453125, "loss_xval": 0.169921875, "num_input_tokens_seen": 34231624, "step": 375 }, { "epoch": 1.5666666666666667, "grad_norm": 15.121092207403379, "learning_rate": 5e-05, "loss": 0.1969, "num_input_tokens_seen": 34321652, "step": 376 }, { "epoch": 1.5666666666666667, "loss": 0.17863944172859192, "loss_ce": 0.0010271335486322641, "loss_iou": 0.328125, "loss_num": 0.0257568359375, "loss_xval": 0.177734375, "num_input_tokens_seen": 34321652, "step": 376 }, { "epoch": 1.5708333333333333, "grad_norm": 6.962086400891019, "learning_rate": 5e-05, "loss": 0.2597, "num_input_tokens_seen": 34411720, "step": 377 }, { "epoch": 1.5708333333333333, "loss": 0.29189687967300415, "loss_ce": 2.67762388830306e-05, "loss_iou": 0.50390625, "loss_num": 0.043212890625, "loss_xval": 0.291015625, "num_input_tokens_seen": 34411720, "step": 377 }, { "epoch": 1.575, "grad_norm": 13.594155653472589, "learning_rate": 5e-05, "loss": 0.1871, "num_input_tokens_seen": 34503112, "step": 378 }, { "epoch": 1.575, "loss": 0.2062322199344635, "loss_ce": 0.00036064465530216694, "loss_iou": 0.306640625, "loss_num": 0.03173828125, "loss_xval": 0.2060546875, "num_input_tokens_seen": 34503112, "step": 378 }, { "epoch": 1.5791666666666666, "grad_norm": 11.618376014557729, "learning_rate": 5e-05, "loss": 0.2319, "num_input_tokens_seen": 34594328, "step": 379 }, { "epoch": 1.5791666666666666, "loss": 0.2239169478416443, "loss_ce": 0.00046722288243472576, "loss_iou": 0.37890625, "loss_num": 0.033203125, "loss_xval": 0.2236328125, "num_input_tokens_seen": 34594328, "step": 379 }, { "epoch": 1.5833333333333335, "grad_norm": 4.160227162012771, "learning_rate": 5e-05, "loss": 0.2201, "num_input_tokens_seen": 34685332, "step": 380 }, { "epoch": 1.5833333333333335, "loss": 0.2167702317237854, "loss_ce": 0.000522678135894239, "loss_iou": 0.30859375, "loss_num": 0.033935546875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 34685332, "step": 380 }, { "epoch": 1.5875, "grad_norm": 26.170465463725673, "learning_rate": 5e-05, "loss": 0.2244, "num_input_tokens_seen": 34776532, "step": 381 }, { "epoch": 1.5875, "loss": 0.2976117730140686, "loss_ce": 0.0012555646244436502, "loss_iou": 0.259765625, "loss_num": 0.051513671875, "loss_xval": 0.296875, "num_input_tokens_seen": 34776532, "step": 381 }, { "epoch": 1.5916666666666668, "grad_norm": 11.570362583321861, "learning_rate": 5e-05, "loss": 0.186, "num_input_tokens_seen": 34866236, "step": 382 }, { "epoch": 1.5916666666666668, "loss": 0.18814677000045776, "loss_ce": 0.00137919036205858, "loss_iou": 0.41015625, "loss_num": 0.0247802734375, "loss_xval": 0.1865234375, "num_input_tokens_seen": 34866236, "step": 382 }, { "epoch": 1.5958333333333332, "grad_norm": 20.703551173153976, "learning_rate": 5e-05, "loss": 0.293, "num_input_tokens_seen": 34957448, "step": 383 }, { "epoch": 1.5958333333333332, "loss": 0.364020973443985, "loss_ce": 0.00879635289311409, "loss_iou": 0.259765625, "loss_num": 0.06298828125, "loss_xval": 0.35546875, "num_input_tokens_seen": 34957448, "step": 383 }, { "epoch": 1.6, "grad_norm": 7.683924593021991, "learning_rate": 5e-05, "loss": 0.2245, "num_input_tokens_seen": 35048296, "step": 384 }, { "epoch": 1.6, "loss": 0.2429579645395279, "loss_ce": 0.0018080619629472494, "loss_iou": 0.40625, "loss_num": 0.03564453125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 35048296, "step": 384 }, { "epoch": 1.6041666666666665, "grad_norm": 7.103846328800123, "learning_rate": 5e-05, "loss": 0.2665, "num_input_tokens_seen": 35139692, "step": 385 }, { "epoch": 1.6041666666666665, "loss": 0.2960255742073059, "loss_ce": 0.002873738296329975, "loss_iou": 0.3515625, "loss_num": 0.0478515625, "loss_xval": 0.29296875, "num_input_tokens_seen": 35139692, "step": 385 }, { "epoch": 1.6083333333333334, "grad_norm": 18.805427329976087, "learning_rate": 5e-05, "loss": 0.2069, "num_input_tokens_seen": 35230592, "step": 386 }, { "epoch": 1.6083333333333334, "loss": 0.19052082300186157, "loss_ce": 0.001006664359010756, "loss_iou": 0.36328125, "loss_num": 0.0267333984375, "loss_xval": 0.189453125, "num_input_tokens_seen": 35230592, "step": 386 }, { "epoch": 1.6125, "grad_norm": 10.10429707392757, "learning_rate": 5e-05, "loss": 0.2302, "num_input_tokens_seen": 35322044, "step": 387 }, { "epoch": 1.6125, "loss": 0.24689523875713348, "loss_ce": 0.0006794063956476748, "loss_iou": 0.37109375, "loss_num": 0.037841796875, "loss_xval": 0.24609375, "num_input_tokens_seen": 35322044, "step": 387 }, { "epoch": 1.6166666666666667, "grad_norm": 4.736744820547368, "learning_rate": 5e-05, "loss": 0.3142, "num_input_tokens_seen": 35413536, "step": 388 }, { "epoch": 1.6166666666666667, "loss": 0.4286704957485199, "loss_ce": 0.0010886834934353828, "loss_iou": 0.27734375, "loss_num": 0.0771484375, "loss_xval": 0.427734375, "num_input_tokens_seen": 35413536, "step": 388 }, { "epoch": 1.6208333333333333, "grad_norm": 15.744695283257744, "learning_rate": 5e-05, "loss": 0.1926, "num_input_tokens_seen": 35505484, "step": 389 }, { "epoch": 1.6208333333333333, "loss": 0.19779208302497864, "loss_ce": 0.0017471597529947758, "loss_iou": 0.345703125, "loss_num": 0.028564453125, "loss_xval": 0.1962890625, "num_input_tokens_seen": 35505484, "step": 389 }, { "epoch": 1.625, "grad_norm": 7.9490484769934175, "learning_rate": 5e-05, "loss": 0.2718, "num_input_tokens_seen": 35596612, "step": 390 }, { "epoch": 1.625, "loss": 0.2874143719673157, "loss_ce": 0.0004270472563803196, "loss_iou": 0.3125, "loss_num": 0.047607421875, "loss_xval": 0.287109375, "num_input_tokens_seen": 35596612, "step": 390 }, { "epoch": 1.6291666666666667, "grad_norm": 7.330820229165241, "learning_rate": 5e-05, "loss": 0.297, "num_input_tokens_seen": 35687724, "step": 391 }, { "epoch": 1.6291666666666667, "loss": 0.23688414692878723, "loss_ce": 0.0011053455527871847, "loss_iou": 0.2490234375, "loss_num": 0.039306640625, "loss_xval": 0.2353515625, "num_input_tokens_seen": 35687724, "step": 391 }, { "epoch": 1.6333333333333333, "grad_norm": 3.2668345947971096, "learning_rate": 5e-05, "loss": 0.2265, "num_input_tokens_seen": 35778768, "step": 392 }, { "epoch": 1.6333333333333333, "loss": 0.19086655974388123, "loss_ce": 9.624052836443298e-06, "loss_iou": 0.2734375, "loss_num": 0.0296630859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 35778768, "step": 392 }, { "epoch": 1.6375, "grad_norm": 6.081542328972911, "learning_rate": 5e-05, "loss": 0.2041, "num_input_tokens_seen": 35870332, "step": 393 }, { "epoch": 1.6375, "loss": 0.20596742630004883, "loss_ce": 0.004307264927774668, "loss_iou": 0.345703125, "loss_num": 0.0294189453125, "loss_xval": 0.201171875, "num_input_tokens_seen": 35870332, "step": 393 }, { "epoch": 1.6416666666666666, "grad_norm": 5.586847565860315, "learning_rate": 5e-05, "loss": 0.2041, "num_input_tokens_seen": 35961944, "step": 394 }, { "epoch": 1.6416666666666666, "loss": 0.21251477301120758, "loss_ce": 0.014455698430538177, "loss_iou": 0.32421875, "loss_num": 0.029541015625, "loss_xval": 0.1982421875, "num_input_tokens_seen": 35961944, "step": 394 }, { "epoch": 1.6458333333333335, "grad_norm": 5.535852588618803, "learning_rate": 5e-05, "loss": 0.2247, "num_input_tokens_seen": 36053716, "step": 395 }, { "epoch": 1.6458333333333335, "loss": 0.2407526671886444, "loss_ce": 0.0012507280334830284, "loss_iou": 0.361328125, "loss_num": 0.036376953125, "loss_xval": 0.2392578125, "num_input_tokens_seen": 36053716, "step": 395 }, { "epoch": 1.65, "grad_norm": 8.008392249480794, "learning_rate": 5e-05, "loss": 0.1529, "num_input_tokens_seen": 36145512, "step": 396 }, { "epoch": 1.65, "loss": 0.13524940609931946, "loss_ce": 0.0006058429717086256, "loss_iou": 0.24609375, "loss_num": 0.0191650390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 36145512, "step": 396 }, { "epoch": 1.6541666666666668, "grad_norm": 10.17149172074727, "learning_rate": 5e-05, "loss": 0.1779, "num_input_tokens_seen": 36237148, "step": 397 }, { "epoch": 1.6541666666666668, "loss": 0.18197308480739594, "loss_ce": 0.002529717283323407, "loss_iou": 0.357421875, "loss_num": 0.024658203125, "loss_xval": 0.1796875, "num_input_tokens_seen": 36237148, "step": 397 }, { "epoch": 1.6583333333333332, "grad_norm": 12.250940882285644, "learning_rate": 5e-05, "loss": 0.2401, "num_input_tokens_seen": 36328940, "step": 398 }, { "epoch": 1.6583333333333332, "loss": 0.24007660150527954, "loss_ce": 0.002588799921795726, "loss_iou": 0.255859375, "loss_num": 0.039306640625, "loss_xval": 0.2373046875, "num_input_tokens_seen": 36328940, "step": 398 }, { "epoch": 1.6625, "grad_norm": 13.131902976504431, "learning_rate": 5e-05, "loss": 0.2234, "num_input_tokens_seen": 36420480, "step": 399 }, { "epoch": 1.6625, "loss": 0.16811129450798035, "loss_ce": 0.0016684221336618066, "loss_iou": 0.359375, "loss_num": 0.0218505859375, "loss_xval": 0.166015625, "num_input_tokens_seen": 36420480, "step": 399 }, { "epoch": 1.6666666666666665, "grad_norm": 10.31781709490099, "learning_rate": 5e-05, "loss": 0.2428, "num_input_tokens_seen": 36512368, "step": 400 }, { "epoch": 1.6666666666666665, "loss": 0.2330133020877838, "loss_ce": 0.0021173148415982723, "loss_iou": 0.259765625, "loss_num": 0.037841796875, "loss_xval": 0.23046875, "num_input_tokens_seen": 36512368, "step": 400 }, { "epoch": 1.6708333333333334, "grad_norm": 9.461221271761932, "learning_rate": 5e-05, "loss": 0.3251, "num_input_tokens_seen": 36603644, "step": 401 }, { "epoch": 1.6708333333333334, "loss": 0.30060258507728577, "loss_ce": 0.0023848214186728, "loss_iou": 0.390625, "loss_num": 0.047119140625, "loss_xval": 0.298828125, "num_input_tokens_seen": 36603644, "step": 401 }, { "epoch": 1.675, "grad_norm": 7.1425619613813645, "learning_rate": 5e-05, "loss": 0.219, "num_input_tokens_seen": 36694824, "step": 402 }, { "epoch": 1.675, "loss": 0.2716678977012634, "loss_ce": 0.0014652373502030969, "loss_iou": 0.30078125, "loss_num": 0.04443359375, "loss_xval": 0.26953125, "num_input_tokens_seen": 36694824, "step": 402 }, { "epoch": 1.6791666666666667, "grad_norm": 10.989336173327343, "learning_rate": 5e-05, "loss": 0.2362, "num_input_tokens_seen": 36786560, "step": 403 }, { "epoch": 1.6791666666666667, "loss": 0.23715360462665558, "loss_ce": 0.0011306637898087502, "loss_iou": 0.3125, "loss_num": 0.037109375, "loss_xval": 0.236328125, "num_input_tokens_seen": 36786560, "step": 403 }, { "epoch": 1.6833333333333333, "grad_norm": 6.976559533587898, "learning_rate": 5e-05, "loss": 0.269, "num_input_tokens_seen": 36878524, "step": 404 }, { "epoch": 1.6833333333333333, "loss": 0.2160152792930603, "loss_ce": 0.005840706638991833, "loss_iou": 0.3203125, "loss_num": 0.03173828125, "loss_xval": 0.2099609375, "num_input_tokens_seen": 36878524, "step": 404 }, { "epoch": 1.6875, "grad_norm": 17.060754210977322, "learning_rate": 5e-05, "loss": 0.268, "num_input_tokens_seen": 36970148, "step": 405 }, { "epoch": 1.6875, "loss": 0.17095698416233063, "loss_ce": 0.0011266570072621107, "loss_iou": 0.326171875, "loss_num": 0.0234375, "loss_xval": 0.169921875, "num_input_tokens_seen": 36970148, "step": 405 }, { "epoch": 1.6916666666666667, "grad_norm": 5.975013278466526, "learning_rate": 5e-05, "loss": 0.2443, "num_input_tokens_seen": 37061936, "step": 406 }, { "epoch": 1.6916666666666667, "loss": 0.21971558034420013, "loss_ce": 0.0009655768517404795, "loss_iou": 0.3203125, "loss_num": 0.033447265625, "loss_xval": 0.21875, "num_input_tokens_seen": 37061936, "step": 406 }, { "epoch": 1.6958333333333333, "grad_norm": 8.59667374148006, "learning_rate": 5e-05, "loss": 0.1863, "num_input_tokens_seen": 37152712, "step": 407 }, { "epoch": 1.6958333333333333, "loss": 0.21013236045837402, "loss_ce": 0.0005071184132248163, "loss_iou": 0.2412109375, "loss_num": 0.0341796875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 37152712, "step": 407 }, { "epoch": 1.7, "grad_norm": 3.3529361396033086, "learning_rate": 5e-05, "loss": 0.2094, "num_input_tokens_seen": 37244304, "step": 408 }, { "epoch": 1.7, "loss": 0.17331859469413757, "loss_ce": 0.002420151839032769, "loss_iou": 0.30859375, "loss_num": 0.024169921875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 37244304, "step": 408 }, { "epoch": 1.7041666666666666, "grad_norm": 6.3683193819610375, "learning_rate": 5e-05, "loss": 0.2657, "num_input_tokens_seen": 37333460, "step": 409 }, { "epoch": 1.7041666666666666, "loss": 0.3749966025352478, "loss_ce": 0.000515383668243885, "loss_iou": 0.1396484375, "loss_num": 0.0703125, "loss_xval": 0.375, "num_input_tokens_seen": 37333460, "step": 409 }, { "epoch": 1.7083333333333335, "grad_norm": 13.088956915518326, "learning_rate": 5e-05, "loss": 0.2861, "num_input_tokens_seen": 37424924, "step": 410 }, { "epoch": 1.7083333333333335, "loss": 0.3154312074184418, "loss_ce": 0.014772024936974049, "loss_iou": 0.36328125, "loss_num": 0.04833984375, "loss_xval": 0.30078125, "num_input_tokens_seen": 37424924, "step": 410 }, { "epoch": 1.7125, "grad_norm": 13.613320408355072, "learning_rate": 5e-05, "loss": 0.2217, "num_input_tokens_seen": 37516336, "step": 411 }, { "epoch": 1.7125, "loss": 0.1935127079486847, "loss_ce": 0.002289560856297612, "loss_iou": 0.328125, "loss_num": 0.0274658203125, "loss_xval": 0.19140625, "num_input_tokens_seen": 37516336, "step": 411 }, { "epoch": 1.7166666666666668, "grad_norm": 13.38082197566342, "learning_rate": 5e-05, "loss": 0.1883, "num_input_tokens_seen": 37607440, "step": 412 }, { "epoch": 1.7166666666666668, "loss": 0.223766028881073, "loss_ce": 0.0012318526860326529, "loss_iou": 0.3125, "loss_num": 0.0341796875, "loss_xval": 0.22265625, "num_input_tokens_seen": 37607440, "step": 412 }, { "epoch": 1.7208333333333332, "grad_norm": 8.073520023907198, "learning_rate": 5e-05, "loss": 0.2529, "num_input_tokens_seen": 37698444, "step": 413 }, { "epoch": 1.7208333333333332, "loss": 0.24851244688034058, "loss_ce": 0.0008317787433043122, "loss_iou": 0.201171875, "loss_num": 0.04296875, "loss_xval": 0.248046875, "num_input_tokens_seen": 37698444, "step": 413 }, { "epoch": 1.725, "grad_norm": 6.152938443727431, "learning_rate": 5e-05, "loss": 0.2345, "num_input_tokens_seen": 37789556, "step": 414 }, { "epoch": 1.725, "loss": 0.17417480051517487, "loss_ce": 0.0009570303955115378, "loss_iou": 0.365234375, "loss_num": 0.0225830078125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 37789556, "step": 414 }, { "epoch": 1.7291666666666665, "grad_norm": 7.64973034730577, "learning_rate": 5e-05, "loss": 0.2374, "num_input_tokens_seen": 37881200, "step": 415 }, { "epoch": 1.7291666666666665, "loss": 0.2355271875858307, "loss_ce": 0.0021287663839757442, "loss_iou": 0.345703125, "loss_num": 0.03515625, "loss_xval": 0.2333984375, "num_input_tokens_seen": 37881200, "step": 415 }, { "epoch": 1.7333333333333334, "grad_norm": 7.465852707646311, "learning_rate": 5e-05, "loss": 0.2417, "num_input_tokens_seen": 37972388, "step": 416 }, { "epoch": 1.7333333333333334, "loss": 0.21885107457637787, "loss_ce": 0.0008945193840190768, "loss_iou": 0.3671875, "loss_num": 0.031494140625, "loss_xval": 0.2177734375, "num_input_tokens_seen": 37972388, "step": 416 }, { "epoch": 1.7375, "grad_norm": 12.16011864159544, "learning_rate": 5e-05, "loss": 0.1871, "num_input_tokens_seen": 38063388, "step": 417 }, { "epoch": 1.7375, "loss": 0.22579969465732574, "loss_ce": 0.0022279280237853527, "loss_iou": 0.1767578125, "loss_num": 0.038818359375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 38063388, "step": 417 }, { "epoch": 1.7416666666666667, "grad_norm": 7.508322645181989, "learning_rate": 5e-05, "loss": 0.1914, "num_input_tokens_seen": 38155040, "step": 418 }, { "epoch": 1.7416666666666667, "loss": 0.151132732629776, "loss_ce": 0.00904288049787283, "loss_iou": 0.32421875, "loss_num": 0.017578125, "loss_xval": 0.142578125, "num_input_tokens_seen": 38155040, "step": 418 }, { "epoch": 1.7458333333333333, "grad_norm": 7.262707246882911, "learning_rate": 5e-05, "loss": 0.1827, "num_input_tokens_seen": 38245628, "step": 419 }, { "epoch": 1.7458333333333333, "loss": 0.20362314581871033, "loss_ce": 0.00013193067570682615, "loss_iou": 0.333984375, "loss_num": 0.0296630859375, "loss_xval": 0.203125, "num_input_tokens_seen": 38245628, "step": 419 }, { "epoch": 1.75, "grad_norm": 11.195260101861862, "learning_rate": 5e-05, "loss": 0.2121, "num_input_tokens_seen": 38336516, "step": 420 }, { "epoch": 1.75, "loss": 0.15969014167785645, "loss_ce": 0.0031349719502031803, "loss_iou": 0.19140625, "loss_num": 0.02490234375, "loss_xval": 0.15625, "num_input_tokens_seen": 38336516, "step": 420 }, { "epoch": 1.7541666666666667, "grad_norm": 7.311132884519421, "learning_rate": 5e-05, "loss": 0.2435, "num_input_tokens_seen": 38427492, "step": 421 }, { "epoch": 1.7541666666666667, "loss": 0.2525137662887573, "loss_ce": 0.0022085891105234623, "loss_iou": 0.384765625, "loss_num": 0.037109375, "loss_xval": 0.25, "num_input_tokens_seen": 38427492, "step": 421 }, { "epoch": 1.7583333333333333, "grad_norm": 8.407594284466022, "learning_rate": 5e-05, "loss": 0.2977, "num_input_tokens_seen": 38518472, "step": 422 }, { "epoch": 1.7583333333333333, "loss": 0.32363784313201904, "loss_ce": 0.06790055334568024, "loss_iou": 0.41796875, "loss_num": 0.037109375, "loss_xval": 0.255859375, "num_input_tokens_seen": 38518472, "step": 422 }, { "epoch": 1.7625, "grad_norm": 6.269501342271039, "learning_rate": 5e-05, "loss": 0.2442, "num_input_tokens_seen": 38608332, "step": 423 }, { "epoch": 1.7625, "loss": 0.2547208070755005, "loss_ce": 0.001608002814464271, "loss_iou": 0.0810546875, "loss_num": 0.0478515625, "loss_xval": 0.25390625, "num_input_tokens_seen": 38608332, "step": 423 }, { "epoch": 1.7666666666666666, "grad_norm": 13.92887135014984, "learning_rate": 5e-05, "loss": 0.1873, "num_input_tokens_seen": 38699308, "step": 424 }, { "epoch": 1.7666666666666666, "loss": 0.1888759434223175, "loss_ce": 0.003268023021519184, "loss_iou": 0.21875, "loss_num": 0.0296630859375, "loss_xval": 0.185546875, "num_input_tokens_seen": 38699308, "step": 424 }, { "epoch": 1.7708333333333335, "grad_norm": 12.482389388443156, "learning_rate": 5e-05, "loss": 0.2519, "num_input_tokens_seen": 38790780, "step": 425 }, { "epoch": 1.7708333333333335, "loss": 0.23790404200553894, "loss_ce": 0.0007824670756235719, "loss_iou": 0.40625, "loss_num": 0.03369140625, "loss_xval": 0.2373046875, "num_input_tokens_seen": 38790780, "step": 425 }, { "epoch": 1.775, "grad_norm": 15.205548992803037, "learning_rate": 5e-05, "loss": 0.2028, "num_input_tokens_seen": 38881920, "step": 426 }, { "epoch": 1.775, "loss": 0.1748272329568863, "loss_ce": 0.0007549648871645331, "loss_iou": 0.39453125, "loss_num": 0.0213623046875, "loss_xval": 0.173828125, "num_input_tokens_seen": 38881920, "step": 426 }, { "epoch": 1.7791666666666668, "grad_norm": 52.300440017972036, "learning_rate": 5e-05, "loss": 0.2853, "num_input_tokens_seen": 38973020, "step": 427 }, { "epoch": 1.7791666666666668, "loss": 0.27499961853027344, "loss_ce": 0.000463480013422668, "loss_iou": 0.40234375, "loss_num": 0.041259765625, "loss_xval": 0.275390625, "num_input_tokens_seen": 38973020, "step": 427 }, { "epoch": 1.7833333333333332, "grad_norm": 9.954323695155647, "learning_rate": 5e-05, "loss": 0.1929, "num_input_tokens_seen": 39064384, "step": 428 }, { "epoch": 1.7833333333333332, "loss": 0.21909351646900177, "loss_ce": 0.0010759325232356787, "loss_iou": 0.3671875, "loss_num": 0.031005859375, "loss_xval": 0.2177734375, "num_input_tokens_seen": 39064384, "step": 428 }, { "epoch": 1.7875, "grad_norm": 2.790513551952913, "learning_rate": 5e-05, "loss": 0.1648, "num_input_tokens_seen": 39156164, "step": 429 }, { "epoch": 1.7875, "loss": 0.20743504166603088, "loss_ce": 0.0013193088816478848, "loss_iou": 0.326171875, "loss_num": 0.030029296875, "loss_xval": 0.2060546875, "num_input_tokens_seen": 39156164, "step": 429 }, { "epoch": 1.7916666666666665, "grad_norm": 5.595761110073747, "learning_rate": 5e-05, "loss": 0.2919, "num_input_tokens_seen": 39246356, "step": 430 }, { "epoch": 1.7916666666666665, "loss": 0.3420943319797516, "loss_ce": 0.0020674869883805513, "loss_iou": 0.361328125, "loss_num": 0.0556640625, "loss_xval": 0.33984375, "num_input_tokens_seen": 39246356, "step": 430 }, { "epoch": 1.7958333333333334, "grad_norm": 48.416934241964086, "learning_rate": 5e-05, "loss": 0.2207, "num_input_tokens_seen": 39337120, "step": 431 }, { "epoch": 1.7958333333333334, "loss": 0.21679693460464478, "loss_ce": 0.0009155991720035672, "loss_iou": 0.380859375, "loss_num": 0.030029296875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 39337120, "step": 431 }, { "epoch": 1.8, "grad_norm": 18.232593843356966, "learning_rate": 5e-05, "loss": 0.2727, "num_input_tokens_seen": 39428636, "step": 432 }, { "epoch": 1.8, "loss": 0.2289196401834488, "loss_ce": 0.0031505939550697803, "loss_iou": 0.33984375, "loss_num": 0.033447265625, "loss_xval": 0.2255859375, "num_input_tokens_seen": 39428636, "step": 432 }, { "epoch": 1.8041666666666667, "grad_norm": 13.763518512759806, "learning_rate": 5e-05, "loss": 0.1901, "num_input_tokens_seen": 39520168, "step": 433 }, { "epoch": 1.8041666666666667, "loss": 0.1349903792142868, "loss_ce": 0.005397483240813017, "loss_iou": 0.15625, "loss_num": 0.0205078125, "loss_xval": 0.1298828125, "num_input_tokens_seen": 39520168, "step": 433 }, { "epoch": 1.8083333333333333, "grad_norm": 6.450519649920571, "learning_rate": 5e-05, "loss": 0.2366, "num_input_tokens_seen": 39611840, "step": 434 }, { "epoch": 1.8083333333333333, "loss": 0.21854786574840546, "loss_ce": 0.0014458110090345144, "loss_iou": 0.3046875, "loss_num": 0.032958984375, "loss_xval": 0.216796875, "num_input_tokens_seen": 39611840, "step": 434 }, { "epoch": 1.8125, "grad_norm": 36.514213990480904, "learning_rate": 5e-05, "loss": 0.2076, "num_input_tokens_seen": 39703452, "step": 435 }, { "epoch": 1.8125, "loss": 0.24096806347370148, "loss_ce": 0.00445683253929019, "loss_iou": 0.359375, "loss_num": 0.034912109375, "loss_xval": 0.236328125, "num_input_tokens_seen": 39703452, "step": 435 }, { "epoch": 1.8166666666666667, "grad_norm": 26.94343530241333, "learning_rate": 5e-05, "loss": 0.2408, "num_input_tokens_seen": 39794924, "step": 436 }, { "epoch": 1.8166666666666667, "loss": 0.24908341467380524, "loss_ce": 0.0005482627893798053, "loss_iou": 0.5078125, "loss_num": 0.031982421875, "loss_xval": 0.248046875, "num_input_tokens_seen": 39794924, "step": 436 }, { "epoch": 1.8208333333333333, "grad_norm": 14.728416874555222, "learning_rate": 5e-05, "loss": 0.3074, "num_input_tokens_seen": 39886324, "step": 437 }, { "epoch": 1.8208333333333333, "loss": 0.2843567728996277, "loss_ce": 0.0029847188852727413, "loss_iou": 0.365234375, "loss_num": 0.04345703125, "loss_xval": 0.28125, "num_input_tokens_seen": 39886324, "step": 437 }, { "epoch": 1.825, "grad_norm": 8.850324859702438, "learning_rate": 5e-05, "loss": 0.2256, "num_input_tokens_seen": 39977516, "step": 438 }, { "epoch": 1.825, "loss": 0.20806992053985596, "loss_ce": 0.0029917967040091753, "loss_iou": 0.251953125, "loss_num": 0.0322265625, "loss_xval": 0.205078125, "num_input_tokens_seen": 39977516, "step": 438 }, { "epoch": 1.8291666666666666, "grad_norm": 18.04227980986907, "learning_rate": 5e-05, "loss": 0.2689, "num_input_tokens_seen": 40068756, "step": 439 }, { "epoch": 1.8291666666666666, "loss": 0.31097856163978577, "loss_ce": 0.0016218679957091808, "loss_iou": 0.33984375, "loss_num": 0.050048828125, "loss_xval": 0.30859375, "num_input_tokens_seen": 40068756, "step": 439 }, { "epoch": 1.8333333333333335, "grad_norm": 5.586189786763669, "learning_rate": 5e-05, "loss": 0.2875, "num_input_tokens_seen": 40160316, "step": 440 }, { "epoch": 1.8333333333333335, "loss": 0.24810029566287994, "loss_ce": 0.0009079031879082322, "loss_iou": 0.34375, "loss_num": 0.037353515625, "loss_xval": 0.2470703125, "num_input_tokens_seen": 40160316, "step": 440 }, { "epoch": 1.8375, "grad_norm": 22.851153983391157, "learning_rate": 5e-05, "loss": 0.2403, "num_input_tokens_seen": 40252136, "step": 441 }, { "epoch": 1.8375, "loss": 0.21321170032024384, "loss_ce": 0.005081813782453537, "loss_iou": 0.2490234375, "loss_num": 0.032958984375, "loss_xval": 0.2080078125, "num_input_tokens_seen": 40252136, "step": 441 }, { "epoch": 1.8416666666666668, "grad_norm": 5.288895590637178, "learning_rate": 5e-05, "loss": 0.2826, "num_input_tokens_seen": 40341600, "step": 442 }, { "epoch": 1.8416666666666668, "loss": 0.23124778270721436, "loss_ce": 0.0005348873091861606, "loss_iou": 0.4453125, "loss_num": 0.030517578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 40341600, "step": 442 }, { "epoch": 1.8458333333333332, "grad_norm": 8.125178497484377, "learning_rate": 5e-05, "loss": 0.2252, "num_input_tokens_seen": 40433276, "step": 443 }, { "epoch": 1.8458333333333332, "loss": 0.24864652752876282, "loss_ce": 0.002247605938464403, "loss_iou": 0.373046875, "loss_num": 0.0361328125, "loss_xval": 0.24609375, "num_input_tokens_seen": 40433276, "step": 443 }, { "epoch": 1.85, "grad_norm": 7.138480090683848, "learning_rate": 5e-05, "loss": 0.2542, "num_input_tokens_seen": 40524728, "step": 444 }, { "epoch": 1.85, "loss": 0.25098586082458496, "loss_ce": 0.005014184396713972, "loss_iou": 0.1689453125, "loss_num": 0.043212890625, "loss_xval": 0.24609375, "num_input_tokens_seen": 40524728, "step": 444 }, { "epoch": 1.8541666666666665, "grad_norm": 5.769777410913954, "learning_rate": 5e-05, "loss": 0.203, "num_input_tokens_seen": 40616144, "step": 445 }, { "epoch": 1.8541666666666665, "loss": 0.2437579333782196, "loss_ce": 0.002546995645388961, "loss_iou": 0.40625, "loss_num": 0.033935546875, "loss_xval": 0.2412109375, "num_input_tokens_seen": 40616144, "step": 445 }, { "epoch": 1.8583333333333334, "grad_norm": 4.068488818002669, "learning_rate": 5e-05, "loss": 0.2416, "num_input_tokens_seen": 40708024, "step": 446 }, { "epoch": 1.8583333333333334, "loss": 0.21607771515846252, "loss_ce": 0.003003982827067375, "loss_iou": 0.25390625, "loss_num": 0.03369140625, "loss_xval": 0.212890625, "num_input_tokens_seen": 40708024, "step": 446 }, { "epoch": 1.8625, "grad_norm": 10.867206459117877, "learning_rate": 5e-05, "loss": 0.2701, "num_input_tokens_seen": 40799076, "step": 447 }, { "epoch": 1.8625, "loss": 0.33143606781959534, "loss_ce": 0.00263968319632113, "loss_iou": 0.328125, "loss_num": 0.05419921875, "loss_xval": 0.328125, "num_input_tokens_seen": 40799076, "step": 447 }, { "epoch": 1.8666666666666667, "grad_norm": 54.97065144326686, "learning_rate": 5e-05, "loss": 0.1671, "num_input_tokens_seen": 40890848, "step": 448 }, { "epoch": 1.8666666666666667, "loss": 0.1328609138727188, "loss_ce": 0.001574298250488937, "loss_iou": 0.20703125, "loss_num": 0.018798828125, "loss_xval": 0.130859375, "num_input_tokens_seen": 40890848, "step": 448 }, { "epoch": 1.8708333333333333, "grad_norm": 5.836067414607739, "learning_rate": 5e-05, "loss": 0.2459, "num_input_tokens_seen": 40983080, "step": 449 }, { "epoch": 1.8708333333333333, "loss": 0.2940506637096405, "loss_ce": 0.0029129667673259974, "loss_iou": 0.310546875, "loss_num": 0.047119140625, "loss_xval": 0.291015625, "num_input_tokens_seen": 40983080, "step": 449 }, { "epoch": 1.875, "grad_norm": 4.601007964435342, "learning_rate": 5e-05, "loss": 0.2801, "num_input_tokens_seen": 41074384, "step": 450 }, { "epoch": 1.875, "loss": 0.25329628586769104, "loss_ce": 0.0012821154668927193, "loss_iou": 0.359375, "loss_num": 0.03759765625, "loss_xval": 0.251953125, "num_input_tokens_seen": 41074384, "step": 450 }, { "epoch": 1.8791666666666667, "grad_norm": 12.796871529274252, "learning_rate": 5e-05, "loss": 0.3056, "num_input_tokens_seen": 41165644, "step": 451 }, { "epoch": 1.8791666666666667, "loss": 0.35303500294685364, "loss_ce": 0.0019607748836278915, "loss_iou": 0.388671875, "loss_num": 0.05615234375, "loss_xval": 0.3515625, "num_input_tokens_seen": 41165644, "step": 451 }, { "epoch": 1.8833333333333333, "grad_norm": 13.93276233688637, "learning_rate": 5e-05, "loss": 0.249, "num_input_tokens_seen": 41257472, "step": 452 }, { "epoch": 1.8833333333333333, "loss": 0.2850377559661865, "loss_ce": 0.002200846094638109, "loss_iou": 0.287109375, "loss_num": 0.046142578125, "loss_xval": 0.283203125, "num_input_tokens_seen": 41257472, "step": 452 }, { "epoch": 1.8875, "grad_norm": 20.22805971748526, "learning_rate": 5e-05, "loss": 0.228, "num_input_tokens_seen": 41348604, "step": 453 }, { "epoch": 1.8875, "loss": 0.27741241455078125, "loss_ce": 6.864364695502445e-05, "loss_iou": 0.2470703125, "loss_num": 0.04638671875, "loss_xval": 0.27734375, "num_input_tokens_seen": 41348604, "step": 453 }, { "epoch": 1.8916666666666666, "grad_norm": 14.070020258641543, "learning_rate": 5e-05, "loss": 0.2554, "num_input_tokens_seen": 41439728, "step": 454 }, { "epoch": 1.8916666666666666, "loss": 0.1974847912788391, "loss_ce": 0.0027826428413391113, "loss_iou": 0.31640625, "loss_num": 0.0274658203125, "loss_xval": 0.1943359375, "num_input_tokens_seen": 41439728, "step": 454 }, { "epoch": 1.8958333333333335, "grad_norm": 10.414859001422169, "learning_rate": 5e-05, "loss": 0.2386, "num_input_tokens_seen": 41531584, "step": 455 }, { "epoch": 1.8958333333333335, "loss": 0.2527470588684082, "loss_ce": 0.00024461650173179805, "loss_iou": 0.47265625, "loss_num": 0.033447265625, "loss_xval": 0.251953125, "num_input_tokens_seen": 41531584, "step": 455 }, { "epoch": 1.9, "grad_norm": 10.851725528442135, "learning_rate": 5e-05, "loss": 0.2082, "num_input_tokens_seen": 41623208, "step": 456 }, { "epoch": 1.9, "loss": 0.21295757591724396, "loss_ce": 0.0010435068979859352, "loss_iou": 0.28515625, "loss_num": 0.031982421875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 41623208, "step": 456 }, { "epoch": 1.9041666666666668, "grad_norm": 6.143359027790444, "learning_rate": 5e-05, "loss": 0.1694, "num_input_tokens_seen": 41714784, "step": 457 }, { "epoch": 1.9041666666666668, "loss": 0.11842577159404755, "loss_ce": 0.004717267118394375, "loss_iou": 0.19140625, "loss_num": 0.0157470703125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 41714784, "step": 457 }, { "epoch": 1.9083333333333332, "grad_norm": 4.351736596832513, "learning_rate": 5e-05, "loss": 0.1894, "num_input_tokens_seen": 41806148, "step": 458 }, { "epoch": 1.9083333333333332, "loss": 0.20772914588451385, "loss_ce": 8.754467853577808e-05, "loss_iou": 0.3359375, "loss_num": 0.0291748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 41806148, "step": 458 }, { "epoch": 1.9125, "grad_norm": 3.880482346896209, "learning_rate": 5e-05, "loss": 0.2375, "num_input_tokens_seen": 41897204, "step": 459 }, { "epoch": 1.9125, "loss": 0.27232295274734497, "loss_ce": 0.0005333737935870886, "loss_iou": 0.369140625, "loss_num": 0.040771484375, "loss_xval": 0.271484375, "num_input_tokens_seen": 41897204, "step": 459 }, { "epoch": 1.9166666666666665, "grad_norm": 4.917601920806994, "learning_rate": 5e-05, "loss": 0.2394, "num_input_tokens_seen": 41988488, "step": 460 }, { "epoch": 1.9166666666666665, "loss": 0.25409287214279175, "loss_ce": 0.0018650712445378304, "loss_iou": 0.1611328125, "loss_num": 0.044677734375, "loss_xval": 0.251953125, "num_input_tokens_seen": 41988488, "step": 460 }, { "epoch": 1.9208333333333334, "grad_norm": 8.894410443279389, "learning_rate": 5e-05, "loss": 0.2329, "num_input_tokens_seen": 42079860, "step": 461 }, { "epoch": 1.9208333333333334, "loss": 0.23146000504493713, "loss_ce": 1.4697448932565749e-05, "loss_iou": 0.294921875, "loss_num": 0.035400390625, "loss_xval": 0.2314453125, "num_input_tokens_seen": 42079860, "step": 461 }, { "epoch": 1.925, "grad_norm": 16.189621066761624, "learning_rate": 5e-05, "loss": 0.2077, "num_input_tokens_seen": 42170812, "step": 462 }, { "epoch": 1.925, "loss": 0.21362504363059998, "loss_ce": 6.30219146842137e-05, "loss_iou": 0.291015625, "loss_num": 0.031982421875, "loss_xval": 0.2138671875, "num_input_tokens_seen": 42170812, "step": 462 }, { "epoch": 1.9291666666666667, "grad_norm": 3.855073566469258, "learning_rate": 5e-05, "loss": 0.2269, "num_input_tokens_seen": 42261644, "step": 463 }, { "epoch": 1.9291666666666667, "loss": 0.30324554443359375, "loss_ce": 0.0003280591045040637, "loss_iou": 0.271484375, "loss_num": 0.050537109375, "loss_xval": 0.302734375, "num_input_tokens_seen": 42261644, "step": 463 }, { "epoch": 1.9333333333333333, "grad_norm": 16.503840026630748, "learning_rate": 5e-05, "loss": 0.2487, "num_input_tokens_seen": 42353188, "step": 464 }, { "epoch": 1.9333333333333333, "loss": 0.18360735476016998, "loss_ce": 0.0017225849442183971, "loss_iou": 0.099609375, "loss_num": 0.03271484375, "loss_xval": 0.181640625, "num_input_tokens_seen": 42353188, "step": 464 }, { "epoch": 1.9375, "grad_norm": 8.628894781312468, "learning_rate": 5e-05, "loss": 0.1897, "num_input_tokens_seen": 42444276, "step": 465 }, { "epoch": 1.9375, "loss": 0.18303534388542175, "loss_ce": 0.002005070447921753, "loss_iou": 0.248046875, "loss_num": 0.0269775390625, "loss_xval": 0.1806640625, "num_input_tokens_seen": 42444276, "step": 465 }, { "epoch": 1.9416666666666667, "grad_norm": 7.3114052832005845, "learning_rate": 5e-05, "loss": 0.3136, "num_input_tokens_seen": 42535168, "step": 466 }, { "epoch": 1.9416666666666667, "loss": 0.31238681077957153, "loss_ce": 0.002572346245869994, "loss_iou": 0.2470703125, "loss_num": 0.052734375, "loss_xval": 0.310546875, "num_input_tokens_seen": 42535168, "step": 466 }, { "epoch": 1.9458333333333333, "grad_norm": 4.8613746511208475, "learning_rate": 5e-05, "loss": 0.2269, "num_input_tokens_seen": 42626024, "step": 467 }, { "epoch": 1.9458333333333333, "loss": 0.218013197183609, "loss_ce": 0.00017872979515232146, "loss_iou": 0.291015625, "loss_num": 0.03271484375, "loss_xval": 0.2177734375, "num_input_tokens_seen": 42626024, "step": 467 }, { "epoch": 1.95, "grad_norm": 4.480762983796849, "learning_rate": 5e-05, "loss": 0.3126, "num_input_tokens_seen": 42717488, "step": 468 }, { "epoch": 1.95, "loss": 0.36717867851257324, "loss_ce": 0.0014255361165851355, "loss_iou": 0.259765625, "loss_num": 0.0634765625, "loss_xval": 0.365234375, "num_input_tokens_seen": 42717488, "step": 468 }, { "epoch": 1.9541666666666666, "grad_norm": 6.821058545858503, "learning_rate": 5e-05, "loss": 0.2826, "num_input_tokens_seen": 42808380, "step": 469 }, { "epoch": 1.9541666666666666, "loss": 0.3651350438594818, "loss_ce": 0.0002668919914867729, "loss_iou": 0.1962890625, "loss_num": 0.0654296875, "loss_xval": 0.365234375, "num_input_tokens_seen": 42808380, "step": 469 }, { "epoch": 1.9583333333333335, "grad_norm": 8.457303564288464, "learning_rate": 5e-05, "loss": 0.2184, "num_input_tokens_seen": 42900112, "step": 470 }, { "epoch": 1.9583333333333335, "loss": 0.19896230101585388, "loss_ce": 0.005419825669378042, "loss_iou": 0.265625, "loss_num": 0.02880859375, "loss_xval": 0.193359375, "num_input_tokens_seen": 42900112, "step": 470 }, { "epoch": 1.9625, "grad_norm": 43.913513558689424, "learning_rate": 5e-05, "loss": 0.2405, "num_input_tokens_seen": 42991484, "step": 471 }, { "epoch": 1.9625, "loss": 0.19493769109249115, "loss_ce": 0.0004796826106030494, "loss_iou": 0.376953125, "loss_num": 0.0247802734375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 42991484, "step": 471 }, { "epoch": 1.9666666666666668, "grad_norm": 5.607040268767477, "learning_rate": 5e-05, "loss": 0.2056, "num_input_tokens_seen": 43083208, "step": 472 }, { "epoch": 1.9666666666666668, "loss": 0.2115071415901184, "loss_ce": 0.0014546550810337067, "loss_iou": 0.29296875, "loss_num": 0.031005859375, "loss_xval": 0.2099609375, "num_input_tokens_seen": 43083208, "step": 472 }, { "epoch": 1.9708333333333332, "grad_norm": 15.939240899776951, "learning_rate": 5e-05, "loss": 0.2249, "num_input_tokens_seen": 43173816, "step": 473 }, { "epoch": 1.9708333333333332, "loss": 0.24818861484527588, "loss_ce": 0.0024000415578484535, "loss_iou": 0.298828125, "loss_num": 0.037841796875, "loss_xval": 0.24609375, "num_input_tokens_seen": 43173816, "step": 473 }, { "epoch": 1.975, "grad_norm": 7.107474798711838, "learning_rate": 5e-05, "loss": 0.2092, "num_input_tokens_seen": 43265380, "step": 474 }, { "epoch": 1.975, "loss": 0.14708954095840454, "loss_ce": 0.0022531079594045877, "loss_iou": 0.259765625, "loss_num": 0.0191650390625, "loss_xval": 0.14453125, "num_input_tokens_seen": 43265380, "step": 474 }, { "epoch": 1.9791666666666665, "grad_norm": 6.792902728521322, "learning_rate": 5e-05, "loss": 0.2255, "num_input_tokens_seen": 43356616, "step": 475 }, { "epoch": 1.9791666666666665, "loss": 0.1605501025915146, "loss_ce": 0.0006379862898029387, "loss_iou": 0.310546875, "loss_num": 0.020263671875, "loss_xval": 0.16015625, "num_input_tokens_seen": 43356616, "step": 475 }, { "epoch": 1.9833333333333334, "grad_norm": 5.614667033572034, "learning_rate": 5e-05, "loss": 0.2103, "num_input_tokens_seen": 43447548, "step": 476 }, { "epoch": 1.9833333333333334, "loss": 0.1937415599822998, "loss_ce": 0.00068735855165869, "loss_iou": 0.34765625, "loss_num": 0.025390625, "loss_xval": 0.193359375, "num_input_tokens_seen": 43447548, "step": 476 }, { "epoch": 1.9875, "grad_norm": 5.637636664809899, "learning_rate": 5e-05, "loss": 0.2005, "num_input_tokens_seen": 43539048, "step": 477 }, { "epoch": 1.9875, "loss": 0.19285109639167786, "loss_ce": 0.010478038340806961, "loss_iou": 0.365234375, "loss_num": 0.0225830078125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 43539048, "step": 477 }, { "epoch": 1.9916666666666667, "grad_norm": 10.267982859584741, "learning_rate": 5e-05, "loss": 0.2948, "num_input_tokens_seen": 43630460, "step": 478 }, { "epoch": 1.9916666666666667, "loss": 0.3191360831260681, "loss_ce": 0.009077484719455242, "loss_iou": 0.3046875, "loss_num": 0.05029296875, "loss_xval": 0.310546875, "num_input_tokens_seen": 43630460, "step": 478 }, { "epoch": 1.9958333333333333, "grad_norm": 4.494551642761895, "learning_rate": 5e-05, "loss": 0.2828, "num_input_tokens_seen": 43721936, "step": 479 }, { "epoch": 1.9958333333333333, "loss": 0.3179689645767212, "loss_ce": 0.001318573486059904, "loss_iou": 0.25390625, "loss_num": 0.0537109375, "loss_xval": 0.31640625, "num_input_tokens_seen": 43721936, "step": 479 }, { "epoch": 2.0, "grad_norm": 17.220812887551514, "learning_rate": 5e-05, "loss": 0.2027, "num_input_tokens_seen": 43813360, "step": 480 }, { "epoch": 2.0, "loss": 0.2017011195421219, "loss_ce": 0.0005902753327973187, "loss_iou": 0.421875, "loss_num": 0.0240478515625, "loss_xval": 0.201171875, "num_input_tokens_seen": 43813360, "step": 480 }, { "epoch": 2.004166666666667, "grad_norm": 9.071971916454121, "learning_rate": 5e-05, "loss": 0.293, "num_input_tokens_seen": 43904944, "step": 481 }, { "epoch": 2.004166666666667, "loss": 0.3399706780910492, "loss_ce": 0.012150846421718597, "loss_iou": 0.306640625, "loss_num": 0.0537109375, "loss_xval": 0.328125, "num_input_tokens_seen": 43904944, "step": 481 }, { "epoch": 2.0083333333333333, "grad_norm": 5.605031727104742, "learning_rate": 5e-05, "loss": 0.2103, "num_input_tokens_seen": 43996192, "step": 482 }, { "epoch": 2.0083333333333333, "loss": 0.23982742428779602, "loss_ce": 0.0004475304449442774, "loss_iou": 0.28125, "loss_num": 0.037109375, "loss_xval": 0.2392578125, "num_input_tokens_seen": 43996192, "step": 482 }, { "epoch": 2.0125, "grad_norm": 2.413140946271684, "learning_rate": 5e-05, "loss": 0.2327, "num_input_tokens_seen": 44086036, "step": 483 }, { "epoch": 2.0125, "loss": 0.19676050543785095, "loss_ce": 0.001692143501713872, "loss_iou": 0.3125, "loss_num": 0.0269775390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 44086036, "step": 483 }, { "epoch": 2.0166666666666666, "grad_norm": 4.696557237903741, "learning_rate": 5e-05, "loss": 0.238, "num_input_tokens_seen": 44177616, "step": 484 }, { "epoch": 2.0166666666666666, "loss": 0.18791495263576508, "loss_ce": 0.0024291048757731915, "loss_iou": 0.30078125, "loss_num": 0.0255126953125, "loss_xval": 0.185546875, "num_input_tokens_seen": 44177616, "step": 484 }, { "epoch": 2.0208333333333335, "grad_norm": 5.869051437792836, "learning_rate": 5e-05, "loss": 0.2714, "num_input_tokens_seen": 44268552, "step": 485 }, { "epoch": 2.0208333333333335, "loss": 0.2644971013069153, "loss_ce": 0.00045901196426711977, "loss_iou": 0.328125, "loss_num": 0.0400390625, "loss_xval": 0.263671875, "num_input_tokens_seen": 44268552, "step": 485 }, { "epoch": 2.025, "grad_norm": 5.760866011005493, "learning_rate": 5e-05, "loss": 0.2184, "num_input_tokens_seen": 44360196, "step": 486 }, { "epoch": 2.025, "loss": 0.26277172565460205, "loss_ce": 0.00044262310257181525, "loss_iou": 0.36328125, "loss_num": 0.038330078125, "loss_xval": 0.26171875, "num_input_tokens_seen": 44360196, "step": 486 }, { "epoch": 2.029166666666667, "grad_norm": 9.485726120144186, "learning_rate": 5e-05, "loss": 0.2049, "num_input_tokens_seen": 44451476, "step": 487 }, { "epoch": 2.029166666666667, "loss": 0.1496741771697998, "loss_ce": 0.0011756441090255976, "loss_iou": 0.361328125, "loss_num": 0.015625, "loss_xval": 0.1484375, "num_input_tokens_seen": 44451476, "step": 487 }, { "epoch": 2.033333333333333, "grad_norm": 6.46724624293029, "learning_rate": 5e-05, "loss": 0.2019, "num_input_tokens_seen": 44543556, "step": 488 }, { "epoch": 2.033333333333333, "loss": 0.20846158266067505, "loss_ce": 0.0011251522228121758, "loss_iou": 0.283203125, "loss_num": 0.0303955078125, "loss_xval": 0.20703125, "num_input_tokens_seen": 44543556, "step": 488 }, { "epoch": 2.0375, "grad_norm": 4.663831894967608, "learning_rate": 5e-05, "loss": 0.1779, "num_input_tokens_seen": 44634704, "step": 489 }, { "epoch": 2.0375, "loss": 0.20913270115852356, "loss_ce": 0.0005755819729529321, "loss_iou": 0.333984375, "loss_num": 0.0286865234375, "loss_xval": 0.208984375, "num_input_tokens_seen": 44634704, "step": 489 }, { "epoch": 2.0416666666666665, "grad_norm": 4.817924636236086, "learning_rate": 5e-05, "loss": 0.2052, "num_input_tokens_seen": 44725908, "step": 490 }, { "epoch": 2.0416666666666665, "loss": 0.27449101209640503, "loss_ce": 0.007279078476130962, "loss_iou": 0.1845703125, "loss_num": 0.046142578125, "loss_xval": 0.267578125, "num_input_tokens_seen": 44725908, "step": 490 }, { "epoch": 2.0458333333333334, "grad_norm": 9.929049168261573, "learning_rate": 5e-05, "loss": 0.2116, "num_input_tokens_seen": 44817812, "step": 491 }, { "epoch": 2.0458333333333334, "loss": 0.21883490681648254, "loss_ce": 0.0010614749044179916, "loss_iou": 0.408203125, "loss_num": 0.027587890625, "loss_xval": 0.2177734375, "num_input_tokens_seen": 44817812, "step": 491 }, { "epoch": 2.05, "grad_norm": 8.817393848544738, "learning_rate": 5e-05, "loss": 0.23, "num_input_tokens_seen": 44909208, "step": 492 }, { "epoch": 2.05, "loss": 0.2640566825866699, "loss_ce": 0.0022769207134842873, "loss_iou": 0.29296875, "loss_num": 0.040771484375, "loss_xval": 0.26171875, "num_input_tokens_seen": 44909208, "step": 492 }, { "epoch": 2.0541666666666667, "grad_norm": 5.042570206698982, "learning_rate": 5e-05, "loss": 0.2268, "num_input_tokens_seen": 45000720, "step": 493 }, { "epoch": 2.0541666666666667, "loss": 0.2562553286552429, "loss_ce": 0.001494583673775196, "loss_iou": 0.279296875, "loss_num": 0.0400390625, "loss_xval": 0.25390625, "num_input_tokens_seen": 45000720, "step": 493 }, { "epoch": 2.058333333333333, "grad_norm": 6.383182702072335, "learning_rate": 5e-05, "loss": 0.1796, "num_input_tokens_seen": 45092216, "step": 494 }, { "epoch": 2.058333333333333, "loss": 0.15668010711669922, "loss_ce": 0.0010404729982838035, "loss_iou": 0.29296875, "loss_num": 0.01953125, "loss_xval": 0.1552734375, "num_input_tokens_seen": 45092216, "step": 494 }, { "epoch": 2.0625, "grad_norm": 9.998961284226256, "learning_rate": 5e-05, "loss": 0.2058, "num_input_tokens_seen": 45183120, "step": 495 }, { "epoch": 2.0625, "loss": 0.14924222230911255, "loss_ce": 0.00043851512600667775, "loss_iou": 0.25, "loss_num": 0.0198974609375, "loss_xval": 0.1484375, "num_input_tokens_seen": 45183120, "step": 495 }, { "epoch": 2.066666666666667, "grad_norm": 5.28057501901058, "learning_rate": 5e-05, "loss": 0.1962, "num_input_tokens_seen": 45273996, "step": 496 }, { "epoch": 2.066666666666667, "loss": 0.20502203702926636, "loss_ce": 0.0005542653379961848, "loss_iou": 0.322265625, "loss_num": 0.028076171875, "loss_xval": 0.2041015625, "num_input_tokens_seen": 45273996, "step": 496 }, { "epoch": 2.0708333333333333, "grad_norm": 7.768866562570465, "learning_rate": 5e-05, "loss": 0.1964, "num_input_tokens_seen": 45365364, "step": 497 }, { "epoch": 2.0708333333333333, "loss": 0.20308563113212585, "loss_ce": 0.0013644578866660595, "loss_iou": 0.279296875, "loss_num": 0.029296875, "loss_xval": 0.2021484375, "num_input_tokens_seen": 45365364, "step": 497 }, { "epoch": 2.075, "grad_norm": 6.284527737111382, "learning_rate": 5e-05, "loss": 0.1963, "num_input_tokens_seen": 45456736, "step": 498 }, { "epoch": 2.075, "loss": 0.18389853835105896, "loss_ce": 0.0016475582960993052, "loss_iou": 0.296875, "loss_num": 0.024658203125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 45456736, "step": 498 }, { "epoch": 2.0791666666666666, "grad_norm": 4.79734703937701, "learning_rate": 5e-05, "loss": 0.2825, "num_input_tokens_seen": 45547784, "step": 499 }, { "epoch": 2.0791666666666666, "loss": 0.29045653343200684, "loss_ce": 0.0013940533390268683, "loss_iou": 0.404296875, "loss_num": 0.041748046875, "loss_xval": 0.2890625, "num_input_tokens_seen": 45547784, "step": 499 }, { "epoch": 2.0833333333333335, "grad_norm": 9.216382238681696, "learning_rate": 5e-05, "loss": 0.2529, "num_input_tokens_seen": 45637724, "step": 500 }, { "epoch": 2.0833333333333335, "eval_seeclick_CIoU": 0.40254954993724823, "eval_seeclick_GIoU": 0.40075618028640747, "eval_seeclick_IoU": 0.46426986157894135, "eval_seeclick_MAE_all": 0.06658709421753883, "eval_seeclick_MAE_h": 0.04495918843895197, "eval_seeclick_MAE_w": 0.13129764050245285, "eval_seeclick_MAE_x_boxes": 0.14095629006624222, "eval_seeclick_MAE_y_boxes": 0.04734954051673412, "eval_seeclick_NUM_probability": 0.9999997019767761, "eval_seeclick_inside_bbox": 0.7528409063816071, "eval_seeclick_loss": 0.5049810409545898, "eval_seeclick_loss_ce": 0.08913525566458702, "eval_seeclick_loss_iou": 0.469970703125, "eval_seeclick_loss_num": 0.06447601318359375, "eval_seeclick_loss_xval": 0.4163818359375, "eval_seeclick_runtime": 74.8052, "eval_seeclick_samples_per_second": 0.575, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 45637724, "step": 500 }, { "epoch": 2.0833333333333335, "eval_icons_CIoU": 0.2693219371140003, "eval_icons_GIoU": 0.2740980312228203, "eval_icons_IoU": 0.37362542748451233, "eval_icons_MAE_all": 0.08273549005389214, "eval_icons_MAE_h": 0.1786297969520092, "eval_icons_MAE_w": 0.12335462868213654, "eval_icons_MAE_x_boxes": 0.12370636314153671, "eval_icons_MAE_y_boxes": 0.18266896158456802, "eval_icons_NUM_probability": 0.9999996721744537, "eval_icons_inside_bbox": 0.4635416716337204, "eval_icons_loss": 0.441895455121994, "eval_icons_loss_ce": 1.6518655229447177e-05, "eval_icons_loss_iou": 0.22637939453125, "eval_icons_loss_num": 0.08292007446289062, "eval_icons_loss_xval": 0.459716796875, "eval_icons_runtime": 85.3011, "eval_icons_samples_per_second": 0.586, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 45637724, "step": 500 }, { "epoch": 2.0833333333333335, "eval_screenspot_CIoU": 0.3571961025396983, "eval_screenspot_GIoU": 0.3489176432291667, "eval_screenspot_IoU": 0.4308655261993408, "eval_screenspot_MAE_all": 0.10158004860083263, "eval_screenspot_MAE_h": 0.0966620072722435, "eval_screenspot_MAE_w": 0.2082077662150065, "eval_screenspot_MAE_x_boxes": 0.17490030328432718, "eval_screenspot_MAE_y_boxes": 0.09309135625759761, "eval_screenspot_NUM_probability": 0.9999842445055643, "eval_screenspot_inside_bbox": 0.690833330154419, "eval_screenspot_loss": 0.5826627612113953, "eval_screenspot_loss_ce": 1.5979758851851027e-05, "eval_screenspot_loss_iou": 0.3749593098958333, "eval_screenspot_loss_num": 0.10100301106770833, "eval_screenspot_loss_xval": 0.58056640625, "eval_screenspot_runtime": 146.4672, "eval_screenspot_samples_per_second": 0.608, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 45637724, "step": 500 }, { "epoch": 2.0833333333333335, "eval_compot_CIoU": 0.4399932026863098, "eval_compot_GIoU": 0.4214942157268524, "eval_compot_IoU": 0.5184762179851532, "eval_compot_MAE_all": 0.06234058737754822, "eval_compot_MAE_h": 0.09770151227712631, "eval_compot_MAE_w": 0.1238437294960022, "eval_compot_MAE_x_boxes": 0.12367824465036392, "eval_compot_MAE_y_boxes": 0.09963158518075943, "eval_compot_NUM_probability": 0.9999629557132721, "eval_compot_inside_bbox": 0.6493055522441864, "eval_compot_loss": 0.3942759335041046, "eval_compot_loss_ce": 0.0099323526956141, "eval_compot_loss_iou": 0.41339111328125, "eval_compot_loss_num": 0.054779052734375, "eval_compot_loss_xval": 0.3568115234375, "eval_compot_runtime": 84.0824, "eval_compot_samples_per_second": 0.595, "eval_compot_steps_per_second": 0.024, "num_input_tokens_seen": 45637724, "step": 500 }, { "epoch": 2.0833333333333335, "loss": 0.3428493142127991, "loss_ce": 0.009841513819992542, "loss_iou": 0.42578125, "loss_num": 0.049560546875, "loss_xval": 0.33203125, "num_input_tokens_seen": 45637724, "step": 500 }, { "epoch": 2.0875, "grad_norm": 7.729747203415406, "learning_rate": 5e-05, "loss": 0.2109, "num_input_tokens_seen": 45727648, "step": 501 }, { "epoch": 2.0875, "loss": 0.1922411322593689, "loss_ce": 0.00236074673011899, "loss_iou": 0.298828125, "loss_num": 0.0260009765625, "loss_xval": 0.189453125, "num_input_tokens_seen": 45727648, "step": 501 }, { "epoch": 2.091666666666667, "grad_norm": 10.076408675413663, "learning_rate": 5e-05, "loss": 0.2347, "num_input_tokens_seen": 45818984, "step": 502 }, { "epoch": 2.091666666666667, "loss": 0.22048774361610413, "loss_ce": 2.8755013772752136e-05, "loss_iou": 0.5078125, "loss_num": 0.023681640625, "loss_xval": 0.220703125, "num_input_tokens_seen": 45818984, "step": 502 }, { "epoch": 2.095833333333333, "grad_norm": 15.803004261541545, "learning_rate": 5e-05, "loss": 0.2213, "num_input_tokens_seen": 45909824, "step": 503 }, { "epoch": 2.095833333333333, "loss": 0.19270195066928864, "loss_ce": 0.002577434293925762, "loss_iou": 0.28125, "loss_num": 0.0267333984375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 45909824, "step": 503 }, { "epoch": 2.1, "grad_norm": 10.453303811857014, "learning_rate": 5e-05, "loss": 0.2257, "num_input_tokens_seen": 46001408, "step": 504 }, { "epoch": 2.1, "loss": 0.2370985597372055, "loss_ce": 0.00028215881320647895, "loss_iou": 0.3984375, "loss_num": 0.03125, "loss_xval": 0.236328125, "num_input_tokens_seen": 46001408, "step": 504 }, { "epoch": 2.1041666666666665, "grad_norm": 10.658019924725252, "learning_rate": 5e-05, "loss": 0.2449, "num_input_tokens_seen": 46092752, "step": 505 }, { "epoch": 2.1041666666666665, "loss": 0.22125060856342316, "loss_ce": 0.0002117783296853304, "loss_iou": 0.2060546875, "loss_num": 0.035888671875, "loss_xval": 0.220703125, "num_input_tokens_seen": 46092752, "step": 505 }, { "epoch": 2.1083333333333334, "grad_norm": 3.3864494529227724, "learning_rate": 5e-05, "loss": 0.1457, "num_input_tokens_seen": 46184072, "step": 506 }, { "epoch": 2.1083333333333334, "loss": 0.14974187314510345, "loss_ce": 0.0001447110262233764, "loss_iou": 0.23046875, "loss_num": 0.0206298828125, "loss_xval": 0.1494140625, "num_input_tokens_seen": 46184072, "step": 506 }, { "epoch": 2.1125, "grad_norm": 7.53025454760194, "learning_rate": 5e-05, "loss": 0.2635, "num_input_tokens_seen": 46275676, "step": 507 }, { "epoch": 2.1125, "loss": 0.20503434538841248, "loss_ce": 0.0040608420968055725, "loss_iou": 0.208984375, "loss_num": 0.03173828125, "loss_xval": 0.201171875, "num_input_tokens_seen": 46275676, "step": 507 }, { "epoch": 2.1166666666666667, "grad_norm": 33.285284767596316, "learning_rate": 5e-05, "loss": 0.2176, "num_input_tokens_seen": 46365852, "step": 508 }, { "epoch": 2.1166666666666667, "loss": 0.21645520627498627, "loss_ce": 0.001123182475566864, "loss_iou": 0.33984375, "loss_num": 0.029296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 46365852, "step": 508 }, { "epoch": 2.120833333333333, "grad_norm": 14.815680317725786, "learning_rate": 5e-05, "loss": 0.2742, "num_input_tokens_seen": 46457248, "step": 509 }, { "epoch": 2.120833333333333, "loss": 0.25932037830352783, "loss_ce": 0.002606501104310155, "loss_iou": 0.2421875, "loss_num": 0.04150390625, "loss_xval": 0.255859375, "num_input_tokens_seen": 46457248, "step": 509 }, { "epoch": 2.125, "grad_norm": 8.73990619843021, "learning_rate": 5e-05, "loss": 0.2557, "num_input_tokens_seen": 46548532, "step": 510 }, { "epoch": 2.125, "loss": 0.2554578185081482, "loss_ce": 0.0008191587985493243, "loss_iou": 0.28125, "loss_num": 0.03955078125, "loss_xval": 0.25390625, "num_input_tokens_seen": 46548532, "step": 510 }, { "epoch": 2.129166666666667, "grad_norm": 10.105992721155996, "learning_rate": 5e-05, "loss": 0.2362, "num_input_tokens_seen": 46639872, "step": 511 }, { "epoch": 2.129166666666667, "loss": 0.2154877781867981, "loss_ce": 3.368042234797031e-05, "loss_iou": 0.416015625, "loss_num": 0.026123046875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 46639872, "step": 511 }, { "epoch": 2.1333333333333333, "grad_norm": 29.72522284972365, "learning_rate": 5e-05, "loss": 0.1714, "num_input_tokens_seen": 46731340, "step": 512 }, { "epoch": 2.1333333333333333, "loss": 0.15613722801208496, "loss_ce": 0.004953158088028431, "loss_iou": 0.244140625, "loss_num": 0.020263671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 46731340, "step": 512 }, { "epoch": 2.1375, "grad_norm": 9.531361802052238, "learning_rate": 5e-05, "loss": 0.253, "num_input_tokens_seen": 46822688, "step": 513 }, { "epoch": 2.1375, "loss": 0.22187145054340363, "loss_ce": 0.001046255580149591, "loss_iou": 0.275390625, "loss_num": 0.032958984375, "loss_xval": 0.220703125, "num_input_tokens_seen": 46822688, "step": 513 }, { "epoch": 2.1416666666666666, "grad_norm": 14.410513234997431, "learning_rate": 5e-05, "loss": 0.2471, "num_input_tokens_seen": 46914088, "step": 514 }, { "epoch": 2.1416666666666666, "loss": 0.2741532623767853, "loss_ce": 0.0002885128487832844, "loss_iou": 0.2890625, "loss_num": 0.04296875, "loss_xval": 0.2734375, "num_input_tokens_seen": 46914088, "step": 514 }, { "epoch": 2.1458333333333335, "grad_norm": 8.05729374282805, "learning_rate": 5e-05, "loss": 0.1985, "num_input_tokens_seen": 47005280, "step": 515 }, { "epoch": 2.1458333333333335, "loss": 0.21794849634170532, "loss_ce": 0.00011403985263314098, "loss_iou": 0.265625, "loss_num": 0.03271484375, "loss_xval": 0.2177734375, "num_input_tokens_seen": 47005280, "step": 515 }, { "epoch": 2.15, "grad_norm": 7.318539417819902, "learning_rate": 5e-05, "loss": 0.196, "num_input_tokens_seen": 47096288, "step": 516 }, { "epoch": 2.15, "loss": 0.27666139602661133, "loss_ce": 0.0016369989607483149, "loss_iou": 0.37109375, "loss_num": 0.039794921875, "loss_xval": 0.275390625, "num_input_tokens_seen": 47096288, "step": 516 }, { "epoch": 2.154166666666667, "grad_norm": 8.090481077078994, "learning_rate": 5e-05, "loss": 0.2136, "num_input_tokens_seen": 47187788, "step": 517 }, { "epoch": 2.154166666666667, "loss": 0.14408686757087708, "loss_ce": 0.00010493230365682393, "loss_iou": 0.212890625, "loss_num": 0.02001953125, "loss_xval": 0.1435546875, "num_input_tokens_seen": 47187788, "step": 517 }, { "epoch": 2.158333333333333, "grad_norm": 11.230013943385165, "learning_rate": 5e-05, "loss": 0.2047, "num_input_tokens_seen": 47279348, "step": 518 }, { "epoch": 2.158333333333333, "loss": 0.1982039213180542, "loss_ce": 0.0006941695464774966, "loss_iou": 0.26171875, "loss_num": 0.0286865234375, "loss_xval": 0.197265625, "num_input_tokens_seen": 47279348, "step": 518 }, { "epoch": 2.1625, "grad_norm": 4.75139905842334, "learning_rate": 5e-05, "loss": 0.19, "num_input_tokens_seen": 47370376, "step": 519 }, { "epoch": 2.1625, "loss": 0.18471525609493256, "loss_ce": 2.2873391571920365e-05, "loss_iou": 0.41015625, "loss_num": 0.0198974609375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 47370376, "step": 519 }, { "epoch": 2.1666666666666665, "grad_norm": 6.517972761801104, "learning_rate": 5e-05, "loss": 0.2322, "num_input_tokens_seen": 47462060, "step": 520 }, { "epoch": 2.1666666666666665, "loss": 0.18972179293632507, "loss_ce": 0.0049683707766234875, "loss_iou": 0.22265625, "loss_num": 0.0277099609375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 47462060, "step": 520 }, { "epoch": 2.1708333333333334, "grad_norm": 7.3722523096943275, "learning_rate": 5e-05, "loss": 0.285, "num_input_tokens_seen": 47553000, "step": 521 }, { "epoch": 2.1708333333333334, "loss": 0.27948668599128723, "loss_ce": 0.0012884553289040923, "loss_iou": 0.3984375, "loss_num": 0.0390625, "loss_xval": 0.27734375, "num_input_tokens_seen": 47553000, "step": 521 }, { "epoch": 2.175, "grad_norm": 13.932503842674887, "learning_rate": 5e-05, "loss": 0.1891, "num_input_tokens_seen": 47643948, "step": 522 }, { "epoch": 2.175, "loss": 0.21535250544548035, "loss_ce": 0.0006308312877081335, "loss_iou": 0.287109375, "loss_num": 0.031005859375, "loss_xval": 0.21484375, "num_input_tokens_seen": 47643948, "step": 522 }, { "epoch": 2.1791666666666667, "grad_norm": 8.908307371306876, "learning_rate": 5e-05, "loss": 0.2113, "num_input_tokens_seen": 47735776, "step": 523 }, { "epoch": 2.1791666666666667, "loss": 0.20093733072280884, "loss_ce": 0.0030613476410508156, "loss_iou": 0.3671875, "loss_num": 0.024169921875, "loss_xval": 0.1982421875, "num_input_tokens_seen": 47735776, "step": 523 }, { "epoch": 2.183333333333333, "grad_norm": 16.019182269926212, "learning_rate": 5e-05, "loss": 0.2013, "num_input_tokens_seen": 47827680, "step": 524 }, { "epoch": 2.183333333333333, "loss": 0.23729866743087769, "loss_ce": 0.001214687479659915, "loss_iou": 0.35546875, "loss_num": 0.032470703125, "loss_xval": 0.236328125, "num_input_tokens_seen": 47827680, "step": 524 }, { "epoch": 2.1875, "grad_norm": 6.964867016997655, "learning_rate": 5e-05, "loss": 0.1731, "num_input_tokens_seen": 47919152, "step": 525 }, { "epoch": 2.1875, "loss": 0.17872335016727448, "loss_ce": 0.0013246757443994284, "loss_iou": 0.310546875, "loss_num": 0.0224609375, "loss_xval": 0.177734375, "num_input_tokens_seen": 47919152, "step": 525 }, { "epoch": 2.191666666666667, "grad_norm": 4.883391608973748, "learning_rate": 5e-05, "loss": 0.1685, "num_input_tokens_seen": 48010236, "step": 526 }, { "epoch": 2.191666666666667, "loss": 0.21654918789863586, "loss_ce": 0.00042369376751594245, "loss_iou": 0.2216796875, "loss_num": 0.033935546875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 48010236, "step": 526 }, { "epoch": 2.1958333333333333, "grad_norm": 23.035289497265442, "learning_rate": 5e-05, "loss": 0.2039, "num_input_tokens_seen": 48101544, "step": 527 }, { "epoch": 2.1958333333333333, "loss": 0.200510174036026, "loss_ce": 0.0003148576943203807, "loss_iou": 0.39453125, "loss_num": 0.0234375, "loss_xval": 0.2001953125, "num_input_tokens_seen": 48101544, "step": 527 }, { "epoch": 2.2, "grad_norm": 8.430324506304586, "learning_rate": 5e-05, "loss": 0.2343, "num_input_tokens_seen": 48192704, "step": 528 }, { "epoch": 2.2, "loss": 0.2944282293319702, "loss_ce": 0.0019477481255307794, "loss_iou": 0.353515625, "loss_num": 0.04345703125, "loss_xval": 0.29296875, "num_input_tokens_seen": 48192704, "step": 528 }, { "epoch": 2.2041666666666666, "grad_norm": 15.089991729505178, "learning_rate": 5e-05, "loss": 0.2177, "num_input_tokens_seen": 48284604, "step": 529 }, { "epoch": 2.2041666666666666, "loss": 0.19912487268447876, "loss_ce": 0.0010047497926279902, "loss_iou": 0.1611328125, "loss_num": 0.03271484375, "loss_xval": 0.1982421875, "num_input_tokens_seen": 48284604, "step": 529 }, { "epoch": 2.2083333333333335, "grad_norm": 8.60120057780928, "learning_rate": 5e-05, "loss": 0.252, "num_input_tokens_seen": 48376036, "step": 530 }, { "epoch": 2.2083333333333335, "loss": 0.2593064308166504, "loss_ce": 2.909008617280051e-05, "loss_iou": 0.373046875, "loss_num": 0.0361328125, "loss_xval": 0.259765625, "num_input_tokens_seen": 48376036, "step": 530 }, { "epoch": 2.2125, "grad_norm": 18.192155504342225, "learning_rate": 5e-05, "loss": 0.2296, "num_input_tokens_seen": 48467472, "step": 531 }, { "epoch": 2.2125, "loss": 0.2686399519443512, "loss_ce": 0.00045148952631279826, "loss_iou": 0.333984375, "loss_num": 0.03955078125, "loss_xval": 0.267578125, "num_input_tokens_seen": 48467472, "step": 531 }, { "epoch": 2.216666666666667, "grad_norm": 20.96977992387443, "learning_rate": 5e-05, "loss": 0.1982, "num_input_tokens_seen": 48559212, "step": 532 }, { "epoch": 2.216666666666667, "loss": 0.1729247123003006, "loss_ce": 0.0036437036469578743, "loss_iou": 0.2119140625, "loss_num": 0.0247802734375, "loss_xval": 0.1689453125, "num_input_tokens_seen": 48559212, "step": 532 }, { "epoch": 2.220833333333333, "grad_norm": 11.254529565975902, "learning_rate": 5e-05, "loss": 0.2586, "num_input_tokens_seen": 48650604, "step": 533 }, { "epoch": 2.220833333333333, "loss": 0.16954649984836578, "loss_ce": 0.0027984497137367725, "loss_iou": 0.296875, "loss_num": 0.0206298828125, "loss_xval": 0.1669921875, "num_input_tokens_seen": 48650604, "step": 533 }, { "epoch": 2.225, "grad_norm": 8.116782024881553, "learning_rate": 5e-05, "loss": 0.2486, "num_input_tokens_seen": 48741488, "step": 534 }, { "epoch": 2.225, "loss": 0.2426433265209198, "loss_ce": 0.002225851407274604, "loss_iou": 0.21484375, "loss_num": 0.038818359375, "loss_xval": 0.240234375, "num_input_tokens_seen": 48741488, "step": 534 }, { "epoch": 2.2291666666666665, "grad_norm": 16.77344005618869, "learning_rate": 5e-05, "loss": 0.2735, "num_input_tokens_seen": 48833012, "step": 535 }, { "epoch": 2.2291666666666665, "loss": 0.310322642326355, "loss_ce": 0.00014196071424521506, "loss_iou": 0.306640625, "loss_num": 0.048828125, "loss_xval": 0.310546875, "num_input_tokens_seen": 48833012, "step": 535 }, { "epoch": 2.2333333333333334, "grad_norm": 7.982912313525831, "learning_rate": 5e-05, "loss": 0.259, "num_input_tokens_seen": 48924748, "step": 536 }, { "epoch": 2.2333333333333334, "loss": 0.27782681584358215, "loss_ce": 0.003412754973396659, "loss_iou": 0.2890625, "loss_num": 0.04248046875, "loss_xval": 0.2734375, "num_input_tokens_seen": 48924748, "step": 536 }, { "epoch": 2.2375, "grad_norm": 1.8105627790424674, "learning_rate": 5e-05, "loss": 0.197, "num_input_tokens_seen": 49015828, "step": 537 }, { "epoch": 2.2375, "loss": 0.19336609542369843, "loss_ce": 0.0029364186339080334, "loss_iou": 0.361328125, "loss_num": 0.0225830078125, "loss_xval": 0.1904296875, "num_input_tokens_seen": 49015828, "step": 537 }, { "epoch": 2.2416666666666667, "grad_norm": 3.8723721155447866, "learning_rate": 5e-05, "loss": 0.2857, "num_input_tokens_seen": 49106980, "step": 538 }, { "epoch": 2.2416666666666667, "loss": 0.26870205998420715, "loss_ce": 0.0036874094512313604, "loss_iou": 0.38671875, "loss_num": 0.036376953125, "loss_xval": 0.265625, "num_input_tokens_seen": 49106980, "step": 538 }, { "epoch": 2.245833333333333, "grad_norm": 11.43927378564669, "learning_rate": 5e-05, "loss": 0.2064, "num_input_tokens_seen": 49198292, "step": 539 }, { "epoch": 2.245833333333333, "loss": 0.11015903949737549, "loss_ce": 0.0009366283775307238, "loss_iou": 0.197265625, "loss_num": 0.01336669921875, "loss_xval": 0.109375, "num_input_tokens_seen": 49198292, "step": 539 }, { "epoch": 2.25, "grad_norm": 9.479268489327094, "learning_rate": 5e-05, "loss": 0.2347, "num_input_tokens_seen": 49289980, "step": 540 }, { "epoch": 2.25, "loss": 0.24618439376354218, "loss_ce": 0.0028982588555663824, "loss_iou": 0.392578125, "loss_num": 0.03173828125, "loss_xval": 0.2431640625, "num_input_tokens_seen": 49289980, "step": 540 }, { "epoch": 2.2541666666666664, "grad_norm": 23.234574182965247, "learning_rate": 5e-05, "loss": 0.2566, "num_input_tokens_seen": 49381024, "step": 541 }, { "epoch": 2.2541666666666664, "loss": 0.32389476895332336, "loss_ce": 0.0039484878070652485, "loss_iou": 0.330078125, "loss_num": 0.0498046875, "loss_xval": 0.3203125, "num_input_tokens_seen": 49381024, "step": 541 }, { "epoch": 2.2583333333333333, "grad_norm": 6.512330127642697, "learning_rate": 5e-05, "loss": 0.231, "num_input_tokens_seen": 49471848, "step": 542 }, { "epoch": 2.2583333333333333, "loss": 0.2775254547595978, "loss_ce": 0.0032334798015654087, "loss_iou": 0.59375, "loss_num": 0.0291748046875, "loss_xval": 0.2734375, "num_input_tokens_seen": 49471848, "step": 542 }, { "epoch": 2.2625, "grad_norm": 31.365520197825454, "learning_rate": 5e-05, "loss": 0.2918, "num_input_tokens_seen": 49563792, "step": 543 }, { "epoch": 2.2625, "loss": 0.3964589834213257, "loss_ce": 0.0029043066315352917, "loss_iou": 0.36328125, "loss_num": 0.06298828125, "loss_xval": 0.39453125, "num_input_tokens_seen": 49563792, "step": 543 }, { "epoch": 2.2666666666666666, "grad_norm": 9.131551427623819, "learning_rate": 5e-05, "loss": 0.1894, "num_input_tokens_seen": 49654884, "step": 544 }, { "epoch": 2.2666666666666666, "loss": 0.16559162735939026, "loss_ce": 0.0012849814957007766, "loss_iou": 0.189453125, "loss_num": 0.024658203125, "loss_xval": 0.1640625, "num_input_tokens_seen": 49654884, "step": 544 }, { "epoch": 2.2708333333333335, "grad_norm": 4.3210090075713765, "learning_rate": 5e-05, "loss": 0.1877, "num_input_tokens_seen": 49745916, "step": 545 }, { "epoch": 2.2708333333333335, "loss": 0.22035479545593262, "loss_ce": 1.789092675608117e-05, "loss_iou": 0.408203125, "loss_num": 0.0262451171875, "loss_xval": 0.220703125, "num_input_tokens_seen": 49745916, "step": 545 }, { "epoch": 2.275, "grad_norm": 21.405001553889708, "learning_rate": 5e-05, "loss": 0.2585, "num_input_tokens_seen": 49837444, "step": 546 }, { "epoch": 2.275, "loss": 0.3219088315963745, "loss_ce": 0.014673092402517796, "loss_iou": 0.400390625, "loss_num": 0.0439453125, "loss_xval": 0.306640625, "num_input_tokens_seen": 49837444, "step": 546 }, { "epoch": 2.279166666666667, "grad_norm": 11.734590492448271, "learning_rate": 5e-05, "loss": 0.2045, "num_input_tokens_seen": 49928440, "step": 547 }, { "epoch": 2.279166666666667, "loss": 0.15000897645950317, "loss_ce": 0.0007780222222208977, "loss_iou": 0.34765625, "loss_num": 0.0146484375, "loss_xval": 0.1494140625, "num_input_tokens_seen": 49928440, "step": 547 }, { "epoch": 2.283333333333333, "grad_norm": 6.654040686552338, "learning_rate": 5e-05, "loss": 0.1729, "num_input_tokens_seen": 50019816, "step": 548 }, { "epoch": 2.283333333333333, "loss": 0.16181126236915588, "loss_ce": 0.000800521404016763, "loss_iou": 0.28125, "loss_num": 0.0198974609375, "loss_xval": 0.1611328125, "num_input_tokens_seen": 50019816, "step": 548 }, { "epoch": 2.2875, "grad_norm": 4.9014266222969445, "learning_rate": 5e-05, "loss": 0.2343, "num_input_tokens_seen": 50111900, "step": 549 }, { "epoch": 2.2875, "loss": 0.17283910512924194, "loss_ce": 0.0007199622923508286, "loss_iou": 0.38671875, "loss_num": 0.0174560546875, "loss_xval": 0.171875, "num_input_tokens_seen": 50111900, "step": 549 }, { "epoch": 2.2916666666666665, "grad_norm": 3.1547749971655343, "learning_rate": 5e-05, "loss": 0.1892, "num_input_tokens_seen": 50203308, "step": 550 }, { "epoch": 2.2916666666666665, "loss": 0.18455275893211365, "loss_ce": 0.002561180619522929, "loss_iou": 0.166015625, "loss_num": 0.029052734375, "loss_xval": 0.181640625, "num_input_tokens_seen": 50203308, "step": 550 }, { "epoch": 2.2958333333333334, "grad_norm": 9.05140127670536, "learning_rate": 5e-05, "loss": 0.2295, "num_input_tokens_seen": 50294728, "step": 551 }, { "epoch": 2.2958333333333334, "loss": 0.2015194296836853, "loss_ce": 0.0009579022298566997, "loss_iou": 0.287109375, "loss_num": 0.0274658203125, "loss_xval": 0.2001953125, "num_input_tokens_seen": 50294728, "step": 551 }, { "epoch": 2.3, "grad_norm": 12.269307896539292, "learning_rate": 5e-05, "loss": 0.2782, "num_input_tokens_seen": 50385860, "step": 552 }, { "epoch": 2.3, "loss": 0.2552986145019531, "loss_ce": 0.0013313385425135493, "loss_iou": 0.32421875, "loss_num": 0.036376953125, "loss_xval": 0.25390625, "num_input_tokens_seen": 50385860, "step": 552 }, { "epoch": 2.3041666666666667, "grad_norm": 9.396071896354771, "learning_rate": 5e-05, "loss": 0.2442, "num_input_tokens_seen": 50477380, "step": 553 }, { "epoch": 2.3041666666666667, "loss": 0.23897230625152588, "loss_ce": 0.00026380264898762107, "loss_iou": 0.359375, "loss_num": 0.031982421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 50477380, "step": 553 }, { "epoch": 2.3083333333333336, "grad_norm": 5.715814267268313, "learning_rate": 5e-05, "loss": 0.2217, "num_input_tokens_seen": 50568760, "step": 554 }, { "epoch": 2.3083333333333336, "loss": 0.2331497073173523, "loss_ce": 0.0013381622266024351, "loss_iou": 0.1748046875, "loss_num": 0.03857421875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 50568760, "step": 554 }, { "epoch": 2.3125, "grad_norm": 8.906380660918442, "learning_rate": 5e-05, "loss": 0.2031, "num_input_tokens_seen": 50659780, "step": 555 }, { "epoch": 2.3125, "loss": 0.1668861210346222, "loss_ce": 1.6016194422263652e-05, "loss_iou": 0.2490234375, "loss_num": 0.0223388671875, "loss_xval": 0.1669921875, "num_input_tokens_seen": 50659780, "step": 555 }, { "epoch": 2.3166666666666664, "grad_norm": 13.260563611079487, "learning_rate": 5e-05, "loss": 0.2962, "num_input_tokens_seen": 50750744, "step": 556 }, { "epoch": 2.3166666666666664, "loss": 0.3436912000179291, "loss_ce": 0.0016501795034855604, "loss_iou": 0.46484375, "loss_num": 0.0478515625, "loss_xval": 0.341796875, "num_input_tokens_seen": 50750744, "step": 556 }, { "epoch": 2.3208333333333333, "grad_norm": 6.33711851791319, "learning_rate": 5e-05, "loss": 0.1551, "num_input_tokens_seen": 50842328, "step": 557 }, { "epoch": 2.3208333333333333, "loss": 0.15023556351661682, "loss_ce": 0.0015234070597216487, "loss_iou": 0.296875, "loss_num": 0.0166015625, "loss_xval": 0.1484375, "num_input_tokens_seen": 50842328, "step": 557 }, { "epoch": 2.325, "grad_norm": 8.66796050891329, "learning_rate": 5e-05, "loss": 0.2726, "num_input_tokens_seen": 50933788, "step": 558 }, { "epoch": 2.325, "loss": 0.3197578489780426, "loss_ce": 0.00029983557760715485, "loss_iou": 0.1474609375, "loss_num": 0.057373046875, "loss_xval": 0.3203125, "num_input_tokens_seen": 50933788, "step": 558 }, { "epoch": 2.3291666666666666, "grad_norm": 6.913245856308076, "learning_rate": 5e-05, "loss": 0.2266, "num_input_tokens_seen": 51024468, "step": 559 }, { "epoch": 2.3291666666666666, "loss": 0.24923312664031982, "loss_ce": 8.76137928571552e-05, "loss_iou": 0.306640625, "loss_num": 0.0361328125, "loss_xval": 0.2490234375, "num_input_tokens_seen": 51024468, "step": 559 }, { "epoch": 2.3333333333333335, "grad_norm": 4.904812670647303, "learning_rate": 5e-05, "loss": 0.2201, "num_input_tokens_seen": 51115288, "step": 560 }, { "epoch": 2.3333333333333335, "loss": 0.23579418659210205, "loss_ce": 0.0003205478424206376, "loss_iou": 0.353515625, "loss_num": 0.03125, "loss_xval": 0.2353515625, "num_input_tokens_seen": 51115288, "step": 560 }, { "epoch": 2.3375, "grad_norm": 22.768072572200946, "learning_rate": 5e-05, "loss": 0.2143, "num_input_tokens_seen": 51206408, "step": 561 }, { "epoch": 2.3375, "loss": 0.198276549577713, "loss_ce": 0.00021747614664491266, "loss_iou": 0.28125, "loss_num": 0.0269775390625, "loss_xval": 0.1982421875, "num_input_tokens_seen": 51206408, "step": 561 }, { "epoch": 2.341666666666667, "grad_norm": 7.501671159923808, "learning_rate": 5e-05, "loss": 0.1443, "num_input_tokens_seen": 51297908, "step": 562 }, { "epoch": 2.341666666666667, "loss": 0.14608047902584076, "loss_ce": 0.0003590428677853197, "loss_iou": 0.32421875, "loss_num": 0.01458740234375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 51297908, "step": 562 }, { "epoch": 2.345833333333333, "grad_norm": 4.105011198127262, "learning_rate": 5e-05, "loss": 0.2115, "num_input_tokens_seen": 51389324, "step": 563 }, { "epoch": 2.345833333333333, "loss": 0.22044454514980316, "loss_ce": 0.0006874686223454773, "loss_iou": 0.255859375, "loss_num": 0.032470703125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 51389324, "step": 563 }, { "epoch": 2.35, "grad_norm": 4.929604021728092, "learning_rate": 5e-05, "loss": 0.2209, "num_input_tokens_seen": 51480428, "step": 564 }, { "epoch": 2.35, "loss": 0.23108640313148499, "loss_ce": 7.303670372493798e-06, "loss_iou": 0.423828125, "loss_num": 0.0272216796875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 51480428, "step": 564 }, { "epoch": 2.3541666666666665, "grad_norm": 11.02440441371447, "learning_rate": 5e-05, "loss": 0.2063, "num_input_tokens_seen": 51572020, "step": 565 }, { "epoch": 2.3541666666666665, "loss": 0.22416293621063232, "loss_ce": 0.0008963280124589801, "loss_iou": 0.353515625, "loss_num": 0.0286865234375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 51572020, "step": 565 }, { "epoch": 2.3583333333333334, "grad_norm": 3.198732197234387, "learning_rate": 5e-05, "loss": 0.1983, "num_input_tokens_seen": 51663300, "step": 566 }, { "epoch": 2.3583333333333334, "loss": 0.1470959484577179, "loss_ce": 0.0004895069869235158, "loss_iou": 0.169921875, "loss_num": 0.021728515625, "loss_xval": 0.146484375, "num_input_tokens_seen": 51663300, "step": 566 }, { "epoch": 2.3625, "grad_norm": 9.113443508930928, "learning_rate": 5e-05, "loss": 0.251, "num_input_tokens_seen": 51755036, "step": 567 }, { "epoch": 2.3625, "loss": 0.2304636687040329, "loss_ce": 0.0011240601306781173, "loss_iou": 0.271484375, "loss_num": 0.03369140625, "loss_xval": 0.2294921875, "num_input_tokens_seen": 51755036, "step": 567 }, { "epoch": 2.3666666666666667, "grad_norm": 8.550613414250304, "learning_rate": 5e-05, "loss": 0.2219, "num_input_tokens_seen": 51846400, "step": 568 }, { "epoch": 2.3666666666666667, "loss": 0.2326650619506836, "loss_ce": 0.006834982428699732, "loss_iou": 0.36328125, "loss_num": 0.0286865234375, "loss_xval": 0.2255859375, "num_input_tokens_seen": 51846400, "step": 568 }, { "epoch": 2.3708333333333336, "grad_norm": 14.361037201153003, "learning_rate": 5e-05, "loss": 0.2203, "num_input_tokens_seen": 51937948, "step": 569 }, { "epoch": 2.3708333333333336, "loss": 0.23306363821029663, "loss_ce": 0.0023507599253207445, "loss_iou": 0.34375, "loss_num": 0.030517578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 51937948, "step": 569 }, { "epoch": 2.375, "grad_norm": 3.9173504909673533, "learning_rate": 5e-05, "loss": 0.2329, "num_input_tokens_seen": 52028928, "step": 570 }, { "epoch": 2.375, "loss": 0.23187220096588135, "loss_ce": 0.00018276153423357755, "loss_iou": 0.298828125, "loss_num": 0.03271484375, "loss_xval": 0.2314453125, "num_input_tokens_seen": 52028928, "step": 570 }, { "epoch": 2.3791666666666664, "grad_norm": 4.439040232326872, "learning_rate": 5e-05, "loss": 0.2733, "num_input_tokens_seen": 52120152, "step": 571 }, { "epoch": 2.3791666666666664, "loss": 0.23996983468532562, "loss_ce": 0.0005289136315695941, "loss_iou": 0.33203125, "loss_num": 0.03271484375, "loss_xval": 0.2392578125, "num_input_tokens_seen": 52120152, "step": 571 }, { "epoch": 2.3833333333333333, "grad_norm": 6.735291595031367, "learning_rate": 5e-05, "loss": 0.2516, "num_input_tokens_seen": 52211680, "step": 572 }, { "epoch": 2.3833333333333333, "loss": 0.21853697299957275, "loss_ce": 0.003601683070883155, "loss_iou": 0.2080078125, "loss_num": 0.033447265625, "loss_xval": 0.21484375, "num_input_tokens_seen": 52211680, "step": 572 }, { "epoch": 2.3875, "grad_norm": 8.91972627065563, "learning_rate": 5e-05, "loss": 0.2946, "num_input_tokens_seen": 52302312, "step": 573 }, { "epoch": 2.3875, "loss": 0.33584511280059814, "loss_ce": 0.0017386688850820065, "loss_iou": 0.30078125, "loss_num": 0.052978515625, "loss_xval": 0.333984375, "num_input_tokens_seen": 52302312, "step": 573 }, { "epoch": 2.3916666666666666, "grad_norm": 19.558079094613312, "learning_rate": 5e-05, "loss": 0.2492, "num_input_tokens_seen": 52393828, "step": 574 }, { "epoch": 2.3916666666666666, "loss": 0.2284211963415146, "loss_ce": 0.004239081870764494, "loss_iou": 0.29296875, "loss_num": 0.03125, "loss_xval": 0.224609375, "num_input_tokens_seen": 52393828, "step": 574 }, { "epoch": 2.3958333333333335, "grad_norm": 13.560440175013902, "learning_rate": 5e-05, "loss": 0.2644, "num_input_tokens_seen": 52485140, "step": 575 }, { "epoch": 2.3958333333333335, "loss": 0.3099120855331421, "loss_ce": 0.0005858939257450402, "loss_iou": 0.53125, "loss_num": 0.03759765625, "loss_xval": 0.30859375, "num_input_tokens_seen": 52485140, "step": 575 }, { "epoch": 2.4, "grad_norm": 8.716871598647728, "learning_rate": 5e-05, "loss": 0.2122, "num_input_tokens_seen": 52575916, "step": 576 }, { "epoch": 2.4, "loss": 0.23423391580581665, "loss_ce": 0.0008354815072380006, "loss_iou": 0.38671875, "loss_num": 0.0289306640625, "loss_xval": 0.2333984375, "num_input_tokens_seen": 52575916, "step": 576 }, { "epoch": 2.404166666666667, "grad_norm": 8.08384619479721, "learning_rate": 5e-05, "loss": 0.2205, "num_input_tokens_seen": 52667400, "step": 577 }, { "epoch": 2.404166666666667, "loss": 0.18752390146255493, "loss_ce": 0.004113262984901667, "loss_iou": 0.279296875, "loss_num": 0.0238037109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 52667400, "step": 577 }, { "epoch": 2.408333333333333, "grad_norm": 13.861550015247142, "learning_rate": 5e-05, "loss": 0.2721, "num_input_tokens_seen": 52759060, "step": 578 }, { "epoch": 2.408333333333333, "loss": 0.21155069768428802, "loss_ce": 0.0020170207135379314, "loss_iou": 0.3203125, "loss_num": 0.0272216796875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 52759060, "step": 578 }, { "epoch": 2.4125, "grad_norm": 9.645532716058332, "learning_rate": 5e-05, "loss": 0.283, "num_input_tokens_seen": 52851116, "step": 579 }, { "epoch": 2.4125, "loss": 0.29763203859329224, "loss_ce": 0.0001466983085265383, "loss_iou": 0.310546875, "loss_num": 0.045166015625, "loss_xval": 0.296875, "num_input_tokens_seen": 52851116, "step": 579 }, { "epoch": 2.4166666666666665, "grad_norm": 4.418503920500965, "learning_rate": 5e-05, "loss": 0.2826, "num_input_tokens_seen": 52942416, "step": 580 }, { "epoch": 2.4166666666666665, "loss": 0.28978437185287476, "loss_ce": 0.0030411938205361366, "loss_iou": 0.03369140625, "loss_num": 0.055908203125, "loss_xval": 0.287109375, "num_input_tokens_seen": 52942416, "step": 580 }, { "epoch": 2.4208333333333334, "grad_norm": 4.552478828334792, "learning_rate": 5e-05, "loss": 0.2277, "num_input_tokens_seen": 53033392, "step": 581 }, { "epoch": 2.4208333333333334, "loss": 0.2454247623682022, "loss_ce": 0.0015893190866336226, "loss_iou": 0.3125, "loss_num": 0.034423828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 53033392, "step": 581 }, { "epoch": 2.425, "grad_norm": 4.39237409560183, "learning_rate": 5e-05, "loss": 0.1809, "num_input_tokens_seen": 53124512, "step": 582 }, { "epoch": 2.425, "loss": 0.15168428421020508, "loss_ce": 0.0001645039883442223, "loss_iou": 0.263671875, "loss_num": 0.01806640625, "loss_xval": 0.1513671875, "num_input_tokens_seen": 53124512, "step": 582 }, { "epoch": 2.4291666666666667, "grad_norm": 11.948325894522515, "learning_rate": 5e-05, "loss": 0.2631, "num_input_tokens_seen": 53215996, "step": 583 }, { "epoch": 2.4291666666666667, "loss": 0.2483394742012024, "loss_ce": 0.0050533367320895195, "loss_iou": 0.2119140625, "loss_num": 0.038818359375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 53215996, "step": 583 }, { "epoch": 2.4333333333333336, "grad_norm": 11.401193742000979, "learning_rate": 5e-05, "loss": 0.2263, "num_input_tokens_seen": 53306272, "step": 584 }, { "epoch": 2.4333333333333336, "loss": 0.2307310402393341, "loss_ce": 0.0036802669055759907, "loss_iou": 0.3828125, "loss_num": 0.027587890625, "loss_xval": 0.2265625, "num_input_tokens_seen": 53306272, "step": 584 }, { "epoch": 2.4375, "grad_norm": 16.571411322861564, "learning_rate": 5e-05, "loss": 0.2692, "num_input_tokens_seen": 53397812, "step": 585 }, { "epoch": 2.4375, "loss": 0.2582886815071106, "loss_ce": 0.00017098673561122268, "loss_iou": 0.3828125, "loss_num": 0.03369140625, "loss_xval": 0.2578125, "num_input_tokens_seen": 53397812, "step": 585 }, { "epoch": 2.4416666666666664, "grad_norm": 4.6137369430836594, "learning_rate": 5e-05, "loss": 0.2597, "num_input_tokens_seen": 53490068, "step": 586 }, { "epoch": 2.4416666666666664, "loss": 0.2722558081150055, "loss_ce": 0.0016259271651506424, "loss_iou": 0.359375, "loss_num": 0.037353515625, "loss_xval": 0.271484375, "num_input_tokens_seen": 53490068, "step": 586 }, { "epoch": 2.4458333333333333, "grad_norm": 8.212023165000291, "learning_rate": 5e-05, "loss": 0.3262, "num_input_tokens_seen": 53580940, "step": 587 }, { "epoch": 2.4458333333333333, "loss": 0.322366327047348, "loss_ce": 0.0004669178160838783, "loss_iou": 0.19140625, "loss_num": 0.055419921875, "loss_xval": 0.322265625, "num_input_tokens_seen": 53580940, "step": 587 }, { "epoch": 2.45, "grad_norm": 7.41087579689105, "learning_rate": 5e-05, "loss": 0.2616, "num_input_tokens_seen": 53672184, "step": 588 }, { "epoch": 2.45, "loss": 0.2832728326320648, "loss_ce": 0.0014124559238553047, "loss_iou": 0.2265625, "loss_num": 0.045654296875, "loss_xval": 0.28125, "num_input_tokens_seen": 53672184, "step": 588 }, { "epoch": 2.4541666666666666, "grad_norm": 5.467855471006087, "learning_rate": 5e-05, "loss": 0.2301, "num_input_tokens_seen": 53763264, "step": 589 }, { "epoch": 2.4541666666666666, "loss": 0.24083589017391205, "loss_ce": 0.0010897895554080606, "loss_iou": 0.330078125, "loss_num": 0.032470703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 53763264, "step": 589 }, { "epoch": 2.4583333333333335, "grad_norm": 9.064539351391804, "learning_rate": 5e-05, "loss": 0.3217, "num_input_tokens_seen": 53854088, "step": 590 }, { "epoch": 2.4583333333333335, "loss": 0.28198763728141785, "loss_ce": 0.0008597145788371563, "loss_iou": 0.275390625, "loss_num": 0.043212890625, "loss_xval": 0.28125, "num_input_tokens_seen": 53854088, "step": 590 }, { "epoch": 2.4625, "grad_norm": 23.99147596421393, "learning_rate": 5e-05, "loss": 0.2264, "num_input_tokens_seen": 53945840, "step": 591 }, { "epoch": 2.4625, "loss": 0.2099674642086029, "loss_ce": 0.002066471381112933, "loss_iou": 0.2431640625, "loss_num": 0.0301513671875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 53945840, "step": 591 }, { "epoch": 2.466666666666667, "grad_norm": 10.504176338474922, "learning_rate": 5e-05, "loss": 0.2831, "num_input_tokens_seen": 54036712, "step": 592 }, { "epoch": 2.466666666666667, "loss": 0.21228235960006714, "loss_ce": 0.00256557809188962, "loss_iou": 0.267578125, "loss_num": 0.029296875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 54036712, "step": 592 }, { "epoch": 2.470833333333333, "grad_norm": 4.43523163700089, "learning_rate": 5e-05, "loss": 0.2155, "num_input_tokens_seen": 54127908, "step": 593 }, { "epoch": 2.470833333333333, "loss": 0.25974512100219727, "loss_ce": 0.0009560543112456799, "loss_iou": 0.3125, "loss_num": 0.036865234375, "loss_xval": 0.2578125, "num_input_tokens_seen": 54127908, "step": 593 }, { "epoch": 2.475, "grad_norm": 12.34114585053913, "learning_rate": 5e-05, "loss": 0.2014, "num_input_tokens_seen": 54219776, "step": 594 }, { "epoch": 2.475, "loss": 0.1644817292690277, "loss_ce": 0.0022197626531124115, "loss_iou": 0.17578125, "loss_num": 0.024169921875, "loss_xval": 0.162109375, "num_input_tokens_seen": 54219776, "step": 594 }, { "epoch": 2.4791666666666665, "grad_norm": 6.150484430831458, "learning_rate": 5e-05, "loss": 0.1779, "num_input_tokens_seen": 54311088, "step": 595 }, { "epoch": 2.4791666666666665, "loss": 0.17623114585876465, "loss_ce": 0.00038886640686541796, "loss_iou": 0.27734375, "loss_num": 0.02197265625, "loss_xval": 0.17578125, "num_input_tokens_seen": 54311088, "step": 595 }, { "epoch": 2.4833333333333334, "grad_norm": 13.595301299687849, "learning_rate": 5e-05, "loss": 0.2931, "num_input_tokens_seen": 54402208, "step": 596 }, { "epoch": 2.4833333333333334, "loss": 0.3184419274330139, "loss_ce": 0.0003266702115070075, "loss_iou": 0.271484375, "loss_num": 0.05078125, "loss_xval": 0.318359375, "num_input_tokens_seen": 54402208, "step": 596 }, { "epoch": 2.4875, "grad_norm": 6.662174326856572, "learning_rate": 5e-05, "loss": 0.2273, "num_input_tokens_seen": 54494012, "step": 597 }, { "epoch": 2.4875, "loss": 0.22467482089996338, "loss_ce": 0.040165551006793976, "loss_iou": 0.365234375, "loss_num": 0.01953125, "loss_xval": 0.1845703125, "num_input_tokens_seen": 54494012, "step": 597 }, { "epoch": 2.4916666666666667, "grad_norm": 6.025819860082932, "learning_rate": 5e-05, "loss": 0.1869, "num_input_tokens_seen": 54585016, "step": 598 }, { "epoch": 2.4916666666666667, "loss": 0.17940133810043335, "loss_ce": 9.530932584311813e-05, "loss_iou": 0.21484375, "loss_num": 0.025634765625, "loss_xval": 0.1796875, "num_input_tokens_seen": 54585016, "step": 598 }, { "epoch": 2.4958333333333336, "grad_norm": 5.914698039286818, "learning_rate": 5e-05, "loss": 0.2873, "num_input_tokens_seen": 54675584, "step": 599 }, { "epoch": 2.4958333333333336, "loss": 0.3032541275024414, "loss_ce": 0.005219449754804373, "loss_iou": 0.451171875, "loss_num": 0.037841796875, "loss_xval": 0.298828125, "num_input_tokens_seen": 54675584, "step": 599 }, { "epoch": 2.5, "grad_norm": 7.242830995897235, "learning_rate": 5e-05, "loss": 0.2843, "num_input_tokens_seen": 54766396, "step": 600 }, { "epoch": 2.5, "loss": 0.2106434404850006, "loss_ce": 0.002635627519339323, "loss_iou": 0.38671875, "loss_num": 0.0230712890625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 54766396, "step": 600 }, { "epoch": 2.5041666666666664, "grad_norm": 28.109909327718626, "learning_rate": 5e-05, "loss": 0.2381, "num_input_tokens_seen": 54857360, "step": 601 }, { "epoch": 2.5041666666666664, "loss": 0.22719983756542206, "loss_ce": 0.00246838154271245, "loss_iou": 0.3359375, "loss_num": 0.02880859375, "loss_xval": 0.224609375, "num_input_tokens_seen": 54857360, "step": 601 }, { "epoch": 2.5083333333333333, "grad_norm": 4.044157297681407, "learning_rate": 5e-05, "loss": 0.1771, "num_input_tokens_seen": 54949156, "step": 602 }, { "epoch": 2.5083333333333333, "loss": 0.1752859652042389, "loss_ce": 0.0004812785191461444, "loss_iou": 0.251953125, "loss_num": 0.0228271484375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 54949156, "step": 602 }, { "epoch": 2.5125, "grad_norm": 5.0459673677008965, "learning_rate": 5e-05, "loss": 0.2322, "num_input_tokens_seen": 55040396, "step": 603 }, { "epoch": 2.5125, "loss": 0.19457007944583893, "loss_ce": 0.0001731009833747521, "loss_iou": 0.265625, "loss_num": 0.026123046875, "loss_xval": 0.1943359375, "num_input_tokens_seen": 55040396, "step": 603 }, { "epoch": 2.5166666666666666, "grad_norm": 12.668503987480383, "learning_rate": 5e-05, "loss": 0.2029, "num_input_tokens_seen": 55131348, "step": 604 }, { "epoch": 2.5166666666666666, "loss": 0.21802936494350433, "loss_ce": 0.00019488329417072237, "loss_iou": 0.283203125, "loss_num": 0.0299072265625, "loss_xval": 0.2177734375, "num_input_tokens_seen": 55131348, "step": 604 }, { "epoch": 2.5208333333333335, "grad_norm": 7.16371519314298, "learning_rate": 5e-05, "loss": 0.2742, "num_input_tokens_seen": 55222552, "step": 605 }, { "epoch": 2.5208333333333335, "loss": 0.210233673453331, "loss_ce": 0.0006084321648813784, "loss_iou": 0.2578125, "loss_num": 0.0294189453125, "loss_xval": 0.2099609375, "num_input_tokens_seen": 55222552, "step": 605 }, { "epoch": 2.525, "grad_norm": 11.766139605706721, "learning_rate": 5e-05, "loss": 0.2538, "num_input_tokens_seen": 55314332, "step": 606 }, { "epoch": 2.525, "loss": 0.2533082664012909, "loss_ce": 0.0020570610649883747, "loss_iou": 0.20703125, "loss_num": 0.040283203125, "loss_xval": 0.251953125, "num_input_tokens_seen": 55314332, "step": 606 }, { "epoch": 2.529166666666667, "grad_norm": 10.679410877512694, "learning_rate": 5e-05, "loss": 0.2949, "num_input_tokens_seen": 55405292, "step": 607 }, { "epoch": 2.529166666666667, "loss": 0.3683062791824341, "loss_ce": 0.0009356520604342222, "loss_iou": 0.263671875, "loss_num": 0.060546875, "loss_xval": 0.3671875, "num_input_tokens_seen": 55405292, "step": 607 }, { "epoch": 2.533333333333333, "grad_norm": 5.315307943850376, "learning_rate": 5e-05, "loss": 0.2221, "num_input_tokens_seen": 55497420, "step": 608 }, { "epoch": 2.533333333333333, "loss": 0.22690710425376892, "loss_ce": 0.0016263541765511036, "loss_iou": 0.265625, "loss_num": 0.031982421875, "loss_xval": 0.2255859375, "num_input_tokens_seen": 55497420, "step": 608 }, { "epoch": 2.5375, "grad_norm": 5.8205477972719395, "learning_rate": 5e-05, "loss": 0.2586, "num_input_tokens_seen": 55586944, "step": 609 }, { "epoch": 2.5375, "loss": 0.22419872879981995, "loss_ce": 0.007707024924457073, "loss_iou": 0.1474609375, "loss_num": 0.0361328125, "loss_xval": 0.216796875, "num_input_tokens_seen": 55586944, "step": 609 }, { "epoch": 2.5416666666666665, "grad_norm": 7.47567852438644, "learning_rate": 5e-05, "loss": 0.215, "num_input_tokens_seen": 55678012, "step": 610 }, { "epoch": 2.5416666666666665, "loss": 0.20373475551605225, "loss_ce": 0.002776502165943384, "loss_iou": 0.283203125, "loss_num": 0.0263671875, "loss_xval": 0.201171875, "num_input_tokens_seen": 55678012, "step": 610 }, { "epoch": 2.5458333333333334, "grad_norm": 7.639502583558188, "learning_rate": 5e-05, "loss": 0.2196, "num_input_tokens_seen": 55769596, "step": 611 }, { "epoch": 2.5458333333333334, "loss": 0.15836487710475922, "loss_ce": 0.0027252272702753544, "loss_iou": 0.33203125, "loss_num": 0.01495361328125, "loss_xval": 0.1552734375, "num_input_tokens_seen": 55769596, "step": 611 }, { "epoch": 2.55, "grad_norm": 17.317356082913285, "learning_rate": 5e-05, "loss": 0.2363, "num_input_tokens_seen": 55861236, "step": 612 }, { "epoch": 2.55, "loss": 0.2235887199640274, "loss_ce": 0.00410629715770483, "loss_iou": 0.2158203125, "loss_num": 0.033447265625, "loss_xval": 0.2197265625, "num_input_tokens_seen": 55861236, "step": 612 }, { "epoch": 2.5541666666666667, "grad_norm": 32.654099925210524, "learning_rate": 5e-05, "loss": 0.2216, "num_input_tokens_seen": 55952812, "step": 613 }, { "epoch": 2.5541666666666667, "loss": 0.22760234773159027, "loss_ce": 0.0009177735191769898, "loss_iou": 0.201171875, "loss_num": 0.035400390625, "loss_xval": 0.2265625, "num_input_tokens_seen": 55952812, "step": 613 }, { "epoch": 2.5583333333333336, "grad_norm": 9.219897635046888, "learning_rate": 5e-05, "loss": 0.2513, "num_input_tokens_seen": 56043840, "step": 614 }, { "epoch": 2.5583333333333336, "loss": 0.2992645502090454, "loss_ce": 0.01947939395904541, "loss_iou": 0.32421875, "loss_num": 0.0400390625, "loss_xval": 0.279296875, "num_input_tokens_seen": 56043840, "step": 614 }, { "epoch": 2.5625, "grad_norm": 13.837170946955512, "learning_rate": 5e-05, "loss": 0.1934, "num_input_tokens_seen": 56135244, "step": 615 }, { "epoch": 2.5625, "loss": 0.15511605143547058, "loss_ce": 0.0029859289061278105, "loss_iou": 0.263671875, "loss_num": 0.017578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 56135244, "step": 615 }, { "epoch": 2.5666666666666664, "grad_norm": 15.002267527018512, "learning_rate": 5e-05, "loss": 0.2414, "num_input_tokens_seen": 56227036, "step": 616 }, { "epoch": 2.5666666666666664, "loss": 0.24220646917819977, "loss_ce": 0.00557316979393363, "loss_iou": 0.353515625, "loss_num": 0.0299072265625, "loss_xval": 0.236328125, "num_input_tokens_seen": 56227036, "step": 616 }, { "epoch": 2.5708333333333333, "grad_norm": 7.173839485508792, "learning_rate": 5e-05, "loss": 0.2519, "num_input_tokens_seen": 56317752, "step": 617 }, { "epoch": 2.5708333333333333, "loss": 0.1648525893688202, "loss_ce": 0.0006375029333867133, "loss_iou": 0.1572265625, "loss_num": 0.025146484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 56317752, "step": 617 }, { "epoch": 2.575, "grad_norm": 16.42943444460069, "learning_rate": 5e-05, "loss": 0.2938, "num_input_tokens_seen": 56408704, "step": 618 }, { "epoch": 2.575, "loss": 0.3060579299926758, "loss_ce": 0.010769846849143505, "loss_iou": 0.306640625, "loss_num": 0.0439453125, "loss_xval": 0.294921875, "num_input_tokens_seen": 56408704, "step": 618 }, { "epoch": 2.5791666666666666, "grad_norm": 5.16657496800359, "learning_rate": 5e-05, "loss": 0.2348, "num_input_tokens_seen": 56500452, "step": 619 }, { "epoch": 2.5791666666666666, "loss": 0.27557849884033203, "loss_ce": 0.002751333173364401, "loss_iou": 0.236328125, "loss_num": 0.04296875, "loss_xval": 0.2734375, "num_input_tokens_seen": 56500452, "step": 619 }, { "epoch": 2.5833333333333335, "grad_norm": 7.283405389473989, "learning_rate": 5e-05, "loss": 0.1938, "num_input_tokens_seen": 56592032, "step": 620 }, { "epoch": 2.5833333333333335, "loss": 0.1765138953924179, "loss_ce": 0.0032350909896194935, "loss_iou": 0.244140625, "loss_num": 0.0225830078125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 56592032, "step": 620 }, { "epoch": 2.5875, "grad_norm": 16.938522115564087, "learning_rate": 5e-05, "loss": 0.2122, "num_input_tokens_seen": 56683540, "step": 621 }, { "epoch": 2.5875, "loss": 0.16648565232753754, "loss_ce": 0.0005615915288217366, "loss_iou": 0.337890625, "loss_num": 0.016357421875, "loss_xval": 0.166015625, "num_input_tokens_seen": 56683540, "step": 621 }, { "epoch": 2.591666666666667, "grad_norm": 6.825989216329065, "learning_rate": 5e-05, "loss": 0.2783, "num_input_tokens_seen": 56774596, "step": 622 }, { "epoch": 2.591666666666667, "loss": 0.33193904161453247, "loss_ce": 2.9850532882846892e-05, "loss_iou": 0.400390625, "loss_num": 0.04638671875, "loss_xval": 0.33203125, "num_input_tokens_seen": 56774596, "step": 622 }, { "epoch": 2.595833333333333, "grad_norm": 6.282992266417186, "learning_rate": 5e-05, "loss": 0.2123, "num_input_tokens_seen": 56866232, "step": 623 }, { "epoch": 2.595833333333333, "loss": 0.21510429680347443, "loss_ce": 0.0011150480713695288, "loss_iou": 0.287109375, "loss_num": 0.028564453125, "loss_xval": 0.2138671875, "num_input_tokens_seen": 56866232, "step": 623 }, { "epoch": 2.6, "grad_norm": 6.0190747707456875, "learning_rate": 5e-05, "loss": 0.2136, "num_input_tokens_seen": 56958128, "step": 624 }, { "epoch": 2.6, "loss": 0.25186964869499207, "loss_ce": 0.00034375820541754365, "loss_iou": 0.2255859375, "loss_num": 0.0390625, "loss_xval": 0.251953125, "num_input_tokens_seen": 56958128, "step": 624 }, { "epoch": 2.6041666666666665, "grad_norm": 10.169413415323342, "learning_rate": 5e-05, "loss": 0.2392, "num_input_tokens_seen": 57048004, "step": 625 }, { "epoch": 2.6041666666666665, "loss": 0.24526724219322205, "loss_ce": 2.7989730369881727e-05, "loss_iou": 0.40625, "loss_num": 0.0286865234375, "loss_xval": 0.2451171875, "num_input_tokens_seen": 57048004, "step": 625 }, { "epoch": 2.6083333333333334, "grad_norm": 5.278512092483218, "learning_rate": 5e-05, "loss": 0.3013, "num_input_tokens_seen": 57139252, "step": 626 }, { "epoch": 2.6083333333333334, "loss": 0.2858002185821533, "loss_ce": 0.001010189764201641, "loss_iou": 0.26171875, "loss_num": 0.043701171875, "loss_xval": 0.28515625, "num_input_tokens_seen": 57139252, "step": 626 }, { "epoch": 2.6125, "grad_norm": 8.353415112603466, "learning_rate": 5e-05, "loss": 0.2807, "num_input_tokens_seen": 57228832, "step": 627 }, { "epoch": 2.6125, "loss": 0.22690685093402863, "loss_ce": 0.002175399102270603, "loss_iou": 0.2890625, "loss_num": 0.0303955078125, "loss_xval": 0.224609375, "num_input_tokens_seen": 57228832, "step": 627 }, { "epoch": 2.6166666666666667, "grad_norm": 7.679395051656159, "learning_rate": 5e-05, "loss": 0.1637, "num_input_tokens_seen": 57320856, "step": 628 }, { "epoch": 2.6166666666666667, "loss": 0.18475009500980377, "loss_ce": 0.0012784195132553577, "loss_iou": 0.125, "loss_num": 0.030517578125, "loss_xval": 0.18359375, "num_input_tokens_seen": 57320856, "step": 628 }, { "epoch": 2.6208333333333336, "grad_norm": 15.678118569777013, "learning_rate": 5e-05, "loss": 0.23, "num_input_tokens_seen": 57412972, "step": 629 }, { "epoch": 2.6208333333333336, "loss": 0.24771615862846375, "loss_ce": 0.003575525712221861, "loss_iou": 0.3984375, "loss_num": 0.02880859375, "loss_xval": 0.244140625, "num_input_tokens_seen": 57412972, "step": 629 }, { "epoch": 2.625, "grad_norm": 7.690555252101069, "learning_rate": 5e-05, "loss": 0.187, "num_input_tokens_seen": 57503852, "step": 630 }, { "epoch": 2.625, "loss": 0.21856024861335754, "loss_ce": 0.0008478478412143886, "loss_iou": 0.1962890625, "loss_num": 0.03369140625, "loss_xval": 0.2177734375, "num_input_tokens_seen": 57503852, "step": 630 }, { "epoch": 2.6291666666666664, "grad_norm": 24.498475215223806, "learning_rate": 5e-05, "loss": 0.2475, "num_input_tokens_seen": 57594536, "step": 631 }, { "epoch": 2.6291666666666664, "loss": 0.23214200139045715, "loss_ce": 2.5318913685623556e-05, "loss_iou": 0.2412109375, "loss_num": 0.0341796875, "loss_xval": 0.232421875, "num_input_tokens_seen": 57594536, "step": 631 }, { "epoch": 2.6333333333333333, "grad_norm": 6.746331406823162, "learning_rate": 5e-05, "loss": 0.2355, "num_input_tokens_seen": 57686568, "step": 632 }, { "epoch": 2.6333333333333333, "loss": 0.17622056603431702, "loss_ce": 0.0010954277822747827, "loss_iou": 0.294921875, "loss_num": 0.0201416015625, "loss_xval": 0.1748046875, "num_input_tokens_seen": 57686568, "step": 632 }, { "epoch": 2.6375, "grad_norm": 26.850646887050033, "learning_rate": 5e-05, "loss": 0.343, "num_input_tokens_seen": 57778092, "step": 633 }, { "epoch": 2.6375, "loss": 0.3333674669265747, "loss_ce": 0.000847943767439574, "loss_iou": 0.236328125, "loss_num": 0.054443359375, "loss_xval": 0.33203125, "num_input_tokens_seen": 57778092, "step": 633 }, { "epoch": 2.6416666666666666, "grad_norm": 6.627786580412496, "learning_rate": 5e-05, "loss": 0.2335, "num_input_tokens_seen": 57869300, "step": 634 }, { "epoch": 2.6416666666666666, "loss": 0.18597182631492615, "loss_ce": 0.0004402133054099977, "loss_iou": 0.3359375, "loss_num": 0.02001953125, "loss_xval": 0.185546875, "num_input_tokens_seen": 57869300, "step": 634 }, { "epoch": 2.6458333333333335, "grad_norm": 5.948417253025581, "learning_rate": 5e-05, "loss": 0.1583, "num_input_tokens_seen": 57960656, "step": 635 }, { "epoch": 2.6458333333333335, "loss": 0.15748488903045654, "loss_ce": 0.001723156776279211, "loss_iou": 0.205078125, "loss_num": 0.020751953125, "loss_xval": 0.15625, "num_input_tokens_seen": 57960656, "step": 635 }, { "epoch": 2.65, "grad_norm": 7.441843298976219, "learning_rate": 5e-05, "loss": 0.2295, "num_input_tokens_seen": 58051536, "step": 636 }, { "epoch": 2.65, "loss": 0.22246670722961426, "loss_ce": 0.0006038988940417767, "loss_iou": 0.369140625, "loss_num": 0.025634765625, "loss_xval": 0.2216796875, "num_input_tokens_seen": 58051536, "step": 636 }, { "epoch": 2.654166666666667, "grad_norm": 39.320466579597394, "learning_rate": 5e-05, "loss": 0.2438, "num_input_tokens_seen": 58142564, "step": 637 }, { "epoch": 2.654166666666667, "loss": 0.23603284358978271, "loss_ce": 0.000620238424744457, "loss_iou": 0.474609375, "loss_num": 0.02294921875, "loss_xval": 0.2353515625, "num_input_tokens_seen": 58142564, "step": 637 }, { "epoch": 2.658333333333333, "grad_norm": 11.045505201026938, "learning_rate": 5e-05, "loss": 0.2387, "num_input_tokens_seen": 58233176, "step": 638 }, { "epoch": 2.658333333333333, "loss": 0.27559831738471985, "loss_ce": 8.562710718251765e-05, "loss_iou": 0.3515625, "loss_num": 0.037109375, "loss_xval": 0.275390625, "num_input_tokens_seen": 58233176, "step": 638 }, { "epoch": 2.6625, "grad_norm": 15.660428211237646, "learning_rate": 5e-05, "loss": 0.2244, "num_input_tokens_seen": 58324752, "step": 639 }, { "epoch": 2.6625, "loss": 0.2039228230714798, "loss_ce": 0.0001874716836027801, "loss_iou": 0.375, "loss_num": 0.0216064453125, "loss_xval": 0.2041015625, "num_input_tokens_seen": 58324752, "step": 639 }, { "epoch": 2.6666666666666665, "grad_norm": 18.16003551462728, "learning_rate": 5e-05, "loss": 0.2829, "num_input_tokens_seen": 58415844, "step": 640 }, { "epoch": 2.6666666666666665, "loss": 0.2732733488082886, "loss_ce": 0.004901759792119265, "loss_iou": 0.416015625, "loss_num": 0.032470703125, "loss_xval": 0.267578125, "num_input_tokens_seen": 58415844, "step": 640 }, { "epoch": 2.6708333333333334, "grad_norm": 9.423414548268628, "learning_rate": 5e-05, "loss": 0.2242, "num_input_tokens_seen": 58507252, "step": 641 }, { "epoch": 2.6708333333333334, "loss": 0.24119365215301514, "loss_ce": 0.00010478242620592937, "loss_iou": 0.34765625, "loss_num": 0.0303955078125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 58507252, "step": 641 }, { "epoch": 2.675, "grad_norm": 5.52971937860211, "learning_rate": 5e-05, "loss": 0.2362, "num_input_tokens_seen": 58599320, "step": 642 }, { "epoch": 2.675, "loss": 0.27918940782546997, "loss_ce": 0.003432593774050474, "loss_iou": 0.294921875, "loss_num": 0.0400390625, "loss_xval": 0.275390625, "num_input_tokens_seen": 58599320, "step": 642 }, { "epoch": 2.6791666666666667, "grad_norm": 3.342123448215612, "learning_rate": 5e-05, "loss": 0.1887, "num_input_tokens_seen": 58691080, "step": 643 }, { "epoch": 2.6791666666666667, "loss": 0.18224427103996277, "loss_ce": 0.00316713098436594, "loss_iou": 0.2021484375, "loss_num": 0.0255126953125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 58691080, "step": 643 }, { "epoch": 2.6833333333333336, "grad_norm": 5.4384932851096774, "learning_rate": 5e-05, "loss": 0.2862, "num_input_tokens_seen": 58782012, "step": 644 }, { "epoch": 2.6833333333333336, "loss": 0.2632726728916168, "loss_ce": 0.0004247686010785401, "loss_iou": 0.25, "loss_num": 0.039794921875, "loss_xval": 0.263671875, "num_input_tokens_seen": 58782012, "step": 644 }, { "epoch": 2.6875, "grad_norm": 6.975188398311233, "learning_rate": 5e-05, "loss": 0.2256, "num_input_tokens_seen": 58873700, "step": 645 }, { "epoch": 2.6875, "loss": 0.17887446284294128, "loss_ce": 0.0006517907604575157, "loss_iou": 0.2421875, "loss_num": 0.0230712890625, "loss_xval": 0.177734375, "num_input_tokens_seen": 58873700, "step": 645 }, { "epoch": 2.6916666666666664, "grad_norm": 11.234238447987558, "learning_rate": 5e-05, "loss": 0.2266, "num_input_tokens_seen": 58965020, "step": 646 }, { "epoch": 2.6916666666666664, "loss": 0.2156769037246704, "loss_ce": 0.000802622816991061, "loss_iou": 0.306640625, "loss_num": 0.027099609375, "loss_xval": 0.21484375, "num_input_tokens_seen": 58965020, "step": 646 }, { "epoch": 2.6958333333333333, "grad_norm": 15.223147913406171, "learning_rate": 5e-05, "loss": 0.2143, "num_input_tokens_seen": 59056500, "step": 647 }, { "epoch": 2.6958333333333333, "loss": 0.2504352927207947, "loss_ce": 0.0025104722008109093, "loss_iou": 0.32421875, "loss_num": 0.032958984375, "loss_xval": 0.248046875, "num_input_tokens_seen": 59056500, "step": 647 }, { "epoch": 2.7, "grad_norm": 10.64234876170083, "learning_rate": 5e-05, "loss": 0.3137, "num_input_tokens_seen": 59148324, "step": 648 }, { "epoch": 2.7, "loss": 0.31744182109832764, "loss_ce": 0.0014017969369888306, "loss_iou": 0.265625, "loss_num": 0.04931640625, "loss_xval": 0.31640625, "num_input_tokens_seen": 59148324, "step": 648 }, { "epoch": 2.7041666666666666, "grad_norm": 6.937275427148855, "learning_rate": 5e-05, "loss": 0.2047, "num_input_tokens_seen": 59239988, "step": 649 }, { "epoch": 2.7041666666666666, "loss": 0.19481661915779114, "loss_ce": 0.0026169028133153915, "loss_iou": 0.26171875, "loss_num": 0.02490234375, "loss_xval": 0.1923828125, "num_input_tokens_seen": 59239988, "step": 649 }, { "epoch": 2.7083333333333335, "grad_norm": 8.433146515122392, "learning_rate": 5e-05, "loss": 0.2338, "num_input_tokens_seen": 59330968, "step": 650 }, { "epoch": 2.7083333333333335, "loss": 0.2627840042114258, "loss_ce": 0.003933912143111229, "loss_iou": 0.353515625, "loss_num": 0.033447265625, "loss_xval": 0.259765625, "num_input_tokens_seen": 59330968, "step": 650 }, { "epoch": 2.7125, "grad_norm": 58.97124918183895, "learning_rate": 5e-05, "loss": 0.2601, "num_input_tokens_seen": 59421252, "step": 651 }, { "epoch": 2.7125, "loss": 0.2433607280254364, "loss_ce": 7.458672916982323e-05, "loss_iou": 0.34765625, "loss_num": 0.030517578125, "loss_xval": 0.2431640625, "num_input_tokens_seen": 59421252, "step": 651 }, { "epoch": 2.716666666666667, "grad_norm": 9.067646457220144, "learning_rate": 5e-05, "loss": 0.2721, "num_input_tokens_seen": 59512520, "step": 652 }, { "epoch": 2.716666666666667, "loss": 0.26135459542274475, "loss_ce": 0.0008565568132326007, "loss_iou": 0.353515625, "loss_num": 0.03369140625, "loss_xval": 0.259765625, "num_input_tokens_seen": 59512520, "step": 652 }, { "epoch": 2.720833333333333, "grad_norm": 6.026072638011931, "learning_rate": 5e-05, "loss": 0.2861, "num_input_tokens_seen": 59604232, "step": 653 }, { "epoch": 2.720833333333333, "loss": 0.30131012201309204, "loss_ce": 0.0018716540653258562, "loss_iou": 0.310546875, "loss_num": 0.043701171875, "loss_xval": 0.298828125, "num_input_tokens_seen": 59604232, "step": 653 }, { "epoch": 2.725, "grad_norm": 10.651785753724841, "learning_rate": 5e-05, "loss": 0.2502, "num_input_tokens_seen": 59695068, "step": 654 }, { "epoch": 2.725, "loss": 0.197337806224823, "loss_ce": 0.0008961611310951412, "loss_iou": 0.16796875, "loss_num": 0.030517578125, "loss_xval": 0.1962890625, "num_input_tokens_seen": 59695068, "step": 654 }, { "epoch": 2.7291666666666665, "grad_norm": 9.793374084872067, "learning_rate": 5e-05, "loss": 0.222, "num_input_tokens_seen": 59786688, "step": 655 }, { "epoch": 2.7291666666666665, "loss": 0.2327248454093933, "loss_ce": 0.007322031073272228, "loss_iou": 0.318359375, "loss_num": 0.0284423828125, "loss_xval": 0.2255859375, "num_input_tokens_seen": 59786688, "step": 655 }, { "epoch": 2.7333333333333334, "grad_norm": 13.849598415052697, "learning_rate": 5e-05, "loss": 0.314, "num_input_tokens_seen": 59877916, "step": 656 }, { "epoch": 2.7333333333333334, "loss": 0.2899276912212372, "loss_ce": 0.0003769119502976537, "loss_iou": 0.189453125, "loss_num": 0.0478515625, "loss_xval": 0.2890625, "num_input_tokens_seen": 59877916, "step": 656 }, { "epoch": 2.7375, "grad_norm": 14.770246930851002, "learning_rate": 5e-05, "loss": 0.2103, "num_input_tokens_seen": 59968964, "step": 657 }, { "epoch": 2.7375, "loss": 0.23949527740478516, "loss_ce": 0.0029229968786239624, "loss_iou": 0.25390625, "loss_num": 0.033935546875, "loss_xval": 0.236328125, "num_input_tokens_seen": 59968964, "step": 657 }, { "epoch": 2.7416666666666667, "grad_norm": 5.194622412637509, "learning_rate": 5e-05, "loss": 0.2327, "num_input_tokens_seen": 60060144, "step": 658 }, { "epoch": 2.7416666666666667, "loss": 0.20210278034210205, "loss_ce": 7.640862895641476e-05, "loss_iou": 0.203125, "loss_num": 0.0296630859375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 60060144, "step": 658 }, { "epoch": 2.7458333333333336, "grad_norm": 3.628872661853219, "learning_rate": 5e-05, "loss": 0.1819, "num_input_tokens_seen": 60151700, "step": 659 }, { "epoch": 2.7458333333333336, "loss": 0.16526725888252258, "loss_ce": 0.0032799644395709038, "loss_iou": 0.1982421875, "loss_num": 0.02197265625, "loss_xval": 0.162109375, "num_input_tokens_seen": 60151700, "step": 659 }, { "epoch": 2.75, "grad_norm": 7.896663551863479, "learning_rate": 5e-05, "loss": 0.2507, "num_input_tokens_seen": 60242876, "step": 660 }, { "epoch": 2.75, "loss": 0.19262319803237915, "loss_ce": 0.0021934963297098875, "loss_iou": 0.2373046875, "loss_num": 0.0255126953125, "loss_xval": 0.1904296875, "num_input_tokens_seen": 60242876, "step": 660 }, { "epoch": 2.7541666666666664, "grad_norm": 11.92417871030824, "learning_rate": 5e-05, "loss": 0.2565, "num_input_tokens_seen": 60334320, "step": 661 }, { "epoch": 2.7541666666666664, "loss": 0.19221524894237518, "loss_ce": 1.553935362608172e-05, "loss_iou": 0.337890625, "loss_num": 0.0206298828125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 60334320, "step": 661 }, { "epoch": 2.7583333333333333, "grad_norm": 11.250407466916267, "learning_rate": 5e-05, "loss": 0.2634, "num_input_tokens_seen": 60425888, "step": 662 }, { "epoch": 2.7583333333333333, "loss": 0.17673750221729279, "loss_ce": 0.0019328128546476364, "loss_iou": 0.232421875, "loss_num": 0.022705078125, "loss_xval": 0.1748046875, "num_input_tokens_seen": 60425888, "step": 662 }, { "epoch": 2.7625, "grad_norm": 10.98782497813321, "learning_rate": 5e-05, "loss": 0.2131, "num_input_tokens_seen": 60517260, "step": 663 }, { "epoch": 2.7625, "loss": 0.2038540244102478, "loss_ce": 0.0006679813377559185, "loss_iou": 0.44921875, "loss_num": 0.0167236328125, "loss_xval": 0.203125, "num_input_tokens_seen": 60517260, "step": 663 }, { "epoch": 2.7666666666666666, "grad_norm": 13.162918661460708, "learning_rate": 5e-05, "loss": 0.3174, "num_input_tokens_seen": 60608376, "step": 664 }, { "epoch": 2.7666666666666666, "loss": 0.31894752383232117, "loss_ce": 0.0025412747636437416, "loss_iou": 0.2431640625, "loss_num": 0.05029296875, "loss_xval": 0.31640625, "num_input_tokens_seen": 60608376, "step": 664 }, { "epoch": 2.7708333333333335, "grad_norm": 6.382744557259831, "learning_rate": 5e-05, "loss": 0.1812, "num_input_tokens_seen": 60699668, "step": 665 }, { "epoch": 2.7708333333333335, "loss": 0.16857726871967316, "loss_ce": 0.0005474792560562491, "loss_iou": 0.294921875, "loss_num": 0.0179443359375, "loss_xval": 0.16796875, "num_input_tokens_seen": 60699668, "step": 665 }, { "epoch": 2.775, "grad_norm": 3.281281401377553, "learning_rate": 5e-05, "loss": 0.3254, "num_input_tokens_seen": 60791272, "step": 666 }, { "epoch": 2.775, "loss": 0.2621937096118927, "loss_ce": 0.002061892068013549, "loss_iou": 0.208984375, "loss_num": 0.040771484375, "loss_xval": 0.259765625, "num_input_tokens_seen": 60791272, "step": 666 }, { "epoch": 2.779166666666667, "grad_norm": 5.055060987599515, "learning_rate": 5e-05, "loss": 0.1799, "num_input_tokens_seen": 60882756, "step": 667 }, { "epoch": 2.779166666666667, "loss": 0.11920049786567688, "loss_ce": 0.0060107954777777195, "loss_iou": 0.06103515625, "loss_num": 0.0194091796875, "loss_xval": 0.11328125, "num_input_tokens_seen": 60882756, "step": 667 }, { "epoch": 2.783333333333333, "grad_norm": 9.929699710676825, "learning_rate": 5e-05, "loss": 0.2377, "num_input_tokens_seen": 60972912, "step": 668 }, { "epoch": 2.783333333333333, "loss": 0.2324073612689972, "loss_ce": 0.0005347952246665955, "loss_iou": 0.28125, "loss_num": 0.03125, "loss_xval": 0.2314453125, "num_input_tokens_seen": 60972912, "step": 668 }, { "epoch": 2.7875, "grad_norm": 3.515949080645658, "learning_rate": 5e-05, "loss": 0.2269, "num_input_tokens_seen": 61063756, "step": 669 }, { "epoch": 2.7875, "loss": 0.20325595140457153, "loss_ce": 0.0023892491590231657, "loss_iou": 0.337890625, "loss_num": 0.0220947265625, "loss_xval": 0.201171875, "num_input_tokens_seen": 61063756, "step": 669 }, { "epoch": 2.7916666666666665, "grad_norm": 15.70144054020013, "learning_rate": 5e-05, "loss": 0.23, "num_input_tokens_seen": 61154772, "step": 670 }, { "epoch": 2.7916666666666665, "loss": 0.25681400299072266, "loss_ce": 0.0027094087563455105, "loss_iou": 0.466796875, "loss_num": 0.02587890625, "loss_xval": 0.25390625, "num_input_tokens_seen": 61154772, "step": 670 }, { "epoch": 2.7958333333333334, "grad_norm": 8.257168700533908, "learning_rate": 5e-05, "loss": 0.2208, "num_input_tokens_seen": 61245680, "step": 671 }, { "epoch": 2.7958333333333334, "loss": 0.18007764220237732, "loss_ce": 2.3923021217342466e-05, "loss_iou": 0.1064453125, "loss_num": 0.0302734375, "loss_xval": 0.1796875, "num_input_tokens_seen": 61245680, "step": 671 }, { "epoch": 2.8, "grad_norm": 8.520313388600616, "learning_rate": 5e-05, "loss": 0.2032, "num_input_tokens_seen": 61336904, "step": 672 }, { "epoch": 2.8, "loss": 0.1864093542098999, "loss_ce": 0.0005267920205369592, "loss_iou": 0.326171875, "loss_num": 0.0196533203125, "loss_xval": 0.185546875, "num_input_tokens_seen": 61336904, "step": 672 }, { "epoch": 2.8041666666666667, "grad_norm": 12.037204098331905, "learning_rate": 5e-05, "loss": 0.244, "num_input_tokens_seen": 61427584, "step": 673 }, { "epoch": 2.8041666666666667, "loss": 0.2448461949825287, "loss_ce": 0.0001562585384817794, "loss_iou": 0.318359375, "loss_num": 0.03173828125, "loss_xval": 0.2451171875, "num_input_tokens_seen": 61427584, "step": 673 }, { "epoch": 2.8083333333333336, "grad_norm": 11.035018957052076, "learning_rate": 5e-05, "loss": 0.3142, "num_input_tokens_seen": 61519484, "step": 674 }, { "epoch": 2.8083333333333336, "loss": 0.3390733599662781, "loss_ce": 0.003380006877705455, "loss_iou": 0.1708984375, "loss_num": 0.057861328125, "loss_xval": 0.3359375, "num_input_tokens_seen": 61519484, "step": 674 }, { "epoch": 2.8125, "grad_norm": 5.682714870903573, "learning_rate": 5e-05, "loss": 0.2313, "num_input_tokens_seen": 61611072, "step": 675 }, { "epoch": 2.8125, "loss": 0.2279127836227417, "loss_ce": 0.006538269110023975, "loss_iou": 0.2890625, "loss_num": 0.0286865234375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 61611072, "step": 675 }, { "epoch": 2.8166666666666664, "grad_norm": 16.111549162613954, "learning_rate": 5e-05, "loss": 0.2393, "num_input_tokens_seen": 61701712, "step": 676 }, { "epoch": 2.8166666666666664, "loss": 0.28185001015663147, "loss_ce": 0.0011493464699015021, "loss_iou": 0.330078125, "loss_num": 0.038330078125, "loss_xval": 0.28125, "num_input_tokens_seen": 61701712, "step": 676 }, { "epoch": 2.8208333333333333, "grad_norm": 7.381517233376643, "learning_rate": 5e-05, "loss": 0.2343, "num_input_tokens_seen": 61792896, "step": 677 }, { "epoch": 2.8208333333333333, "loss": 0.2264833152294159, "loss_ce": 0.00022599007934331894, "loss_iou": 0.234375, "loss_num": 0.032470703125, "loss_xval": 0.2265625, "num_input_tokens_seen": 61792896, "step": 677 }, { "epoch": 2.825, "grad_norm": 8.826422628521604, "learning_rate": 5e-05, "loss": 0.2607, "num_input_tokens_seen": 61884132, "step": 678 }, { "epoch": 2.825, "loss": 0.24496780335903168, "loss_ce": 0.001376486150547862, "loss_iou": 0.25390625, "loss_num": 0.034912109375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 61884132, "step": 678 }, { "epoch": 2.8291666666666666, "grad_norm": 9.635858360118323, "learning_rate": 5e-05, "loss": 0.2347, "num_input_tokens_seen": 61975732, "step": 679 }, { "epoch": 2.8291666666666666, "loss": 0.27702057361602783, "loss_ce": 0.0026675716508179903, "loss_iou": 0.34765625, "loss_num": 0.035888671875, "loss_xval": 0.2734375, "num_input_tokens_seen": 61975732, "step": 679 }, { "epoch": 2.8333333333333335, "grad_norm": 11.19240813728173, "learning_rate": 5e-05, "loss": 0.2936, "num_input_tokens_seen": 62067048, "step": 680 }, { "epoch": 2.8333333333333335, "loss": 0.2815859913825989, "loss_ce": 0.004974641837179661, "loss_iou": 0.322265625, "loss_num": 0.037841796875, "loss_xval": 0.27734375, "num_input_tokens_seen": 62067048, "step": 680 }, { "epoch": 2.8375, "grad_norm": 5.430056738897933, "learning_rate": 5e-05, "loss": 0.331, "num_input_tokens_seen": 62158012, "step": 681 }, { "epoch": 2.8375, "loss": 0.43693971633911133, "loss_ce": 0.0007824877393431962, "loss_iou": 0.34765625, "loss_num": 0.068359375, "loss_xval": 0.435546875, "num_input_tokens_seen": 62158012, "step": 681 }, { "epoch": 2.841666666666667, "grad_norm": 11.984837455811917, "learning_rate": 5e-05, "loss": 0.1713, "num_input_tokens_seen": 62249280, "step": 682 }, { "epoch": 2.841666666666667, "loss": 0.17815792560577393, "loss_ce": 0.001644266420044005, "loss_iou": 0.2060546875, "loss_num": 0.0240478515625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 62249280, "step": 682 }, { "epoch": 2.845833333333333, "grad_norm": 15.193914351278222, "learning_rate": 5e-05, "loss": 0.3415, "num_input_tokens_seen": 62339108, "step": 683 }, { "epoch": 2.845833333333333, "loss": 0.2802782654762268, "loss_ce": 0.0231371708214283, "loss_iou": 0.294921875, "loss_num": 0.035400390625, "loss_xval": 0.2578125, "num_input_tokens_seen": 62339108, "step": 683 }, { "epoch": 2.85, "grad_norm": 6.521015692246656, "learning_rate": 5e-05, "loss": 0.1894, "num_input_tokens_seen": 62430992, "step": 684 }, { "epoch": 2.85, "loss": 0.2058933973312378, "loss_ce": 0.0009373407810926437, "loss_iou": 0.326171875, "loss_num": 0.023193359375, "loss_xval": 0.205078125, "num_input_tokens_seen": 62430992, "step": 684 }, { "epoch": 2.8541666666666665, "grad_norm": 5.874700429325175, "learning_rate": 5e-05, "loss": 0.222, "num_input_tokens_seen": 62522124, "step": 685 }, { "epoch": 2.8541666666666665, "loss": 0.22513145208358765, "loss_ce": 0.0011324224760755897, "loss_iou": 0.2470703125, "loss_num": 0.03125, "loss_xval": 0.2236328125, "num_input_tokens_seen": 62522124, "step": 685 }, { "epoch": 2.8583333333333334, "grad_norm": 11.611583234295143, "learning_rate": 5e-05, "loss": 0.268, "num_input_tokens_seen": 62613572, "step": 686 }, { "epoch": 2.8583333333333334, "loss": 0.20817159116268158, "loss_ce": 0.0013844802742823958, "loss_iou": 0.302734375, "loss_num": 0.0247802734375, "loss_xval": 0.20703125, "num_input_tokens_seen": 62613572, "step": 686 }, { "epoch": 2.8625, "grad_norm": 16.695023351003698, "learning_rate": 5e-05, "loss": 0.2915, "num_input_tokens_seen": 62704872, "step": 687 }, { "epoch": 2.8625, "loss": 0.27818945050239563, "loss_ce": 0.0017001950182020664, "loss_iou": 0.322265625, "loss_num": 0.03759765625, "loss_xval": 0.27734375, "num_input_tokens_seen": 62704872, "step": 687 }, { "epoch": 2.8666666666666667, "grad_norm": 12.654336027930734, "learning_rate": 5e-05, "loss": 0.2401, "num_input_tokens_seen": 62795636, "step": 688 }, { "epoch": 2.8666666666666667, "loss": 0.25469058752059937, "loss_ce": 0.00011293623538222164, "loss_iou": 0.291015625, "loss_num": 0.034912109375, "loss_xval": 0.25390625, "num_input_tokens_seen": 62795636, "step": 688 }, { "epoch": 2.8708333333333336, "grad_norm": 8.620940501512623, "learning_rate": 5e-05, "loss": 0.1799, "num_input_tokens_seen": 62887012, "step": 689 }, { "epoch": 2.8708333333333336, "loss": 0.19433006644248962, "loss_ce": 0.00106223882175982, "loss_iou": 0.265625, "loss_num": 0.0240478515625, "loss_xval": 0.193359375, "num_input_tokens_seen": 62887012, "step": 689 }, { "epoch": 2.875, "grad_norm": 6.658542573080399, "learning_rate": 5e-05, "loss": 0.2632, "num_input_tokens_seen": 62978376, "step": 690 }, { "epoch": 2.875, "loss": 0.33673685789108276, "loss_ce": 0.00031107006361708045, "loss_iou": 0.32421875, "loss_num": 0.04931640625, "loss_xval": 0.3359375, "num_input_tokens_seen": 62978376, "step": 690 }, { "epoch": 2.8791666666666664, "grad_norm": 13.381904165276124, "learning_rate": 5e-05, "loss": 0.3308, "num_input_tokens_seen": 63069576, "step": 691 }, { "epoch": 2.8791666666666664, "loss": 0.2565188407897949, "loss_ce": 0.0002932699571829289, "loss_iou": 0.435546875, "loss_num": 0.0272216796875, "loss_xval": 0.255859375, "num_input_tokens_seen": 63069576, "step": 691 }, { "epoch": 2.8833333333333333, "grad_norm": 18.143033223163656, "learning_rate": 5e-05, "loss": 0.2573, "num_input_tokens_seen": 63161244, "step": 692 }, { "epoch": 2.8833333333333333, "loss": 0.3152886629104614, "loss_ce": 0.00373471318744123, "loss_iou": 0.1962890625, "loss_num": 0.051513671875, "loss_xval": 0.3125, "num_input_tokens_seen": 63161244, "step": 692 }, { "epoch": 2.8875, "grad_norm": 8.917116183411315, "learning_rate": 5e-05, "loss": 0.2325, "num_input_tokens_seen": 63253472, "step": 693 }, { "epoch": 2.8875, "loss": 0.2928329110145569, "loss_ce": 0.0025497153401374817, "loss_iou": 0.3125, "loss_num": 0.040771484375, "loss_xval": 0.291015625, "num_input_tokens_seen": 63253472, "step": 693 }, { "epoch": 2.8916666666666666, "grad_norm": 9.397470507246357, "learning_rate": 5e-05, "loss": 0.2805, "num_input_tokens_seen": 63343676, "step": 694 }, { "epoch": 2.8916666666666666, "loss": 0.29843559861183167, "loss_ce": 9.574588329996914e-05, "loss_iou": 0.42578125, "loss_num": 0.0361328125, "loss_xval": 0.298828125, "num_input_tokens_seen": 63343676, "step": 694 }, { "epoch": 2.8958333333333335, "grad_norm": 13.558932690642562, "learning_rate": 5e-05, "loss": 0.2866, "num_input_tokens_seen": 63435244, "step": 695 }, { "epoch": 2.8958333333333335, "loss": 0.35385358333587646, "loss_ce": 0.0011924391146749258, "loss_iou": 0.263671875, "loss_num": 0.055908203125, "loss_xval": 0.353515625, "num_input_tokens_seen": 63435244, "step": 695 }, { "epoch": 2.9, "grad_norm": 5.0040104824422285, "learning_rate": 5e-05, "loss": 0.2512, "num_input_tokens_seen": 63526600, "step": 696 }, { "epoch": 2.9, "loss": 0.29930955171585083, "loss_ce": 0.005486306734383106, "loss_iou": 0.2451171875, "loss_num": 0.044921875, "loss_xval": 0.29296875, "num_input_tokens_seen": 63526600, "step": 696 }, { "epoch": 2.904166666666667, "grad_norm": 5.068797963676063, "learning_rate": 5e-05, "loss": 0.2976, "num_input_tokens_seen": 63618304, "step": 697 }, { "epoch": 2.904166666666667, "loss": 0.24356845021247864, "loss_ce": 0.0015030185459181666, "loss_iou": 0.33984375, "loss_num": 0.0294189453125, "loss_xval": 0.2421875, "num_input_tokens_seen": 63618304, "step": 697 }, { "epoch": 2.908333333333333, "grad_norm": 10.455525088397929, "learning_rate": 5e-05, "loss": 0.2502, "num_input_tokens_seen": 63708884, "step": 698 }, { "epoch": 2.908333333333333, "loss": 0.27911537885665894, "loss_ce": 0.005952558480203152, "loss_iou": 0.32421875, "loss_num": 0.03662109375, "loss_xval": 0.2734375, "num_input_tokens_seen": 63708884, "step": 698 }, { "epoch": 2.9125, "grad_norm": 11.425428022543029, "learning_rate": 5e-05, "loss": 0.263, "num_input_tokens_seen": 63800660, "step": 699 }, { "epoch": 2.9125, "loss": 0.2969471216201782, "loss_ce": 0.005199064500629902, "loss_iou": 0.23046875, "loss_num": 0.04541015625, "loss_xval": 0.291015625, "num_input_tokens_seen": 63800660, "step": 699 }, { "epoch": 2.9166666666666665, "grad_norm": 4.500736972041381, "learning_rate": 5e-05, "loss": 0.2321, "num_input_tokens_seen": 63892240, "step": 700 }, { "epoch": 2.9166666666666665, "loss": 0.2109307199716568, "loss_ce": 0.00042046865564770997, "loss_iou": 0.326171875, "loss_num": 0.0238037109375, "loss_xval": 0.2109375, "num_input_tokens_seen": 63892240, "step": 700 }, { "epoch": 2.9208333333333334, "grad_norm": 11.82852733728945, "learning_rate": 5e-05, "loss": 0.2214, "num_input_tokens_seen": 63983876, "step": 701 }, { "epoch": 2.9208333333333334, "loss": 0.28741031885147095, "loss_ce": 0.001643726835027337, "loss_iou": 0.365234375, "loss_num": 0.036865234375, "loss_xval": 0.28515625, "num_input_tokens_seen": 63983876, "step": 701 }, { "epoch": 2.925, "grad_norm": 6.021733502978741, "learning_rate": 5e-05, "loss": 0.2829, "num_input_tokens_seen": 64074748, "step": 702 }, { "epoch": 2.925, "loss": 0.2205052673816681, "loss_ce": 0.00035146629670634866, "loss_iou": 0.38671875, "loss_num": 0.0223388671875, "loss_xval": 0.2197265625, "num_input_tokens_seen": 64074748, "step": 702 }, { "epoch": 2.9291666666666667, "grad_norm": 6.696864207356387, "learning_rate": 5e-05, "loss": 0.226, "num_input_tokens_seen": 64166260, "step": 703 }, { "epoch": 2.9291666666666667, "loss": 0.2554856836795807, "loss_ce": 0.0018235727911815047, "loss_iou": 0.27734375, "loss_num": 0.03515625, "loss_xval": 0.25390625, "num_input_tokens_seen": 64166260, "step": 703 }, { "epoch": 2.9333333333333336, "grad_norm": 5.027904053317167, "learning_rate": 5e-05, "loss": 0.1737, "num_input_tokens_seen": 64257464, "step": 704 }, { "epoch": 2.9333333333333336, "loss": 0.19181515276432037, "loss_ce": 0.0008971852366812527, "loss_iou": 0.287109375, "loss_num": 0.0220947265625, "loss_xval": 0.19140625, "num_input_tokens_seen": 64257464, "step": 704 }, { "epoch": 2.9375, "grad_norm": 14.681615464234536, "learning_rate": 5e-05, "loss": 0.29, "num_input_tokens_seen": 64348264, "step": 705 }, { "epoch": 2.9375, "loss": 0.34003758430480957, "loss_ce": 0.0014145312597975135, "loss_iou": 0.474609375, "loss_num": 0.041015625, "loss_xval": 0.337890625, "num_input_tokens_seen": 64348264, "step": 705 }, { "epoch": 2.9416666666666664, "grad_norm": 11.050186663812722, "learning_rate": 5e-05, "loss": 0.3783, "num_input_tokens_seen": 64439336, "step": 706 }, { "epoch": 2.9416666666666664, "loss": 0.4706357717514038, "loss_ce": 5.473133933264762e-05, "loss_iou": 0.337890625, "loss_num": 0.0751953125, "loss_xval": 0.470703125, "num_input_tokens_seen": 64439336, "step": 706 }, { "epoch": 2.9458333333333333, "grad_norm": 9.673816924435314, "learning_rate": 5e-05, "loss": 0.2529, "num_input_tokens_seen": 64530780, "step": 707 }, { "epoch": 2.9458333333333333, "loss": 0.27379924058914185, "loss_ce": 0.0010941624641418457, "loss_iou": 0.3671875, "loss_num": 0.03369140625, "loss_xval": 0.2734375, "num_input_tokens_seen": 64530780, "step": 707 }, { "epoch": 2.95, "grad_norm": 9.763305725297712, "learning_rate": 5e-05, "loss": 0.2803, "num_input_tokens_seen": 64622072, "step": 708 }, { "epoch": 2.95, "loss": 0.2703673541545868, "loss_ce": 0.0031554533634334803, "loss_iou": 0.39453125, "loss_num": 0.0311279296875, "loss_xval": 0.267578125, "num_input_tokens_seen": 64622072, "step": 708 }, { "epoch": 2.9541666666666666, "grad_norm": 7.074264004711966, "learning_rate": 5e-05, "loss": 0.2478, "num_input_tokens_seen": 64713488, "step": 709 }, { "epoch": 2.9541666666666666, "loss": 0.28091779351234436, "loss_ce": 0.0006138343014754355, "loss_iou": 0.345703125, "loss_num": 0.036376953125, "loss_xval": 0.28125, "num_input_tokens_seen": 64713488, "step": 709 }, { "epoch": 2.9583333333333335, "grad_norm": 7.851782261633102, "learning_rate": 5e-05, "loss": 0.2913, "num_input_tokens_seen": 64804640, "step": 710 }, { "epoch": 2.9583333333333335, "loss": 0.348236620426178, "loss_ce": 0.0005803514504805207, "loss_iou": 0.2490234375, "loss_num": 0.055419921875, "loss_xval": 0.34765625, "num_input_tokens_seen": 64804640, "step": 710 }, { "epoch": 2.9625, "grad_norm": 31.590384776585033, "learning_rate": 5e-05, "loss": 0.2149, "num_input_tokens_seen": 64895024, "step": 711 }, { "epoch": 2.9625, "loss": 0.2294641137123108, "loss_ce": 3.295343049103394e-05, "loss_iou": 0.341796875, "loss_num": 0.0264892578125, "loss_xval": 0.2294921875, "num_input_tokens_seen": 64895024, "step": 711 }, { "epoch": 2.966666666666667, "grad_norm": 9.453416591030498, "learning_rate": 5e-05, "loss": 0.3454, "num_input_tokens_seen": 64986716, "step": 712 }, { "epoch": 2.966666666666667, "loss": 0.31218835711479187, "loss_ce": 0.0015194227453321218, "loss_iou": 0.330078125, "loss_num": 0.04345703125, "loss_xval": 0.310546875, "num_input_tokens_seen": 64986716, "step": 712 }, { "epoch": 2.970833333333333, "grad_norm": 24.69275794721592, "learning_rate": 5e-05, "loss": 0.2417, "num_input_tokens_seen": 65077856, "step": 713 }, { "epoch": 2.970833333333333, "loss": 0.30438870191574097, "loss_ce": 0.00015895110846031457, "loss_iou": 0.34765625, "loss_num": 0.041015625, "loss_xval": 0.3046875, "num_input_tokens_seen": 65077856, "step": 713 }, { "epoch": 2.975, "grad_norm": 20.21321352363983, "learning_rate": 5e-05, "loss": 0.2496, "num_input_tokens_seen": 65169540, "step": 714 }, { "epoch": 2.975, "loss": 0.2716801166534424, "loss_ce": 0.002454049652442336, "loss_iou": 0.302734375, "loss_num": 0.03662109375, "loss_xval": 0.26953125, "num_input_tokens_seen": 65169540, "step": 714 }, { "epoch": 2.9791666666666665, "grad_norm": 10.718899872832404, "learning_rate": 5e-05, "loss": 0.2258, "num_input_tokens_seen": 65260556, "step": 715 }, { "epoch": 2.9791666666666665, "loss": 0.23855066299438477, "loss_ce": 0.001795294345356524, "loss_iou": 0.4296875, "loss_num": 0.02294921875, "loss_xval": 0.236328125, "num_input_tokens_seen": 65260556, "step": 715 }, { "epoch": 2.9833333333333334, "grad_norm": 8.397827721760875, "learning_rate": 5e-05, "loss": 0.2388, "num_input_tokens_seen": 65351612, "step": 716 }, { "epoch": 2.9833333333333334, "loss": 0.3029412627220154, "loss_ce": 0.002495725639164448, "loss_iou": 0.296875, "loss_num": 0.04296875, "loss_xval": 0.30078125, "num_input_tokens_seen": 65351612, "step": 716 }, { "epoch": 2.9875, "grad_norm": 3.6564582738942732, "learning_rate": 5e-05, "loss": 0.1776, "num_input_tokens_seen": 65442800, "step": 717 }, { "epoch": 2.9875, "loss": 0.14690200984477997, "loss_ce": 0.00169937324244529, "loss_iou": 0.28515625, "loss_num": 0.0126953125, "loss_xval": 0.1455078125, "num_input_tokens_seen": 65442800, "step": 717 }, { "epoch": 2.9916666666666667, "grad_norm": 17.98213183348839, "learning_rate": 5e-05, "loss": 0.2081, "num_input_tokens_seen": 65533756, "step": 718 }, { "epoch": 2.9916666666666667, "loss": 0.19276343286037445, "loss_ce": 0.004042725078761578, "loss_iou": 0.22265625, "loss_num": 0.02490234375, "loss_xval": 0.1884765625, "num_input_tokens_seen": 65533756, "step": 718 }, { "epoch": 2.9958333333333336, "grad_norm": 25.648827625748236, "learning_rate": 5e-05, "loss": 0.2349, "num_input_tokens_seen": 65625792, "step": 719 }, { "epoch": 2.9958333333333336, "loss": 0.25091925263404846, "loss_ce": 0.002689286367967725, "loss_iou": 0.330078125, "loss_num": 0.0306396484375, "loss_xval": 0.248046875, "num_input_tokens_seen": 65625792, "step": 719 }, { "epoch": 3.0, "grad_norm": 6.210842823600316, "learning_rate": 5e-05, "loss": 0.3084, "num_input_tokens_seen": 65716796, "step": 720 }, { "epoch": 3.0, "loss": 0.24776512384414673, "loss_ce": 0.0007558311335742474, "loss_iou": 0.3125, "loss_num": 0.031494140625, "loss_xval": 0.2470703125, "num_input_tokens_seen": 65716796, "step": 720 }, { "epoch": 3.004166666666667, "grad_norm": 5.980664589750897, "learning_rate": 5e-05, "loss": 0.2419, "num_input_tokens_seen": 65807996, "step": 721 }, { "epoch": 3.004166666666667, "loss": 0.27643975615501404, "loss_ce": 0.0007439564215019345, "loss_iou": 0.2158203125, "loss_num": 0.042724609375, "loss_xval": 0.275390625, "num_input_tokens_seen": 65807996, "step": 721 }, { "epoch": 3.0083333333333333, "grad_norm": 6.249638553171776, "learning_rate": 5e-05, "loss": 0.2432, "num_input_tokens_seen": 65899328, "step": 722 }, { "epoch": 3.0083333333333333, "loss": 0.3124554753303528, "loss_ce": 0.0016644435236230493, "loss_iou": 0.390625, "loss_num": 0.03955078125, "loss_xval": 0.310546875, "num_input_tokens_seen": 65899328, "step": 722 }, { "epoch": 3.0125, "grad_norm": 5.702342020055269, "learning_rate": 5e-05, "loss": 0.275, "num_input_tokens_seen": 65990712, "step": 723 }, { "epoch": 3.0125, "loss": 0.3301711082458496, "loss_ce": 0.000703359954059124, "loss_iou": 0.353515625, "loss_num": 0.045654296875, "loss_xval": 0.330078125, "num_input_tokens_seen": 65990712, "step": 723 }, { "epoch": 3.0166666666666666, "grad_norm": 17.307058236726217, "learning_rate": 5e-05, "loss": 0.31, "num_input_tokens_seen": 66080572, "step": 724 }, { "epoch": 3.0166666666666666, "loss": 0.3130786120891571, "loss_ce": 9.032182424562052e-05, "loss_iou": 0.484375, "loss_num": 0.03466796875, "loss_xval": 0.3125, "num_input_tokens_seen": 66080572, "step": 724 }, { "epoch": 3.0208333333333335, "grad_norm": 5.9350376914759835, "learning_rate": 5e-05, "loss": 0.2336, "num_input_tokens_seen": 66172580, "step": 725 }, { "epoch": 3.0208333333333335, "loss": 0.2671472430229187, "loss_ce": 0.0024377694353461266, "loss_iou": 0.2080078125, "loss_num": 0.041015625, "loss_xval": 0.265625, "num_input_tokens_seen": 66172580, "step": 725 }, { "epoch": 3.025, "grad_norm": 6.0164216461009605, "learning_rate": 5e-05, "loss": 0.2599, "num_input_tokens_seen": 66263888, "step": 726 }, { "epoch": 3.025, "loss": 0.23070672154426575, "loss_ce": 0.0018248929409310222, "loss_iou": 0.30078125, "loss_num": 0.0283203125, "loss_xval": 0.228515625, "num_input_tokens_seen": 66263888, "step": 726 }, { "epoch": 3.029166666666667, "grad_norm": 6.765864018918662, "learning_rate": 5e-05, "loss": 0.192, "num_input_tokens_seen": 66355716, "step": 727 }, { "epoch": 3.029166666666667, "loss": 0.20913203060626984, "loss_ce": 0.0018261217046529055, "loss_iou": 0.2197265625, "loss_num": 0.0286865234375, "loss_xval": 0.20703125, "num_input_tokens_seen": 66355716, "step": 727 }, { "epoch": 3.033333333333333, "grad_norm": 14.570674315501613, "learning_rate": 5e-05, "loss": 0.2043, "num_input_tokens_seen": 66446728, "step": 728 }, { "epoch": 3.033333333333333, "loss": 0.17731231451034546, "loss_ce": 0.0014700321480631828, "loss_iou": 0.3046875, "loss_num": 0.0174560546875, "loss_xval": 0.17578125, "num_input_tokens_seen": 66446728, "step": 728 }, { "epoch": 3.0375, "grad_norm": 11.645861591898887, "learning_rate": 5e-05, "loss": 0.3101, "num_input_tokens_seen": 66538300, "step": 729 }, { "epoch": 3.0375, "loss": 0.3834507465362549, "loss_ce": 0.003812087932601571, "loss_iou": 0.48046875, "loss_num": 0.0478515625, "loss_xval": 0.37890625, "num_input_tokens_seen": 66538300, "step": 729 }, { "epoch": 3.0416666666666665, "grad_norm": 2.4084494270106953, "learning_rate": 5e-05, "loss": 0.2703, "num_input_tokens_seen": 66629428, "step": 730 }, { "epoch": 3.0416666666666665, "loss": 0.29808974266052246, "loss_ce": 0.0013368347426876426, "loss_iou": 0.27734375, "loss_num": 0.043212890625, "loss_xval": 0.296875, "num_input_tokens_seen": 66629428, "step": 730 }, { "epoch": 3.0458333333333334, "grad_norm": 6.375099040028535, "learning_rate": 5e-05, "loss": 0.2234, "num_input_tokens_seen": 66720904, "step": 731 }, { "epoch": 3.0458333333333334, "loss": 0.15203692018985748, "loss_ce": 0.0004561072855722159, "loss_iou": 0.259765625, "loss_num": 0.01519775390625, "loss_xval": 0.1513671875, "num_input_tokens_seen": 66720904, "step": 731 }, { "epoch": 3.05, "grad_norm": 8.7307235975561, "learning_rate": 5e-05, "loss": 0.1671, "num_input_tokens_seen": 66812448, "step": 732 }, { "epoch": 3.05, "loss": 0.15988758206367493, "loss_ce": 0.0007155279163271189, "loss_iou": 0.265625, "loss_num": 0.0162353515625, "loss_xval": 0.1591796875, "num_input_tokens_seen": 66812448, "step": 732 }, { "epoch": 3.0541666666666667, "grad_norm": 10.132144554810068, "learning_rate": 5e-05, "loss": 0.2463, "num_input_tokens_seen": 66903796, "step": 733 }, { "epoch": 3.0541666666666667, "loss": 0.23347671329975128, "loss_ce": 0.0009327692678198218, "loss_iou": 0.41796875, "loss_num": 0.02197265625, "loss_xval": 0.232421875, "num_input_tokens_seen": 66903796, "step": 733 }, { "epoch": 3.058333333333333, "grad_norm": 13.139156179340397, "learning_rate": 5e-05, "loss": 0.1758, "num_input_tokens_seen": 66994496, "step": 734 }, { "epoch": 3.058333333333333, "loss": 0.17115697264671326, "loss_ce": 0.00013645495346281677, "loss_iou": 0.3125, "loss_num": 0.015869140625, "loss_xval": 0.1708984375, "num_input_tokens_seen": 66994496, "step": 734 }, { "epoch": 3.0625, "grad_norm": 60.285167818015054, "learning_rate": 5e-05, "loss": 0.2115, "num_input_tokens_seen": 67085600, "step": 735 }, { "epoch": 3.0625, "loss": 0.24958476424217224, "loss_ce": 0.0008054607314988971, "loss_iou": 0.330078125, "loss_num": 0.0303955078125, "loss_xval": 0.2490234375, "num_input_tokens_seen": 67085600, "step": 735 }, { "epoch": 3.066666666666667, "grad_norm": 10.104238622158336, "learning_rate": 5e-05, "loss": 0.2253, "num_input_tokens_seen": 67176960, "step": 736 }, { "epoch": 3.066666666666667, "loss": 0.23942793905735016, "loss_ce": 0.0004142660181969404, "loss_iou": 0.353515625, "loss_num": 0.0269775390625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 67176960, "step": 736 }, { "epoch": 3.0708333333333333, "grad_norm": 110.64647697840584, "learning_rate": 5e-05, "loss": 0.3002, "num_input_tokens_seen": 67268684, "step": 737 }, { "epoch": 3.0708333333333333, "loss": 0.4394175410270691, "loss_ce": 0.0021616898011416197, "loss_iou": 0.359375, "loss_num": 0.06591796875, "loss_xval": 0.4375, "num_input_tokens_seen": 67268684, "step": 737 }, { "epoch": 3.075, "grad_norm": 24.477292386933787, "learning_rate": 5e-05, "loss": 0.2881, "num_input_tokens_seen": 67359580, "step": 738 }, { "epoch": 3.075, "loss": 0.3519362211227417, "loss_ce": 0.00025164170074276626, "loss_iou": 0.443359375, "loss_num": 0.044189453125, "loss_xval": 0.3515625, "num_input_tokens_seen": 67359580, "step": 738 }, { "epoch": 3.0791666666666666, "grad_norm": 11.227093648263798, "learning_rate": 5e-05, "loss": 0.3251, "num_input_tokens_seen": 67451448, "step": 739 }, { "epoch": 3.0791666666666666, "loss": 0.34748974442481995, "loss_ce": 0.0026411088183522224, "loss_iou": 0.3046875, "loss_num": 0.051025390625, "loss_xval": 0.345703125, "num_input_tokens_seen": 67451448, "step": 739 }, { "epoch": 3.0833333333333335, "grad_norm": 19.663817251326506, "learning_rate": 5e-05, "loss": 0.2662, "num_input_tokens_seen": 67542240, "step": 740 }, { "epoch": 3.0833333333333335, "loss": 0.26498711109161377, "loss_ce": 3.35053882736247e-05, "loss_iou": 0.37109375, "loss_num": 0.0311279296875, "loss_xval": 0.265625, "num_input_tokens_seen": 67542240, "step": 740 }, { "epoch": 3.0875, "grad_norm": 7.2515753144097355, "learning_rate": 5e-05, "loss": 0.1997, "num_input_tokens_seen": 67634168, "step": 741 }, { "epoch": 3.0875, "loss": 0.20393003523349762, "loss_ce": 0.004192485008388758, "loss_iou": 0.2158203125, "loss_num": 0.0272216796875, "loss_xval": 0.2001953125, "num_input_tokens_seen": 67634168, "step": 741 }, { "epoch": 3.091666666666667, "grad_norm": 9.50108893290788, "learning_rate": 5e-05, "loss": 0.1838, "num_input_tokens_seen": 67724972, "step": 742 }, { "epoch": 3.091666666666667, "loss": 0.1799710988998413, "loss_ce": 3.946739889215678e-05, "loss_iou": 0.3125, "loss_num": 0.0174560546875, "loss_xval": 0.1796875, "num_input_tokens_seen": 67724972, "step": 742 }, { "epoch": 3.095833333333333, "grad_norm": 9.838223610176781, "learning_rate": 5e-05, "loss": 0.2151, "num_input_tokens_seen": 67816724, "step": 743 }, { "epoch": 3.095833333333333, "loss": 0.23354679346084595, "loss_ce": 0.0018878569826483727, "loss_iou": 0.25, "loss_num": 0.031494140625, "loss_xval": 0.2314453125, "num_input_tokens_seen": 67816724, "step": 743 }, { "epoch": 3.1, "grad_norm": 23.535071183878934, "learning_rate": 5e-05, "loss": 0.2149, "num_input_tokens_seen": 67908128, "step": 744 }, { "epoch": 3.1, "loss": 0.24034494161605835, "loss_ce": 0.0024909228086471558, "loss_iou": 0.42578125, "loss_num": 0.022216796875, "loss_xval": 0.23828125, "num_input_tokens_seen": 67908128, "step": 744 }, { "epoch": 3.1041666666666665, "grad_norm": 11.178389315992188, "learning_rate": 5e-05, "loss": 0.2537, "num_input_tokens_seen": 67999636, "step": 745 }, { "epoch": 3.1041666666666665, "loss": 0.308246910572052, "loss_ce": 0.006122882943600416, "loss_iou": 0.296875, "loss_num": 0.042724609375, "loss_xval": 0.302734375, "num_input_tokens_seen": 67999636, "step": 745 }, { "epoch": 3.1083333333333334, "grad_norm": 21.301332250689942, "learning_rate": 5e-05, "loss": 0.237, "num_input_tokens_seen": 68091420, "step": 746 }, { "epoch": 3.1083333333333334, "loss": 0.30138248205184937, "loss_ce": 0.001211579772643745, "loss_iou": 0.44921875, "loss_num": 0.033203125, "loss_xval": 0.30078125, "num_input_tokens_seen": 68091420, "step": 746 }, { "epoch": 3.1125, "grad_norm": 20.655782682495296, "learning_rate": 5e-05, "loss": 0.2335, "num_input_tokens_seen": 68183032, "step": 747 }, { "epoch": 3.1125, "loss": 0.23181165754795074, "loss_ce": 0.0014039536472409964, "loss_iou": 0.28515625, "loss_num": 0.029052734375, "loss_xval": 0.23046875, "num_input_tokens_seen": 68183032, "step": 747 }, { "epoch": 3.1166666666666667, "grad_norm": 18.36717474641016, "learning_rate": 5e-05, "loss": 0.2397, "num_input_tokens_seen": 68273852, "step": 748 }, { "epoch": 3.1166666666666667, "loss": 0.25389769673347473, "loss_ce": 0.0002966265310533345, "loss_iou": 0.130859375, "loss_num": 0.04296875, "loss_xval": 0.25390625, "num_input_tokens_seen": 68273852, "step": 748 }, { "epoch": 3.120833333333333, "grad_norm": 5.027092516315899, "learning_rate": 5e-05, "loss": 0.3024, "num_input_tokens_seen": 68365540, "step": 749 }, { "epoch": 3.120833333333333, "loss": 0.3448534607887268, "loss_ce": 0.0008593128295615315, "loss_iou": 0.40234375, "loss_num": 0.044677734375, "loss_xval": 0.34375, "num_input_tokens_seen": 68365540, "step": 749 }, { "epoch": 3.125, "grad_norm": 10.119415515193976, "learning_rate": 5e-05, "loss": 0.2683, "num_input_tokens_seen": 68457120, "step": 750 }, { "epoch": 3.125, "eval_seeclick_CIoU": 0.18456538021564484, "eval_seeclick_GIoU": 0.15629717707633972, "eval_seeclick_IoU": 0.30682089924812317, "eval_seeclick_MAE_all": 0.10893617942929268, "eval_seeclick_MAE_h": 0.11178385838866234, "eval_seeclick_MAE_w": 0.21100984513759613, "eval_seeclick_MAE_x_boxes": 0.23261529207229614, "eval_seeclick_MAE_y_boxes": 0.10914269834756851, "eval_seeclick_NUM_probability": 0.9999980628490448, "eval_seeclick_inside_bbox": 0.5085227340459824, "eval_seeclick_loss": 0.6975008845329285, "eval_seeclick_loss_ce": 0.09834163635969162, "eval_seeclick_loss_iou": 0.392333984375, "eval_seeclick_loss_num": 0.0947113037109375, "eval_seeclick_loss_xval": 0.5914306640625, "eval_seeclick_runtime": 73.0255, "eval_seeclick_samples_per_second": 0.589, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 68457120, "step": 750 }, { "epoch": 3.125, "eval_icons_CIoU": 0.38979673385620117, "eval_icons_GIoU": 0.39880916476249695, "eval_icons_IoU": 0.45799557864665985, "eval_icons_MAE_all": 0.0646231584250927, "eval_icons_MAE_h": 0.1286742426455021, "eval_icons_MAE_w": 0.10467254742980003, "eval_icons_MAE_x_boxes": 0.10263776406645775, "eval_icons_MAE_y_boxes": 0.1300939917564392, "eval_icons_NUM_probability": 0.9999995827674866, "eval_icons_inside_bbox": 0.6180555522441864, "eval_icons_loss": 0.405547559261322, "eval_icons_loss_ce": 8.476455661821092e-07, "eval_icons_loss_iou": 0.26947021484375, "eval_icons_loss_num": 0.06772613525390625, "eval_icons_loss_xval": 0.41937255859375, "eval_icons_runtime": 89.5837, "eval_icons_samples_per_second": 0.558, "eval_icons_steps_per_second": 0.022, "num_input_tokens_seen": 68457120, "step": 750 }, { "epoch": 3.125, "eval_screenspot_CIoU": 0.33028921484947205, "eval_screenspot_GIoU": 0.3156593143939972, "eval_screenspot_IoU": 0.4068033794562022, "eval_screenspot_MAE_all": 0.10019912074009578, "eval_screenspot_MAE_h": 0.11826031655073166, "eval_screenspot_MAE_w": 0.1802296737829844, "eval_screenspot_MAE_x_boxes": 0.1806080937385559, "eval_screenspot_MAE_y_boxes": 0.11672305067380269, "eval_screenspot_NUM_probability": 0.9999342759450277, "eval_screenspot_inside_bbox": 0.6120833357175192, "eval_screenspot_loss": 0.6047809720039368, "eval_screenspot_loss_ce": 3.964887582696974e-05, "eval_screenspot_loss_iou": 0.3628336588541667, "eval_screenspot_loss_num": 0.09828694661458333, "eval_screenspot_loss_xval": 0.600341796875, "eval_screenspot_runtime": 154.3123, "eval_screenspot_samples_per_second": 0.577, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 68457120, "step": 750 }, { "epoch": 3.125, "eval_compot_CIoU": 0.3661753237247467, "eval_compot_GIoU": 0.3585101515054703, "eval_compot_IoU": 0.448599174618721, "eval_compot_MAE_all": 0.06923724710941315, "eval_compot_MAE_h": 0.10827170684933662, "eval_compot_MAE_w": 0.1390521600842476, "eval_compot_MAE_x_boxes": 0.1365266591310501, "eval_compot_MAE_y_boxes": 0.10781393945217133, "eval_compot_NUM_probability": 0.9999944269657135, "eval_compot_inside_bbox": 0.6180555522441864, "eval_compot_loss": 0.4263221025466919, "eval_compot_loss_ce": 0.012015189044177532, "eval_compot_loss_iou": 0.2703857421875, "eval_compot_loss_num": 0.06455612182617188, "eval_compot_loss_xval": 0.40386962890625, "eval_compot_runtime": 87.8802, "eval_compot_samples_per_second": 0.569, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 68457120, "step": 750 }, { "epoch": 3.129166666666667, "grad_norm": 4.23326422179187, "learning_rate": 5e-05, "loss": 0.1328, "num_input_tokens_seen": 68547508, "step": 751 }, { "epoch": 3.129166666666667, "loss": 0.1485927700996399, "loss_ce": 0.003664804855361581, "loss_iou": 0.2890625, "loss_num": 0.029052734375, "loss_xval": 0.14453125, "num_input_tokens_seen": 68547508, "step": 751 }, { "epoch": 3.1333333333333333, "grad_norm": 4.24613529579631, "learning_rate": 5e-05, "loss": 0.123, "num_input_tokens_seen": 68638744, "step": 752 }, { "epoch": 3.1333333333333333, "loss": 0.12835034728050232, "loss_ce": 0.0031519709154963493, "loss_iou": 0.412109375, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 68638744, "step": 752 }, { "epoch": 3.1375, "grad_norm": 5.939108838935815, "learning_rate": 5e-05, "loss": 0.2623, "num_input_tokens_seen": 68729548, "step": 753 }, { "epoch": 3.1375, "loss": 0.35694050788879395, "loss_ce": 0.0001289581268792972, "loss_iou": 0.1298828125, "loss_num": 0.0712890625, "loss_xval": 0.357421875, "num_input_tokens_seen": 68729548, "step": 753 }, { "epoch": 3.1416666666666666, "grad_norm": 5.088557187131976, "learning_rate": 5e-05, "loss": 0.1229, "num_input_tokens_seen": 68820904, "step": 754 }, { "epoch": 3.1416666666666666, "loss": 0.15128138661384583, "loss_ce": 0.0018978423904627562, "loss_iou": 0.259765625, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 68820904, "step": 754 }, { "epoch": 3.1458333333333335, "grad_norm": 8.873679118003906, "learning_rate": 5e-05, "loss": 0.164, "num_input_tokens_seen": 68912692, "step": 755 }, { "epoch": 3.1458333333333335, "loss": 0.20318354666233063, "loss_ce": 0.000897779711522162, "loss_iou": 0.24609375, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 68912692, "step": 755 }, { "epoch": 3.15, "grad_norm": 16.297387562182426, "learning_rate": 5e-05, "loss": 0.13, "num_input_tokens_seen": 69003836, "step": 756 }, { "epoch": 3.15, "loss": 0.139330193400383, "loss_ce": 0.0013602229300886393, "loss_iou": 0.326171875, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 69003836, "step": 756 }, { "epoch": 3.154166666666667, "grad_norm": 2.1080044689722515, "learning_rate": 5e-05, "loss": 0.1267, "num_input_tokens_seen": 69094656, "step": 757 }, { "epoch": 3.154166666666667, "loss": 0.1562238335609436, "loss_ce": 0.0010114258620887995, "loss_iou": 0.1162109375, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 69094656, "step": 757 }, { "epoch": 3.158333333333333, "grad_norm": 19.595354792790424, "learning_rate": 5e-05, "loss": 0.1061, "num_input_tokens_seen": 69186340, "step": 758 }, { "epoch": 3.158333333333333, "loss": 0.09858790785074234, "loss_ce": 0.00019923456420656294, "loss_iou": 0.29296875, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 69186340, "step": 758 }, { "epoch": 3.1625, "grad_norm": 8.59897241926053, "learning_rate": 5e-05, "loss": 0.179, "num_input_tokens_seen": 69277932, "step": 759 }, { "epoch": 3.1625, "loss": 0.2634497582912445, "loss_ce": 0.0026465251576155424, "loss_iou": 0.322265625, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 69277932, "step": 759 }, { "epoch": 3.1666666666666665, "grad_norm": 3.239029242443324, "learning_rate": 5e-05, "loss": 0.1348, "num_input_tokens_seen": 69369724, "step": 760 }, { "epoch": 3.1666666666666665, "loss": 0.15876804292201996, "loss_ce": 0.0007175026694312692, "loss_iou": 0.232421875, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 69369724, "step": 760 }, { "epoch": 3.1708333333333334, "grad_norm": 4.391061916761268, "learning_rate": 5e-05, "loss": 0.1649, "num_input_tokens_seen": 69460676, "step": 761 }, { "epoch": 3.1708333333333334, "loss": 0.14667901396751404, "loss_ce": 0.00150689750444144, "loss_iou": 0.2734375, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 69460676, "step": 761 }, { "epoch": 3.175, "grad_norm": 3.076163275564048, "learning_rate": 5e-05, "loss": 0.1377, "num_input_tokens_seen": 69551892, "step": 762 }, { "epoch": 3.175, "loss": 0.10917718708515167, "loss_ce": 0.000191280065337196, "loss_iou": 0.045654296875, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 69551892, "step": 762 }, { "epoch": 3.1791666666666667, "grad_norm": 4.103402222438804, "learning_rate": 5e-05, "loss": 0.1578, "num_input_tokens_seen": 69643000, "step": 763 }, { "epoch": 3.1791666666666667, "loss": 0.17557072639465332, "loss_ce": 0.002612349344417453, "loss_iou": 0.26953125, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 69643000, "step": 763 }, { "epoch": 3.183333333333333, "grad_norm": 6.504159280235073, "learning_rate": 5e-05, "loss": 0.184, "num_input_tokens_seen": 69734104, "step": 764 }, { "epoch": 3.183333333333333, "loss": 0.16976343095302582, "loss_ce": 0.0020998548716306686, "loss_iou": 0.373046875, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 69734104, "step": 764 }, { "epoch": 3.1875, "grad_norm": 4.173262875208758, "learning_rate": 5e-05, "loss": 0.1794, "num_input_tokens_seen": 69825836, "step": 765 }, { "epoch": 3.1875, "loss": 0.19754093885421753, "loss_ce": 0.0003973785205744207, "loss_iou": 0.44140625, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 69825836, "step": 765 }, { "epoch": 3.191666666666667, "grad_norm": 10.20024471572148, "learning_rate": 5e-05, "loss": 0.1561, "num_input_tokens_seen": 69918116, "step": 766 }, { "epoch": 3.191666666666667, "loss": 0.18424035608768463, "loss_ce": 0.0032558543607592583, "loss_iou": 0.29296875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 69918116, "step": 766 }, { "epoch": 3.1958333333333333, "grad_norm": 15.316242677734815, "learning_rate": 5e-05, "loss": 0.1123, "num_input_tokens_seen": 70009612, "step": 767 }, { "epoch": 3.1958333333333333, "loss": 0.10993720591068268, "loss_ce": 0.0009894566610455513, "loss_iou": 0.32421875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 70009612, "step": 767 }, { "epoch": 3.2, "grad_norm": 30.342286623122178, "learning_rate": 5e-05, "loss": 0.2394, "num_input_tokens_seen": 70100704, "step": 768 }, { "epoch": 3.2, "loss": 0.1780831515789032, "loss_ce": 0.010602687485516071, "loss_iou": 0.2041015625, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 70100704, "step": 768 }, { "epoch": 3.2041666666666666, "grad_norm": 11.984200740333602, "learning_rate": 5e-05, "loss": 0.1789, "num_input_tokens_seen": 70191612, "step": 769 }, { "epoch": 3.2041666666666666, "loss": 0.23412063717842102, "loss_ce": 0.00130203808657825, "loss_iou": 0.1494140625, "loss_num": 0.046630859375, "loss_xval": 0.232421875, "num_input_tokens_seen": 70191612, "step": 769 }, { "epoch": 3.2083333333333335, "grad_norm": 4.047384925920614, "learning_rate": 5e-05, "loss": 0.1686, "num_input_tokens_seen": 70281980, "step": 770 }, { "epoch": 3.2083333333333335, "loss": 0.12489617615938187, "loss_ce": 0.001879819785244763, "loss_iou": 0.1298828125, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 70281980, "step": 770 }, { "epoch": 3.2125, "grad_norm": 2.803294314643065, "learning_rate": 5e-05, "loss": 0.0901, "num_input_tokens_seen": 70372984, "step": 771 }, { "epoch": 3.2125, "loss": 0.10114803165197372, "loss_ce": 0.0024694406893104315, "loss_iou": 0.1669921875, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 70372984, "step": 771 }, { "epoch": 3.216666666666667, "grad_norm": 6.785832043826993, "learning_rate": 5e-05, "loss": 0.1715, "num_input_tokens_seen": 70463936, "step": 772 }, { "epoch": 3.216666666666667, "loss": 0.12498641759157181, "loss_ce": 0.002580410335212946, "loss_iou": 0.296875, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 70463936, "step": 772 }, { "epoch": 3.220833333333333, "grad_norm": 5.1851765366428815, "learning_rate": 5e-05, "loss": 0.1977, "num_input_tokens_seen": 70555092, "step": 773 }, { "epoch": 3.220833333333333, "loss": 0.26781585812568665, "loss_ce": 0.003701466601341963, "loss_iou": 0.15234375, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 70555092, "step": 773 }, { "epoch": 3.225, "grad_norm": 8.688299795759383, "learning_rate": 5e-05, "loss": 0.152, "num_input_tokens_seen": 70646432, "step": 774 }, { "epoch": 3.225, "loss": 0.12950116395950317, "loss_ce": 0.0030057919211685658, "loss_iou": 0.1884765625, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 70646432, "step": 774 }, { "epoch": 3.2291666666666665, "grad_norm": 3.4936298082637647, "learning_rate": 5e-05, "loss": 0.1429, "num_input_tokens_seen": 70737124, "step": 775 }, { "epoch": 3.2291666666666665, "loss": 0.16758891940116882, "loss_ce": 0.00013895254232920706, "loss_iou": 0.36328125, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 70737124, "step": 775 }, { "epoch": 3.2333333333333334, "grad_norm": 5.350457455750773, "learning_rate": 5e-05, "loss": 0.1123, "num_input_tokens_seen": 70827660, "step": 776 }, { "epoch": 3.2333333333333334, "loss": 0.10184476524591446, "loss_ce": 7.608376108692028e-06, "loss_iou": 0.04638671875, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 70827660, "step": 776 }, { "epoch": 3.2375, "grad_norm": 2.5950546414943525, "learning_rate": 5e-05, "loss": 0.1718, "num_input_tokens_seen": 70919060, "step": 777 }, { "epoch": 3.2375, "loss": 0.20926007628440857, "loss_ce": 0.0016261127311736345, "loss_iou": 0.2373046875, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 70919060, "step": 777 }, { "epoch": 3.2416666666666667, "grad_norm": 9.458066813907067, "learning_rate": 5e-05, "loss": 0.1588, "num_input_tokens_seen": 71010772, "step": 778 }, { "epoch": 3.2416666666666667, "loss": 0.1460130363702774, "loss_ce": 0.0014054938219487667, "loss_iou": 0.216796875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 71010772, "step": 778 }, { "epoch": 3.245833333333333, "grad_norm": 6.716962142407295, "learning_rate": 5e-05, "loss": 0.1795, "num_input_tokens_seen": 71101844, "step": 779 }, { "epoch": 3.245833333333333, "loss": 0.17741234600543976, "loss_ce": 0.002088862704113126, "loss_iou": 0.23046875, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 71101844, "step": 779 }, { "epoch": 3.25, "grad_norm": 15.172517224504327, "learning_rate": 5e-05, "loss": 0.1973, "num_input_tokens_seen": 71192812, "step": 780 }, { "epoch": 3.25, "loss": 0.17874208092689514, "loss_ce": 0.00018372779595665634, "loss_iou": 0.314453125, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 71192812, "step": 780 }, { "epoch": 3.2541666666666664, "grad_norm": 9.244636582891307, "learning_rate": 5e-05, "loss": 0.1933, "num_input_tokens_seen": 71284428, "step": 781 }, { "epoch": 3.2541666666666664, "loss": 0.23924417793750763, "loss_ce": 0.0009018905693665147, "loss_iou": 0.296875, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 71284428, "step": 781 }, { "epoch": 3.2583333333333333, "grad_norm": 3.405929870103962, "learning_rate": 5e-05, "loss": 0.1475, "num_input_tokens_seen": 71375580, "step": 782 }, { "epoch": 3.2583333333333333, "loss": 0.14019837975502014, "loss_ce": 3.11500443785917e-05, "loss_iou": 0.134765625, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 71375580, "step": 782 }, { "epoch": 3.2625, "grad_norm": 4.298926898901773, "learning_rate": 5e-05, "loss": 0.1711, "num_input_tokens_seen": 71466292, "step": 783 }, { "epoch": 3.2625, "loss": 0.12115862220525742, "loss_ce": 6.487128121079877e-05, "loss_iou": 0.216796875, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 71466292, "step": 783 }, { "epoch": 3.2666666666666666, "grad_norm": 3.795022106927016, "learning_rate": 5e-05, "loss": 0.0777, "num_input_tokens_seen": 71557532, "step": 784 }, { "epoch": 3.2666666666666666, "loss": 0.06865088641643524, "loss_ce": 0.002442999044433236, "loss_iou": 0.2734375, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 71557532, "step": 784 }, { "epoch": 3.2708333333333335, "grad_norm": 4.1149975795163485, "learning_rate": 5e-05, "loss": 0.0999, "num_input_tokens_seen": 71648752, "step": 785 }, { "epoch": 3.2708333333333335, "loss": 0.08993716537952423, "loss_ce": 0.0004901447682641447, "loss_iou": 0.328125, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 71648752, "step": 785 }, { "epoch": 3.275, "grad_norm": 6.759255492884617, "learning_rate": 5e-05, "loss": 0.1545, "num_input_tokens_seen": 71740376, "step": 786 }, { "epoch": 3.275, "loss": 0.11835940927267075, "loss_ce": 0.002331583062186837, "loss_iou": 0.25, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 71740376, "step": 786 }, { "epoch": 3.279166666666667, "grad_norm": 3.5243667525079267, "learning_rate": 5e-05, "loss": 0.137, "num_input_tokens_seen": 71832116, "step": 787 }, { "epoch": 3.279166666666667, "loss": 0.07449323683977127, "loss_ce": 0.00338728167116642, "loss_iou": 0.2412109375, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 71832116, "step": 787 }, { "epoch": 3.283333333333333, "grad_norm": 3.4524324577376686, "learning_rate": 5e-05, "loss": 0.1167, "num_input_tokens_seen": 71923616, "step": 788 }, { "epoch": 3.283333333333333, "loss": 0.12164635211229324, "loss_ce": 0.0007814801065251231, "loss_iou": 0.400390625, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 71923616, "step": 788 }, { "epoch": 3.2875, "grad_norm": 4.781918619933174, "learning_rate": 5e-05, "loss": 0.1268, "num_input_tokens_seen": 72014964, "step": 789 }, { "epoch": 3.2875, "loss": 0.18079587817192078, "loss_ce": 0.00022337015252560377, "loss_iou": 0.234375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 72014964, "step": 789 }, { "epoch": 3.2916666666666665, "grad_norm": 5.517276258832106, "learning_rate": 5e-05, "loss": 0.1124, "num_input_tokens_seen": 72106332, "step": 790 }, { "epoch": 3.2916666666666665, "loss": 0.12845715880393982, "loss_ce": 0.001198849524371326, "loss_iou": 0.125, "loss_num": 0.0255126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 72106332, "step": 790 }, { "epoch": 3.2958333333333334, "grad_norm": 29.98018266638596, "learning_rate": 5e-05, "loss": 0.1293, "num_input_tokens_seen": 72197912, "step": 791 }, { "epoch": 3.2958333333333334, "loss": 0.10846811532974243, "loss_ce": 0.00019174793851561844, "loss_iou": 0.25, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 72197912, "step": 791 }, { "epoch": 3.3, "grad_norm": 9.902237060806767, "learning_rate": 5e-05, "loss": 0.2077, "num_input_tokens_seen": 72288952, "step": 792 }, { "epoch": 3.3, "loss": 0.1036197692155838, "loss_ce": 0.0003177704056724906, "loss_iou": 0.2578125, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 72288952, "step": 792 }, { "epoch": 3.3041666666666667, "grad_norm": 5.384154441900459, "learning_rate": 5e-05, "loss": 0.2307, "num_input_tokens_seen": 72379412, "step": 793 }, { "epoch": 3.3041666666666667, "loss": 0.26934942603111267, "loss_ce": 1.288789690079284e-06, "loss_iou": 0.1611328125, "loss_num": 0.053955078125, "loss_xval": 0.26953125, "num_input_tokens_seen": 72379412, "step": 793 }, { "epoch": 3.3083333333333336, "grad_norm": 3.253569574893736, "learning_rate": 5e-05, "loss": 0.1432, "num_input_tokens_seen": 72470820, "step": 794 }, { "epoch": 3.3083333333333336, "loss": 0.08719146251678467, "loss_ce": 0.0017727642552927136, "loss_iou": 0.201171875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 72470820, "step": 794 }, { "epoch": 3.3125, "grad_norm": 4.513685619297302, "learning_rate": 5e-05, "loss": 0.1465, "num_input_tokens_seen": 72562456, "step": 795 }, { "epoch": 3.3125, "loss": 0.17177698016166687, "loss_ce": 0.007043100893497467, "loss_iou": 0.267578125, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 72562456, "step": 795 }, { "epoch": 3.3166666666666664, "grad_norm": 7.32082631713167, "learning_rate": 5e-05, "loss": 0.1907, "num_input_tokens_seen": 72653804, "step": 796 }, { "epoch": 3.3166666666666664, "loss": 0.2058718502521515, "loss_ce": 0.0014651028905063868, "loss_iou": 0.23046875, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 72653804, "step": 796 }, { "epoch": 3.3208333333333333, "grad_norm": 5.038456487445877, "learning_rate": 5e-05, "loss": 0.1141, "num_input_tokens_seen": 72745292, "step": 797 }, { "epoch": 3.3208333333333333, "loss": 0.11923874169588089, "loss_ce": 0.00018967277719639242, "loss_iou": 0.27734375, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 72745292, "step": 797 }, { "epoch": 3.325, "grad_norm": 3.784735092054637, "learning_rate": 5e-05, "loss": 0.1424, "num_input_tokens_seen": 72836684, "step": 798 }, { "epoch": 3.325, "loss": 0.12178568542003632, "loss_ce": 0.0003562434285413474, "loss_iou": 0.2392578125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 72836684, "step": 798 }, { "epoch": 3.3291666666666666, "grad_norm": 3.8761231327791097, "learning_rate": 5e-05, "loss": 0.1353, "num_input_tokens_seen": 72928732, "step": 799 }, { "epoch": 3.3291666666666666, "loss": 0.14368420839309692, "loss_ce": 0.00397472595795989, "loss_iou": 0.16015625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 72928732, "step": 799 }, { "epoch": 3.3333333333333335, "grad_norm": 4.350988633709539, "learning_rate": 5e-05, "loss": 0.1429, "num_input_tokens_seen": 73019092, "step": 800 }, { "epoch": 3.3333333333333335, "loss": 0.07402680069208145, "loss_ce": 0.0004031416610814631, "loss_iou": 0.1875, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 73019092, "step": 800 }, { "epoch": 3.3375, "grad_norm": 6.427134290140459, "learning_rate": 5e-05, "loss": 0.2131, "num_input_tokens_seen": 73110312, "step": 801 }, { "epoch": 3.3375, "loss": 0.23900842666625977, "loss_ce": 0.0008492398192174733, "loss_iou": 0.17578125, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 73110312, "step": 801 }, { "epoch": 3.341666666666667, "grad_norm": 8.603622827505802, "learning_rate": 5e-05, "loss": 0.1597, "num_input_tokens_seen": 73201564, "step": 802 }, { "epoch": 3.341666666666667, "loss": 0.1939290463924408, "loss_ce": 0.0020650303922593594, "loss_iou": 0.2490234375, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 73201564, "step": 802 }, { "epoch": 3.345833333333333, "grad_norm": 2.8720544166960487, "learning_rate": 5e-05, "loss": 0.1123, "num_input_tokens_seen": 73292644, "step": 803 }, { "epoch": 3.345833333333333, "loss": 0.09989237040281296, "loss_ce": 0.00016092188889160752, "loss_iou": 0.302734375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 73292644, "step": 803 }, { "epoch": 3.35, "grad_norm": 7.114589570182523, "learning_rate": 5e-05, "loss": 0.1079, "num_input_tokens_seen": 73384220, "step": 804 }, { "epoch": 3.35, "loss": 0.07536976039409637, "loss_ce": 0.0018834264483302832, "loss_iou": 0.296875, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 73384220, "step": 804 }, { "epoch": 3.3541666666666665, "grad_norm": 4.828739351086096, "learning_rate": 5e-05, "loss": 0.1446, "num_input_tokens_seen": 73475856, "step": 805 }, { "epoch": 3.3541666666666665, "loss": 0.10500533878803253, "loss_ce": 0.0006504841148853302, "loss_iou": 0.26953125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 73475856, "step": 805 }, { "epoch": 3.3583333333333334, "grad_norm": 3.214913125602273, "learning_rate": 5e-05, "loss": 0.1161, "num_input_tokens_seen": 73567160, "step": 806 }, { "epoch": 3.3583333333333334, "loss": 0.12572184205055237, "loss_ce": 0.0043229046277701855, "loss_iou": 0.283203125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 73567160, "step": 806 }, { "epoch": 3.3625, "grad_norm": 7.114280904079044, "learning_rate": 5e-05, "loss": 0.1481, "num_input_tokens_seen": 73658604, "step": 807 }, { "epoch": 3.3625, "loss": 0.21147285401821136, "loss_ce": 0.0032056490890681744, "loss_iou": 0.27734375, "loss_num": 0.041748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 73658604, "step": 807 }, { "epoch": 3.3666666666666667, "grad_norm": 2.2326677998565954, "learning_rate": 5e-05, "loss": 0.1255, "num_input_tokens_seen": 73749692, "step": 808 }, { "epoch": 3.3666666666666667, "loss": 0.1155465841293335, "loss_ce": 0.004264240153133869, "loss_iou": 0.275390625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 73749692, "step": 808 }, { "epoch": 3.3708333333333336, "grad_norm": 2.0023833310085335, "learning_rate": 5e-05, "loss": 0.1097, "num_input_tokens_seen": 73840924, "step": 809 }, { "epoch": 3.3708333333333336, "loss": 0.1414848268032074, "loss_ce": 3.585106605896726e-05, "loss_iou": 0.376953125, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 73840924, "step": 809 }, { "epoch": 3.375, "grad_norm": 3.1374653335995903, "learning_rate": 5e-05, "loss": 0.1364, "num_input_tokens_seen": 73932068, "step": 810 }, { "epoch": 3.375, "loss": 0.1625884622335434, "loss_ce": 0.0015929804649204016, "loss_iou": 0.26171875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 73932068, "step": 810 }, { "epoch": 3.3791666666666664, "grad_norm": 1.685708414668304, "learning_rate": 5e-05, "loss": 0.0805, "num_input_tokens_seen": 74023680, "step": 811 }, { "epoch": 3.3791666666666664, "loss": 0.07619469612836838, "loss_ce": 0.0005111010977998376, "loss_iou": 0.2734375, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 74023680, "step": 811 }, { "epoch": 3.3833333333333333, "grad_norm": 1.8933376014832215, "learning_rate": 5e-05, "loss": 0.1564, "num_input_tokens_seen": 74114916, "step": 812 }, { "epoch": 3.3833333333333333, "loss": 0.16464364528656006, "loss_ce": 0.003922812175005674, "loss_iou": 0.2099609375, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 74114916, "step": 812 }, { "epoch": 3.3875, "grad_norm": 4.402993261964665, "learning_rate": 5e-05, "loss": 0.1436, "num_input_tokens_seen": 74206152, "step": 813 }, { "epoch": 3.3875, "loss": 0.1346263587474823, "loss_ce": 0.00030323388637043536, "loss_iou": 0.3125, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 74206152, "step": 813 }, { "epoch": 3.3916666666666666, "grad_norm": 8.942507116728107, "learning_rate": 5e-05, "loss": 0.0969, "num_input_tokens_seen": 74298040, "step": 814 }, { "epoch": 3.3916666666666666, "loss": 0.08397021889686584, "loss_ce": 0.0021068197675049305, "loss_iou": 0.37109375, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 74298040, "step": 814 }, { "epoch": 3.3958333333333335, "grad_norm": 3.0814600182017395, "learning_rate": 5e-05, "loss": 0.1609, "num_input_tokens_seen": 74389564, "step": 815 }, { "epoch": 3.3958333333333335, "loss": 0.17124760150909424, "loss_ce": 0.000776410277467221, "loss_iou": 0.408203125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 74389564, "step": 815 }, { "epoch": 3.4, "grad_norm": 11.077937502764769, "learning_rate": 5e-05, "loss": 0.203, "num_input_tokens_seen": 74479988, "step": 816 }, { "epoch": 3.4, "loss": 0.2204510122537613, "loss_ce": 7.280018962774193e-06, "loss_iou": 0.14453125, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 74479988, "step": 816 }, { "epoch": 3.404166666666667, "grad_norm": 5.900049711330432, "learning_rate": 5e-05, "loss": 0.107, "num_input_tokens_seen": 74571308, "step": 817 }, { "epoch": 3.404166666666667, "loss": 0.11785734444856644, "loss_ce": 0.0003951911348849535, "loss_iou": 0.287109375, "loss_num": 0.0234375, "loss_xval": 0.11767578125, "num_input_tokens_seen": 74571308, "step": 817 }, { "epoch": 3.408333333333333, "grad_norm": 3.0966138313823004, "learning_rate": 5e-05, "loss": 0.1065, "num_input_tokens_seen": 74661784, "step": 818 }, { "epoch": 3.408333333333333, "loss": 0.1265370100736618, "loss_ce": 0.008952784352004528, "loss_iou": 0.2294921875, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 74661784, "step": 818 }, { "epoch": 3.4125, "grad_norm": 21.46042272135341, "learning_rate": 5e-05, "loss": 0.169, "num_input_tokens_seen": 74753064, "step": 819 }, { "epoch": 3.4125, "loss": 0.06247454136610031, "loss_ce": 0.0009053258690983057, "loss_iou": 0.2490234375, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 74753064, "step": 819 }, { "epoch": 3.4166666666666665, "grad_norm": 5.311244630831775, "learning_rate": 5e-05, "loss": 0.1708, "num_input_tokens_seen": 74844492, "step": 820 }, { "epoch": 3.4166666666666665, "loss": 0.1370103657245636, "loss_ce": 0.0049379244446754456, "loss_iou": 0.2890625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 74844492, "step": 820 }, { "epoch": 3.4208333333333334, "grad_norm": 5.211533051644635, "learning_rate": 5e-05, "loss": 0.185, "num_input_tokens_seen": 74935568, "step": 821 }, { "epoch": 3.4208333333333334, "loss": 0.2441231608390808, "loss_ce": 0.0002572032390162349, "loss_iou": 0.271484375, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 74935568, "step": 821 }, { "epoch": 3.425, "grad_norm": 5.37698998509261, "learning_rate": 5e-05, "loss": 0.1298, "num_input_tokens_seen": 75026872, "step": 822 }, { "epoch": 3.425, "loss": 0.14967405796051025, "loss_ce": 0.00032103960984386504, "loss_iou": 0.40625, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 75026872, "step": 822 }, { "epoch": 3.4291666666666667, "grad_norm": 3.4283196054918132, "learning_rate": 5e-05, "loss": 0.1534, "num_input_tokens_seen": 75118744, "step": 823 }, { "epoch": 3.4291666666666667, "loss": 0.10464496165513992, "loss_ce": 0.002945136744529009, "loss_iou": 0.28125, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 75118744, "step": 823 }, { "epoch": 3.4333333333333336, "grad_norm": 5.775737722048964, "learning_rate": 5e-05, "loss": 0.1075, "num_input_tokens_seen": 75209872, "step": 824 }, { "epoch": 3.4333333333333336, "loss": 0.11979828774929047, "loss_ce": 0.0010086168767884374, "loss_iou": 0.28515625, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 75209872, "step": 824 }, { "epoch": 3.4375, "grad_norm": 3.227782306824348, "learning_rate": 5e-05, "loss": 0.1044, "num_input_tokens_seen": 75301168, "step": 825 }, { "epoch": 3.4375, "loss": 0.08311197906732559, "loss_ce": 0.00010416478471597657, "loss_iou": 0.5390625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 75301168, "step": 825 }, { "epoch": 3.4416666666666664, "grad_norm": 4.921386646325974, "learning_rate": 5e-05, "loss": 0.0919, "num_input_tokens_seen": 75391968, "step": 826 }, { "epoch": 3.4416666666666664, "loss": 0.11057807505130768, "loss_ce": 0.0024695568718016148, "loss_iou": 0.3046875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 75391968, "step": 826 }, { "epoch": 3.4458333333333333, "grad_norm": 8.603358089040904, "learning_rate": 5e-05, "loss": 0.1828, "num_input_tokens_seen": 75483776, "step": 827 }, { "epoch": 3.4458333333333333, "loss": 0.1585550755262375, "loss_ce": 0.003495255019515753, "loss_iou": 0.208984375, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 75483776, "step": 827 }, { "epoch": 3.45, "grad_norm": 2.6733910252403015, "learning_rate": 5e-05, "loss": 0.1585, "num_input_tokens_seen": 75574940, "step": 828 }, { "epoch": 3.45, "loss": 0.14622877538204193, "loss_ce": 0.0012550182873383164, "loss_iou": 0.1650390625, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 75574940, "step": 828 }, { "epoch": 3.4541666666666666, "grad_norm": 7.671186709477866, "learning_rate": 5e-05, "loss": 0.2176, "num_input_tokens_seen": 75666364, "step": 829 }, { "epoch": 3.4541666666666666, "loss": 0.2595589756965637, "loss_ce": 0.000525783107150346, "loss_iou": 0.2109375, "loss_num": 0.0517578125, "loss_xval": 0.259765625, "num_input_tokens_seen": 75666364, "step": 829 }, { "epoch": 3.4583333333333335, "grad_norm": 18.226846072866003, "learning_rate": 5e-05, "loss": 0.1378, "num_input_tokens_seen": 75757436, "step": 830 }, { "epoch": 3.4583333333333335, "loss": 0.16365206241607666, "loss_ce": 1.5429660606969264e-06, "loss_iou": 0.380859375, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 75757436, "step": 830 }, { "epoch": 3.4625, "grad_norm": 8.318685188165535, "learning_rate": 5e-05, "loss": 0.1436, "num_input_tokens_seen": 75849040, "step": 831 }, { "epoch": 3.4625, "loss": 0.15188753604888916, "loss_ce": 0.00012363101996015757, "loss_iou": 0.236328125, "loss_num": 0.0303955078125, "loss_xval": 0.1513671875, "num_input_tokens_seen": 75849040, "step": 831 }, { "epoch": 3.466666666666667, "grad_norm": 2.6630234794156418, "learning_rate": 5e-05, "loss": 0.1736, "num_input_tokens_seen": 75940524, "step": 832 }, { "epoch": 3.466666666666667, "loss": 0.19373507797718048, "loss_ce": 0.001672694575972855, "loss_iou": 0.337890625, "loss_num": 0.038330078125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 75940524, "step": 832 }, { "epoch": 3.470833333333333, "grad_norm": 2.2459169394774356, "learning_rate": 5e-05, "loss": 0.0962, "num_input_tokens_seen": 76031620, "step": 833 }, { "epoch": 3.470833333333333, "loss": 0.12036258727312088, "loss_ce": 0.0005429437151178718, "loss_iou": 0.2294921875, "loss_num": 0.0240478515625, "loss_xval": 0.11962890625, "num_input_tokens_seen": 76031620, "step": 833 }, { "epoch": 3.475, "grad_norm": 2.498827767772015, "learning_rate": 5e-05, "loss": 0.1102, "num_input_tokens_seen": 76122764, "step": 834 }, { "epoch": 3.475, "loss": 0.1229761466383934, "loss_ce": 2.8445483621908352e-05, "loss_iou": 0.2353515625, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 76122764, "step": 834 }, { "epoch": 3.4791666666666665, "grad_norm": 4.298154138835746, "learning_rate": 5e-05, "loss": 0.1125, "num_input_tokens_seen": 76213956, "step": 835 }, { "epoch": 3.4791666666666665, "loss": 0.13150085508823395, "loss_ce": 0.0007940710638649762, "loss_iou": 0.166015625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 76213956, "step": 835 }, { "epoch": 3.4833333333333334, "grad_norm": 4.972732986290837, "learning_rate": 5e-05, "loss": 0.172, "num_input_tokens_seen": 76304764, "step": 836 }, { "epoch": 3.4833333333333334, "loss": 0.22352594137191772, "loss_ce": 0.0004729589563794434, "loss_iou": 0.248046875, "loss_num": 0.044677734375, "loss_xval": 0.22265625, "num_input_tokens_seen": 76304764, "step": 836 }, { "epoch": 3.4875, "grad_norm": 4.725391316756069, "learning_rate": 5e-05, "loss": 0.1716, "num_input_tokens_seen": 76396520, "step": 837 }, { "epoch": 3.4875, "loss": 0.13293907046318054, "loss_ce": 0.0023543545976281166, "loss_iou": 0.3359375, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 76396520, "step": 837 }, { "epoch": 3.4916666666666667, "grad_norm": 5.668771822315559, "learning_rate": 5e-05, "loss": 0.1328, "num_input_tokens_seen": 76488272, "step": 838 }, { "epoch": 3.4916666666666667, "loss": 0.17592602968215942, "loss_ce": 0.0019910915289074183, "loss_iou": 0.251953125, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 76488272, "step": 838 }, { "epoch": 3.4958333333333336, "grad_norm": 5.638127585122112, "learning_rate": 5e-05, "loss": 0.1882, "num_input_tokens_seen": 76579868, "step": 839 }, { "epoch": 3.4958333333333336, "loss": 0.2288551926612854, "loss_ce": 0.004596756771206856, "loss_iou": 0.26171875, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 76579868, "step": 839 }, { "epoch": 3.5, "grad_norm": 9.286255736402026, "learning_rate": 5e-05, "loss": 0.1512, "num_input_tokens_seen": 76671076, "step": 840 }, { "epoch": 3.5, "loss": 0.07215861976146698, "loss_ce": 0.00032024577376432717, "loss_iou": 0.365234375, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 76671076, "step": 840 }, { "epoch": 3.5041666666666664, "grad_norm": 4.386475945583153, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 76762808, "step": 841 }, { "epoch": 3.5041666666666664, "loss": 0.07066143304109573, "loss_ce": 0.0019663649145513773, "loss_iou": 0.365234375, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 76762808, "step": 841 }, { "epoch": 3.5083333333333333, "grad_norm": 4.9959751465411175, "learning_rate": 5e-05, "loss": 0.1352, "num_input_tokens_seen": 76854288, "step": 842 }, { "epoch": 3.5083333333333333, "loss": 0.14913874864578247, "loss_ce": 0.0036614639684557915, "loss_iou": 0.291015625, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 76854288, "step": 842 }, { "epoch": 3.5125, "grad_norm": 1.5990170431545732, "learning_rate": 5e-05, "loss": 0.1006, "num_input_tokens_seen": 76945748, "step": 843 }, { "epoch": 3.5125, "loss": 0.10650460422039032, "loss_ce": 0.002042930806055665, "loss_iou": 0.28515625, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 76945748, "step": 843 }, { "epoch": 3.5166666666666666, "grad_norm": 4.534076156594933, "learning_rate": 5e-05, "loss": 0.1216, "num_input_tokens_seen": 77037200, "step": 844 }, { "epoch": 3.5166666666666666, "loss": 0.09443645179271698, "loss_ce": 0.0006559367757290602, "loss_iou": 0.16796875, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 77037200, "step": 844 }, { "epoch": 3.5208333333333335, "grad_norm": 2.4011977220701426, "learning_rate": 5e-05, "loss": 0.1531, "num_input_tokens_seen": 77129188, "step": 845 }, { "epoch": 3.5208333333333335, "loss": 0.10003243386745453, "loss_ce": 0.0007434985018335283, "loss_iou": 0.265625, "loss_num": 0.0198974609375, "loss_xval": 0.09912109375, "num_input_tokens_seen": 77129188, "step": 845 }, { "epoch": 3.525, "grad_norm": 4.260385981525903, "learning_rate": 5e-05, "loss": 0.0976, "num_input_tokens_seen": 77220180, "step": 846 }, { "epoch": 3.525, "loss": 0.08183970302343369, "loss_ce": 0.00037303665885701776, "loss_iou": 0.2890625, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 77220180, "step": 846 }, { "epoch": 3.529166666666667, "grad_norm": 3.6679913865243883, "learning_rate": 5e-05, "loss": 0.1906, "num_input_tokens_seen": 77311900, "step": 847 }, { "epoch": 3.529166666666667, "loss": 0.21985681354999542, "loss_ce": 0.001732430886477232, "loss_iou": 0.2119140625, "loss_num": 0.043701171875, "loss_xval": 0.2177734375, "num_input_tokens_seen": 77311900, "step": 847 }, { "epoch": 3.533333333333333, "grad_norm": 8.497174574589904, "learning_rate": 5e-05, "loss": 0.1817, "num_input_tokens_seen": 77403052, "step": 848 }, { "epoch": 3.533333333333333, "loss": 0.12078467756509781, "loss_ce": 7.239534897962585e-05, "loss_iou": 0.29296875, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 77403052, "step": 848 }, { "epoch": 3.5375, "grad_norm": 14.354819680373655, "learning_rate": 5e-05, "loss": 0.1719, "num_input_tokens_seen": 77494380, "step": 849 }, { "epoch": 3.5375, "loss": 0.1600828915834427, "loss_ce": 0.0015440742718055844, "loss_iou": 0.341796875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 77494380, "step": 849 }, { "epoch": 3.5416666666666665, "grad_norm": 5.368974849498363, "learning_rate": 5e-05, "loss": 0.1391, "num_input_tokens_seen": 77585304, "step": 850 }, { "epoch": 3.5416666666666665, "loss": 0.1813376545906067, "loss_ce": 0.0013907547108829021, "loss_iou": 0.087890625, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 77585304, "step": 850 }, { "epoch": 3.5458333333333334, "grad_norm": 5.220110320232065, "learning_rate": 5e-05, "loss": 0.2156, "num_input_tokens_seen": 77676964, "step": 851 }, { "epoch": 3.5458333333333334, "loss": 0.1062939465045929, "loss_ce": 0.0022442599292844534, "loss_iou": 0.23046875, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 77676964, "step": 851 }, { "epoch": 3.55, "grad_norm": 7.044117539532497, "learning_rate": 5e-05, "loss": 0.1643, "num_input_tokens_seen": 77768672, "step": 852 }, { "epoch": 3.55, "loss": 0.1451941579580307, "loss_ce": 0.0013648092281073332, "loss_iou": 0.474609375, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 77768672, "step": 852 }, { "epoch": 3.5541666666666667, "grad_norm": 3.512386496593121, "learning_rate": 5e-05, "loss": 0.2079, "num_input_tokens_seen": 77859024, "step": 853 }, { "epoch": 3.5541666666666667, "loss": 0.20865783095359802, "loss_ce": 3.968643432017416e-05, "loss_iou": 0.19140625, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 77859024, "step": 853 }, { "epoch": 3.5583333333333336, "grad_norm": 2.5185722713598144, "learning_rate": 5e-05, "loss": 0.108, "num_input_tokens_seen": 77950920, "step": 854 }, { "epoch": 3.5583333333333336, "loss": 0.16412417590618134, "loss_ce": 0.0008246133802458644, "loss_iou": 0.298828125, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 77950920, "step": 854 }, { "epoch": 3.5625, "grad_norm": 7.683335972137127, "learning_rate": 5e-05, "loss": 0.1109, "num_input_tokens_seen": 78042688, "step": 855 }, { "epoch": 3.5625, "loss": 0.08507044613361359, "loss_ce": 0.0029705329798161983, "loss_iou": 0.34375, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 78042688, "step": 855 }, { "epoch": 3.5666666666666664, "grad_norm": 2.9311002155316537, "learning_rate": 5e-05, "loss": 0.153, "num_input_tokens_seen": 78133672, "step": 856 }, { "epoch": 3.5666666666666664, "loss": 0.10116206854581833, "loss_ce": 0.0005761290667578578, "loss_iou": 0.3203125, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 78133672, "step": 856 }, { "epoch": 3.5708333333333333, "grad_norm": 1.755481247239063, "learning_rate": 5e-05, "loss": 0.1185, "num_input_tokens_seen": 78225000, "step": 857 }, { "epoch": 3.5708333333333333, "loss": 0.12590903043746948, "loss_ce": 0.002099202712997794, "loss_iou": 0.23828125, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 78225000, "step": 857 }, { "epoch": 3.575, "grad_norm": 9.559561742494447, "learning_rate": 5e-05, "loss": 0.1764, "num_input_tokens_seen": 78316300, "step": 858 }, { "epoch": 3.575, "loss": 0.2367614209651947, "loss_ce": 0.0005858814110979438, "loss_iou": 0.34765625, "loss_num": 0.047119140625, "loss_xval": 0.236328125, "num_input_tokens_seen": 78316300, "step": 858 }, { "epoch": 3.5791666666666666, "grad_norm": 10.036579582471406, "learning_rate": 5e-05, "loss": 0.0823, "num_input_tokens_seen": 78408016, "step": 859 }, { "epoch": 3.5791666666666666, "loss": 0.07882070541381836, "loss_ce": 0.001809599227271974, "loss_iou": 0.279296875, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 78408016, "step": 859 }, { "epoch": 3.5833333333333335, "grad_norm": 2.3705521449279967, "learning_rate": 5e-05, "loss": 0.0829, "num_input_tokens_seen": 78498968, "step": 860 }, { "epoch": 3.5833333333333335, "loss": 0.11630737036466599, "loss_ce": 2.0137465980951674e-05, "loss_iou": 0.3125, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 78498968, "step": 860 }, { "epoch": 3.5875, "grad_norm": 5.987229333857078, "learning_rate": 5e-05, "loss": 0.1943, "num_input_tokens_seen": 78590208, "step": 861 }, { "epoch": 3.5875, "loss": 0.26542216539382935, "loss_ce": 4.132339381612837e-05, "loss_iou": 0.2265625, "loss_num": 0.052978515625, "loss_xval": 0.265625, "num_input_tokens_seen": 78590208, "step": 861 }, { "epoch": 3.591666666666667, "grad_norm": 5.736555389697911, "learning_rate": 5e-05, "loss": 0.1309, "num_input_tokens_seen": 78681064, "step": 862 }, { "epoch": 3.591666666666667, "loss": 0.1575353443622589, "loss_ce": 0.0006749906460754573, "loss_iou": 0.29296875, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 78681064, "step": 862 }, { "epoch": 3.595833333333333, "grad_norm": 4.555255178719723, "learning_rate": 5e-05, "loss": 0.1412, "num_input_tokens_seen": 78772584, "step": 863 }, { "epoch": 3.595833333333333, "loss": 0.10891114175319672, "loss_ce": 0.0009246918489225209, "loss_iou": 0.306640625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 78772584, "step": 863 }, { "epoch": 3.6, "grad_norm": 10.841600062563783, "learning_rate": 5e-05, "loss": 0.1488, "num_input_tokens_seen": 78864592, "step": 864 }, { "epoch": 3.6, "loss": 0.1749829202890396, "loss_ce": 0.0038403368089348078, "loss_iou": 0.279296875, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 78864592, "step": 864 }, { "epoch": 3.6041666666666665, "grad_norm": 4.92048030012845, "learning_rate": 5e-05, "loss": 0.1626, "num_input_tokens_seen": 78955560, "step": 865 }, { "epoch": 3.6041666666666665, "loss": 0.1305989921092987, "loss_ce": 0.0034627642016857862, "loss_iou": 0.326171875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 78955560, "step": 865 }, { "epoch": 3.6083333333333334, "grad_norm": 4.437315758229823, "learning_rate": 5e-05, "loss": 0.1117, "num_input_tokens_seen": 79046972, "step": 866 }, { "epoch": 3.6083333333333334, "loss": 0.0896613746881485, "loss_ce": 0.0017402288503944874, "loss_iou": 0.349609375, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 79046972, "step": 866 }, { "epoch": 3.6125, "grad_norm": 2.2807293140028384, "learning_rate": 5e-05, "loss": 0.1052, "num_input_tokens_seen": 79136824, "step": 867 }, { "epoch": 3.6125, "loss": 0.11052871495485306, "loss_ce": 9.305941603088286e-06, "loss_iou": 0.39453125, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 79136824, "step": 867 }, { "epoch": 3.6166666666666667, "grad_norm": 7.141800061379381, "learning_rate": 5e-05, "loss": 0.1306, "num_input_tokens_seen": 79228340, "step": 868 }, { "epoch": 3.6166666666666667, "loss": 0.13482439517974854, "loss_ce": 0.0004097204946447164, "loss_iou": 0.2890625, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 79228340, "step": 868 }, { "epoch": 3.6208333333333336, "grad_norm": 3.8234864903722547, "learning_rate": 5e-05, "loss": 0.1308, "num_input_tokens_seen": 79319768, "step": 869 }, { "epoch": 3.6208333333333336, "loss": 0.16188469529151917, "loss_ce": 0.0019420783501118422, "loss_iou": 0.333984375, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 79319768, "step": 869 }, { "epoch": 3.625, "grad_norm": 3.551647947387826, "learning_rate": 5e-05, "loss": 0.1047, "num_input_tokens_seen": 79411328, "step": 870 }, { "epoch": 3.625, "loss": 0.12375196069478989, "loss_ce": 0.0030549420043826103, "loss_iou": 0.240234375, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 79411328, "step": 870 }, { "epoch": 3.6291666666666664, "grad_norm": 3.7442918500543776, "learning_rate": 5e-05, "loss": 0.1229, "num_input_tokens_seen": 79502548, "step": 871 }, { "epoch": 3.6291666666666664, "loss": 0.1477302610874176, "loss_ce": 0.0003303608391433954, "loss_iou": 0.201171875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 79502548, "step": 871 }, { "epoch": 3.6333333333333333, "grad_norm": 7.0439013115078755, "learning_rate": 5e-05, "loss": 0.1173, "num_input_tokens_seen": 79594160, "step": 872 }, { "epoch": 3.6333333333333333, "loss": 0.153409942984581, "loss_ce": 0.001920689595863223, "loss_iou": 0.251953125, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 79594160, "step": 872 }, { "epoch": 3.6375, "grad_norm": 2.205887052407003, "learning_rate": 5e-05, "loss": 0.1456, "num_input_tokens_seen": 79685640, "step": 873 }, { "epoch": 3.6375, "loss": 0.12901686131954193, "loss_ce": 0.001453386852517724, "loss_iou": 0.34765625, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 79685640, "step": 873 }, { "epoch": 3.6416666666666666, "grad_norm": 4.9782678997006515, "learning_rate": 5e-05, "loss": 0.121, "num_input_tokens_seen": 79777272, "step": 874 }, { "epoch": 3.6416666666666666, "loss": 0.13823364675045013, "loss_ce": 0.0010342422174289823, "loss_iou": 0.1142578125, "loss_num": 0.0274658203125, "loss_xval": 0.13671875, "num_input_tokens_seen": 79777272, "step": 874 }, { "epoch": 3.6458333333333335, "grad_norm": 4.34295823261341, "learning_rate": 5e-05, "loss": 0.1058, "num_input_tokens_seen": 79868608, "step": 875 }, { "epoch": 3.6458333333333335, "loss": 0.09006209671497345, "loss_ce": 0.00029464036924764514, "loss_iou": 0.37890625, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 79868608, "step": 875 }, { "epoch": 3.65, "grad_norm": 5.888083977419817, "learning_rate": 5e-05, "loss": 0.182, "num_input_tokens_seen": 79959644, "step": 876 }, { "epoch": 3.65, "loss": 0.21097832918167114, "loss_ce": 0.0014599019195884466, "loss_iou": 0.2275390625, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 79959644, "step": 876 }, { "epoch": 3.654166666666667, "grad_norm": 3.737045771465334, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 80050584, "step": 877 }, { "epoch": 3.654166666666667, "loss": 0.09872293472290039, "loss_ce": 0.0002579695428721607, "loss_iou": 0.265625, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 80050584, "step": 877 }, { "epoch": 3.658333333333333, "grad_norm": 3.2482049945691562, "learning_rate": 5e-05, "loss": 0.1676, "num_input_tokens_seen": 80141352, "step": 878 }, { "epoch": 3.658333333333333, "loss": 0.11685407161712646, "loss_ce": 2.2607714527111966e-06, "loss_iou": 0.35546875, "loss_num": 0.0234375, "loss_xval": 0.11669921875, "num_input_tokens_seen": 80141352, "step": 878 }, { "epoch": 3.6625, "grad_norm": 5.0140644768875395, "learning_rate": 5e-05, "loss": 0.1577, "num_input_tokens_seen": 80233160, "step": 879 }, { "epoch": 3.6625, "loss": 0.1571737825870514, "loss_ce": 0.0018698314670473337, "loss_iou": 0.318359375, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 80233160, "step": 879 }, { "epoch": 3.6666666666666665, "grad_norm": 7.255196429255433, "learning_rate": 5e-05, "loss": 0.1797, "num_input_tokens_seen": 80322868, "step": 880 }, { "epoch": 3.6666666666666665, "loss": 0.160821795463562, "loss_ce": 4.757134956889786e-05, "loss_iou": 0.310546875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 80322868, "step": 880 }, { "epoch": 3.6708333333333334, "grad_norm": 10.360077602554204, "learning_rate": 5e-05, "loss": 0.1211, "num_input_tokens_seen": 80413852, "step": 881 }, { "epoch": 3.6708333333333334, "loss": 0.08685818314552307, "loss_ce": 0.0004934355965815485, "loss_iou": 0.28125, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 80413852, "step": 881 }, { "epoch": 3.675, "grad_norm": 5.939748395653842, "learning_rate": 5e-05, "loss": 0.1236, "num_input_tokens_seen": 80505160, "step": 882 }, { "epoch": 3.675, "loss": 0.13993090391159058, "loss_ce": 0.00016038586909417063, "loss_iou": 0.345703125, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 80505160, "step": 882 }, { "epoch": 3.6791666666666667, "grad_norm": 4.20897557631186, "learning_rate": 5e-05, "loss": 0.1369, "num_input_tokens_seen": 80596384, "step": 883 }, { "epoch": 3.6791666666666667, "loss": 0.1429780274629593, "loss_ce": 6.421100988518447e-05, "loss_iou": 0.30859375, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 80596384, "step": 883 }, { "epoch": 3.6833333333333336, "grad_norm": 5.195823724131978, "learning_rate": 5e-05, "loss": 0.1643, "num_input_tokens_seen": 80687208, "step": 884 }, { "epoch": 3.6833333333333336, "loss": 0.1412520557641983, "loss_ce": 0.0003829213383141905, "loss_iou": 0.2890625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 80687208, "step": 884 }, { "epoch": 3.6875, "grad_norm": 3.672237243325278, "learning_rate": 5e-05, "loss": 0.1604, "num_input_tokens_seen": 80778752, "step": 885 }, { "epoch": 3.6875, "loss": 0.1688692420721054, "loss_ce": 0.0016939521301537752, "loss_iou": 0.36328125, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 80778752, "step": 885 }, { "epoch": 3.6916666666666664, "grad_norm": 13.96778543887359, "learning_rate": 5e-05, "loss": 0.1447, "num_input_tokens_seen": 80870668, "step": 886 }, { "epoch": 3.6916666666666664, "loss": 0.15862274169921875, "loss_ce": 0.005424493458122015, "loss_iou": 0.296875, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 80870668, "step": 886 }, { "epoch": 3.6958333333333333, "grad_norm": 3.293803485383682, "learning_rate": 5e-05, "loss": 0.1242, "num_input_tokens_seen": 80961928, "step": 887 }, { "epoch": 3.6958333333333333, "loss": 0.14489322900772095, "loss_ce": 0.0027576074935495853, "loss_iou": 0.3515625, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 80961928, "step": 887 }, { "epoch": 3.7, "grad_norm": 3.231950272341994, "learning_rate": 5e-05, "loss": 0.1681, "num_input_tokens_seen": 81052964, "step": 888 }, { "epoch": 3.7, "loss": 0.14616870880126953, "loss_ce": 0.0016527026891708374, "loss_iou": 0.2060546875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 81052964, "step": 888 }, { "epoch": 3.7041666666666666, "grad_norm": 5.961244662688287, "learning_rate": 5e-05, "loss": 0.1136, "num_input_tokens_seen": 81144284, "step": 889 }, { "epoch": 3.7041666666666666, "loss": 0.11914453655481339, "loss_ce": 0.0016518579795956612, "loss_iou": 0.28515625, "loss_num": 0.0234375, "loss_xval": 0.11767578125, "num_input_tokens_seen": 81144284, "step": 889 }, { "epoch": 3.7083333333333335, "grad_norm": 8.571525568639126, "learning_rate": 5e-05, "loss": 0.1556, "num_input_tokens_seen": 81235328, "step": 890 }, { "epoch": 3.7083333333333335, "loss": 0.13373783230781555, "loss_ce": 0.0008337879553437233, "loss_iou": 0.23828125, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 81235328, "step": 890 }, { "epoch": 3.7125, "grad_norm": 5.826651502861672, "learning_rate": 5e-05, "loss": 0.1322, "num_input_tokens_seen": 81326252, "step": 891 }, { "epoch": 3.7125, "loss": 0.1149607002735138, "loss_ce": 0.0010843577329069376, "loss_iou": 0.16015625, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 81326252, "step": 891 }, { "epoch": 3.716666666666667, "grad_norm": 2.33538776603535, "learning_rate": 5e-05, "loss": 0.1452, "num_input_tokens_seen": 81416468, "step": 892 }, { "epoch": 3.716666666666667, "loss": 0.20617538690567017, "loss_ce": 0.0007615811773575842, "loss_iou": 0.28515625, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 81416468, "step": 892 }, { "epoch": 3.720833333333333, "grad_norm": 2.6862937653430787, "learning_rate": 5e-05, "loss": 0.0968, "num_input_tokens_seen": 81507680, "step": 893 }, { "epoch": 3.720833333333333, "loss": 0.09008393436670303, "loss_ce": 0.0024374467320740223, "loss_iou": 0.208984375, "loss_num": 0.0174560546875, "loss_xval": 0.087890625, "num_input_tokens_seen": 81507680, "step": 893 }, { "epoch": 3.725, "grad_norm": 7.085969329959365, "learning_rate": 5e-05, "loss": 0.1291, "num_input_tokens_seen": 81598772, "step": 894 }, { "epoch": 3.725, "loss": 0.1661437749862671, "loss_ce": 0.0019744737073779106, "loss_iou": 0.1796875, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 81598772, "step": 894 }, { "epoch": 3.7291666666666665, "grad_norm": 7.046338485536485, "learning_rate": 5e-05, "loss": 0.109, "num_input_tokens_seen": 81689904, "step": 895 }, { "epoch": 3.7291666666666665, "loss": 0.1281951367855072, "loss_ce": 0.00035699873114936054, "loss_iou": 0.298828125, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 81689904, "step": 895 }, { "epoch": 3.7333333333333334, "grad_norm": 4.641066412415717, "learning_rate": 5e-05, "loss": 0.1218, "num_input_tokens_seen": 81780940, "step": 896 }, { "epoch": 3.7333333333333334, "loss": 0.11866171658039093, "loss_ce": 0.0057161590084433556, "loss_iou": 0.26953125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 81780940, "step": 896 }, { "epoch": 3.7375, "grad_norm": 2.363724821308636, "learning_rate": 5e-05, "loss": 0.1382, "num_input_tokens_seen": 81872548, "step": 897 }, { "epoch": 3.7375, "loss": 0.14953023195266724, "loss_ce": 0.004968480207026005, "loss_iou": 0.25390625, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 81872548, "step": 897 }, { "epoch": 3.7416666666666667, "grad_norm": 4.37568884559808, "learning_rate": 5e-05, "loss": 0.1448, "num_input_tokens_seen": 81964208, "step": 898 }, { "epoch": 3.7416666666666667, "loss": 0.1580948829650879, "loss_ce": 0.0035691240336745977, "loss_iou": 0.1845703125, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 81964208, "step": 898 }, { "epoch": 3.7458333333333336, "grad_norm": 8.266730113660397, "learning_rate": 5e-05, "loss": 0.1684, "num_input_tokens_seen": 82055704, "step": 899 }, { "epoch": 3.7458333333333336, "loss": 0.14106786251068115, "loss_ce": 0.0011142397997900844, "loss_iou": 0.302734375, "loss_num": 0.028076171875, "loss_xval": 0.1396484375, "num_input_tokens_seen": 82055704, "step": 899 }, { "epoch": 3.75, "grad_norm": 6.854780871094846, "learning_rate": 5e-05, "loss": 0.183, "num_input_tokens_seen": 82146356, "step": 900 }, { "epoch": 3.75, "loss": 0.14154388010501862, "loss_ce": 3.353296506247716e-06, "loss_iou": 0.33984375, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 82146356, "step": 900 }, { "epoch": 3.7541666666666664, "grad_norm": 7.233013245995583, "learning_rate": 5e-05, "loss": 0.1471, "num_input_tokens_seen": 82238148, "step": 901 }, { "epoch": 3.7541666666666664, "loss": 0.17437678575515747, "loss_ce": 0.0021660891361534595, "loss_iou": 0.255859375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 82238148, "step": 901 }, { "epoch": 3.7583333333333333, "grad_norm": 3.0074373855147565, "learning_rate": 5e-05, "loss": 0.106, "num_input_tokens_seen": 82329300, "step": 902 }, { "epoch": 3.7583333333333333, "loss": 0.14340101182460785, "loss_ce": 0.0020588545594364405, "loss_iou": 0.1591796875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 82329300, "step": 902 }, { "epoch": 3.7625, "grad_norm": 6.872203528323323, "learning_rate": 5e-05, "loss": 0.1822, "num_input_tokens_seen": 82420628, "step": 903 }, { "epoch": 3.7625, "loss": 0.20105046033859253, "loss_ce": 0.002045325469225645, "loss_iou": 0.3125, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 82420628, "step": 903 }, { "epoch": 3.7666666666666666, "grad_norm": 3.456179304931455, "learning_rate": 5e-05, "loss": 0.1165, "num_input_tokens_seen": 82511840, "step": 904 }, { "epoch": 3.7666666666666666, "loss": 0.07701139152050018, "loss_ce": 0.0019076326861977577, "loss_iou": 0.1484375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 82511840, "step": 904 }, { "epoch": 3.7708333333333335, "grad_norm": 3.1272839592274613, "learning_rate": 5e-05, "loss": 0.1265, "num_input_tokens_seen": 82602904, "step": 905 }, { "epoch": 3.7708333333333335, "loss": 0.12377360463142395, "loss_ce": 0.00020792950817849487, "loss_iou": 0.369140625, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 82602904, "step": 905 }, { "epoch": 3.775, "grad_norm": 9.70117444562128, "learning_rate": 5e-05, "loss": 0.18, "num_input_tokens_seen": 82693644, "step": 906 }, { "epoch": 3.775, "loss": 0.21095910668373108, "loss_ce": 0.0015017122495919466, "loss_iou": 0.30078125, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 82693644, "step": 906 }, { "epoch": 3.779166666666667, "grad_norm": 13.071673758329982, "learning_rate": 5e-05, "loss": 0.1602, "num_input_tokens_seen": 82785280, "step": 907 }, { "epoch": 3.779166666666667, "loss": 0.1759437620639801, "loss_ce": 0.0017799364868551493, "loss_iou": 0.376953125, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 82785280, "step": 907 }, { "epoch": 3.783333333333333, "grad_norm": 2.3950949529873165, "learning_rate": 5e-05, "loss": 0.2106, "num_input_tokens_seen": 82876080, "step": 908 }, { "epoch": 3.783333333333333, "loss": 0.1860213726758957, "loss_ce": 0.0006118176388554275, "loss_iou": 0.236328125, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 82876080, "step": 908 }, { "epoch": 3.7875, "grad_norm": 3.5588174044211054, "learning_rate": 5e-05, "loss": 0.1465, "num_input_tokens_seen": 82967216, "step": 909 }, { "epoch": 3.7875, "loss": 0.15826614201068878, "loss_ce": 0.0018788184970617294, "loss_iou": 0.33203125, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 82967216, "step": 909 }, { "epoch": 3.7916666666666665, "grad_norm": 5.950982975488221, "learning_rate": 5e-05, "loss": 0.2663, "num_input_tokens_seen": 83058916, "step": 910 }, { "epoch": 3.7916666666666665, "loss": 0.2467661052942276, "loss_ce": 0.0008249367238022387, "loss_iou": 0.2080078125, "loss_num": 0.049072265625, "loss_xval": 0.24609375, "num_input_tokens_seen": 83058916, "step": 910 }, { "epoch": 3.7958333333333334, "grad_norm": 7.937274435923854, "learning_rate": 5e-05, "loss": 0.1838, "num_input_tokens_seen": 83149832, "step": 911 }, { "epoch": 3.7958333333333334, "loss": 0.18536323308944702, "loss_ce": 0.0038446770049631596, "loss_iou": 0.126953125, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 83149832, "step": 911 }, { "epoch": 3.8, "grad_norm": 3.1189095340514794, "learning_rate": 5e-05, "loss": 0.1302, "num_input_tokens_seen": 83240876, "step": 912 }, { "epoch": 3.8, "loss": 0.14421942830085754, "loss_ce": 0.0007562931277789176, "loss_iou": 0.234375, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 83240876, "step": 912 }, { "epoch": 3.8041666666666667, "grad_norm": 3.5604076626857815, "learning_rate": 5e-05, "loss": 0.1292, "num_input_tokens_seen": 83332576, "step": 913 }, { "epoch": 3.8041666666666667, "loss": 0.11656103283166885, "loss_ce": 0.005995844956487417, "loss_iou": 0.189453125, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 83332576, "step": 913 }, { "epoch": 3.8083333333333336, "grad_norm": 5.22722230155665, "learning_rate": 5e-05, "loss": 0.1424, "num_input_tokens_seen": 83423936, "step": 914 }, { "epoch": 3.8083333333333336, "loss": 0.10840374231338501, "loss_ce": 0.0005851405439898372, "loss_iou": 0.0751953125, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 83423936, "step": 914 }, { "epoch": 3.8125, "grad_norm": 2.720035878251394, "learning_rate": 5e-05, "loss": 0.1747, "num_input_tokens_seen": 83515352, "step": 915 }, { "epoch": 3.8125, "loss": 0.1660463958978653, "loss_ce": 0.0030214914586395025, "loss_iou": 0.150390625, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 83515352, "step": 915 }, { "epoch": 3.8166666666666664, "grad_norm": 6.0446981950480145, "learning_rate": 5e-05, "loss": 0.1575, "num_input_tokens_seen": 83606452, "step": 916 }, { "epoch": 3.8166666666666664, "loss": 0.12540730834007263, "loss_ce": 0.001551724737510085, "loss_iou": 0.328125, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 83606452, "step": 916 }, { "epoch": 3.8208333333333333, "grad_norm": 6.168434467541618, "learning_rate": 5e-05, "loss": 0.1399, "num_input_tokens_seen": 83697824, "step": 917 }, { "epoch": 3.8208333333333333, "loss": 0.08690094202756882, "loss_ce": 0.0008566331234760582, "loss_iou": 0.255859375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 83697824, "step": 917 }, { "epoch": 3.825, "grad_norm": 3.0075171810552344, "learning_rate": 5e-05, "loss": 0.093, "num_input_tokens_seen": 83789212, "step": 918 }, { "epoch": 3.825, "loss": 0.07617410272359848, "loss_ce": 0.0011618940625339746, "loss_iou": 0.25390625, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 83789212, "step": 918 }, { "epoch": 3.8291666666666666, "grad_norm": 10.734787557551742, "learning_rate": 5e-05, "loss": 0.1166, "num_input_tokens_seen": 83879912, "step": 919 }, { "epoch": 3.8291666666666666, "loss": 0.10854049026966095, "loss_ce": 4.730073669634294e-06, "loss_iou": 0.365234375, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 83879912, "step": 919 }, { "epoch": 3.8333333333333335, "grad_norm": 6.116961978654756, "learning_rate": 5e-05, "loss": 0.1869, "num_input_tokens_seen": 83970948, "step": 920 }, { "epoch": 3.8333333333333335, "loss": 0.14833904802799225, "loss_ce": 0.0032890033908188343, "loss_iou": 0.28515625, "loss_num": 0.0289306640625, "loss_xval": 0.1455078125, "num_input_tokens_seen": 83970948, "step": 920 }, { "epoch": 3.8375, "grad_norm": 3.20997363255731, "learning_rate": 5e-05, "loss": 0.1156, "num_input_tokens_seen": 84062356, "step": 921 }, { "epoch": 3.8375, "loss": 0.1262749582529068, "loss_ce": 0.0005120187997817993, "loss_iou": 0.384765625, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 84062356, "step": 921 }, { "epoch": 3.841666666666667, "grad_norm": 7.310422599518437, "learning_rate": 5e-05, "loss": 0.1041, "num_input_tokens_seen": 84153220, "step": 922 }, { "epoch": 3.841666666666667, "loss": 0.09646876156330109, "loss_ce": 0.00010950541036436334, "loss_iou": 0.181640625, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 84153220, "step": 922 }, { "epoch": 3.845833333333333, "grad_norm": 11.07096274448853, "learning_rate": 5e-05, "loss": 0.1529, "num_input_tokens_seen": 84244428, "step": 923 }, { "epoch": 3.845833333333333, "loss": 0.17023152112960815, "loss_ce": 0.00034015910932794213, "loss_iou": 0.353515625, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 84244428, "step": 923 }, { "epoch": 3.85, "grad_norm": 5.051180851336462, "learning_rate": 5e-05, "loss": 0.129, "num_input_tokens_seen": 84336700, "step": 924 }, { "epoch": 3.85, "loss": 0.1427893042564392, "loss_ce": 0.001553958049044013, "loss_iou": 0.2470703125, "loss_num": 0.0281982421875, "loss_xval": 0.1416015625, "num_input_tokens_seen": 84336700, "step": 924 }, { "epoch": 3.8541666666666665, "grad_norm": 4.495963485971154, "learning_rate": 5e-05, "loss": 0.1785, "num_input_tokens_seen": 84428020, "step": 925 }, { "epoch": 3.8541666666666665, "loss": 0.21857711672782898, "loss_ce": 0.002756803994998336, "loss_iou": 0.26953125, "loss_num": 0.043212890625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 84428020, "step": 925 }, { "epoch": 3.8583333333333334, "grad_norm": 3.3145649389351153, "learning_rate": 5e-05, "loss": 0.1057, "num_input_tokens_seen": 84519152, "step": 926 }, { "epoch": 3.8583333333333334, "loss": 0.08949233591556549, "loss_ce": 0.0012354973005130887, "loss_iou": 0.28125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 84519152, "step": 926 }, { "epoch": 3.8625, "grad_norm": 11.110766333390965, "learning_rate": 5e-05, "loss": 0.1772, "num_input_tokens_seen": 84610144, "step": 927 }, { "epoch": 3.8625, "loss": 0.17750558257102966, "loss_ce": 0.00021371689217630774, "loss_iou": 0.337890625, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 84610144, "step": 927 }, { "epoch": 3.8666666666666667, "grad_norm": 9.320735724906612, "learning_rate": 5e-05, "loss": 0.1365, "num_input_tokens_seen": 84702372, "step": 928 }, { "epoch": 3.8666666666666667, "loss": 0.12013794481754303, "loss_ce": 0.002462164033204317, "loss_iou": 0.30078125, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 84702372, "step": 928 }, { "epoch": 3.8708333333333336, "grad_norm": 54.707623230301095, "learning_rate": 5e-05, "loss": 0.1142, "num_input_tokens_seen": 84793828, "step": 929 }, { "epoch": 3.8708333333333336, "loss": 0.09439679980278015, "loss_ce": 0.0032865693792700768, "loss_iou": 0.26171875, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 84793828, "step": 929 }, { "epoch": 3.875, "grad_norm": 1.5635476627662845, "learning_rate": 5e-05, "loss": 0.0995, "num_input_tokens_seen": 84886020, "step": 930 }, { "epoch": 3.875, "loss": 0.10173749923706055, "loss_ce": 0.004340652376413345, "loss_iou": 0.259765625, "loss_num": 0.01953125, "loss_xval": 0.09716796875, "num_input_tokens_seen": 84886020, "step": 930 }, { "epoch": 3.8791666666666664, "grad_norm": 5.889122208098058, "learning_rate": 5e-05, "loss": 0.1463, "num_input_tokens_seen": 84977480, "step": 931 }, { "epoch": 3.8791666666666664, "loss": 0.1577497273683548, "loss_ce": 0.00040109510882757604, "loss_iou": 0.2470703125, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 84977480, "step": 931 }, { "epoch": 3.8833333333333333, "grad_norm": 4.224099488917448, "learning_rate": 5e-05, "loss": 0.1867, "num_input_tokens_seen": 85069072, "step": 932 }, { "epoch": 3.8833333333333333, "loss": 0.11261321604251862, "loss_ce": 0.0005526671302504838, "loss_iou": 0.27734375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 85069072, "step": 932 }, { "epoch": 3.8875, "grad_norm": 11.423622253129569, "learning_rate": 5e-05, "loss": 0.1392, "num_input_tokens_seen": 85160348, "step": 933 }, { "epoch": 3.8875, "loss": 0.20320799946784973, "loss_ce": 0.02242186665534973, "loss_iou": 0.396484375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 85160348, "step": 933 }, { "epoch": 3.8916666666666666, "grad_norm": 5.606403462872274, "learning_rate": 5e-05, "loss": 0.1506, "num_input_tokens_seen": 85251604, "step": 934 }, { "epoch": 3.8916666666666666, "loss": 0.15166088938713074, "loss_ce": 0.0013923394726589322, "loss_iou": 0.158203125, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 85251604, "step": 934 }, { "epoch": 3.8958333333333335, "grad_norm": 16.76592793234881, "learning_rate": 5e-05, "loss": 0.1967, "num_input_tokens_seen": 85343376, "step": 935 }, { "epoch": 3.8958333333333335, "loss": 0.26409974694252014, "loss_ce": 0.002258933149278164, "loss_iou": 0.2734375, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 85343376, "step": 935 }, { "epoch": 3.9, "grad_norm": 7.951347957557853, "learning_rate": 5e-05, "loss": 0.1094, "num_input_tokens_seen": 85434500, "step": 936 }, { "epoch": 3.9, "loss": 0.10416235029697418, "loss_ce": 0.00015844989684410393, "loss_iou": 0.35546875, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 85434500, "step": 936 }, { "epoch": 3.904166666666667, "grad_norm": 2.4830229805624704, "learning_rate": 5e-05, "loss": 0.1479, "num_input_tokens_seen": 85525420, "step": 937 }, { "epoch": 3.904166666666667, "loss": 0.20122557878494263, "loss_ce": 7.928603736218065e-06, "loss_iou": 0.4765625, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 85525420, "step": 937 }, { "epoch": 3.908333333333333, "grad_norm": 6.260816700820158, "learning_rate": 5e-05, "loss": 0.1974, "num_input_tokens_seen": 85616688, "step": 938 }, { "epoch": 3.908333333333333, "loss": 0.26152363419532776, "loss_ce": 0.006701848469674587, "loss_iou": 0.296875, "loss_num": 0.051025390625, "loss_xval": 0.25390625, "num_input_tokens_seen": 85616688, "step": 938 }, { "epoch": 3.9125, "grad_norm": 2.4562762107300933, "learning_rate": 5e-05, "loss": 0.1253, "num_input_tokens_seen": 85707284, "step": 939 }, { "epoch": 3.9125, "loss": 0.13999181985855103, "loss_ce": 0.0010300282156094909, "loss_iou": 0.291015625, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 85707284, "step": 939 }, { "epoch": 3.9166666666666665, "grad_norm": 2.8296455134094156, "learning_rate": 5e-05, "loss": 0.1193, "num_input_tokens_seen": 85798936, "step": 940 }, { "epoch": 3.9166666666666665, "loss": 0.13532821834087372, "loss_ce": 0.00187484931666404, "loss_iou": 0.28125, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 85798936, "step": 940 }, { "epoch": 3.9208333333333334, "grad_norm": 2.1867038989698413, "learning_rate": 5e-05, "loss": 0.1318, "num_input_tokens_seen": 85890680, "step": 941 }, { "epoch": 3.9208333333333334, "loss": 0.1194370836019516, "loss_ce": 0.001028873841278255, "loss_iou": 0.2578125, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 85890680, "step": 941 }, { "epoch": 3.925, "grad_norm": 9.344342386609227, "learning_rate": 5e-05, "loss": 0.1357, "num_input_tokens_seen": 85981620, "step": 942 }, { "epoch": 3.925, "loss": 0.13859602808952332, "loss_ce": 0.0033116054255515337, "loss_iou": 0.39453125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 85981620, "step": 942 }, { "epoch": 3.9291666666666667, "grad_norm": 3.2652150935417814, "learning_rate": 5e-05, "loss": 0.1738, "num_input_tokens_seen": 86073340, "step": 943 }, { "epoch": 3.9291666666666667, "loss": 0.16249480843544006, "loss_ce": 0.000950021087191999, "loss_iou": 0.2236328125, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 86073340, "step": 943 }, { "epoch": 3.9333333333333336, "grad_norm": 6.533695399414451, "learning_rate": 5e-05, "loss": 0.1753, "num_input_tokens_seen": 86164440, "step": 944 }, { "epoch": 3.9333333333333336, "loss": 0.13617786765098572, "loss_ce": 0.00011524726141942665, "loss_iou": 0.4140625, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 86164440, "step": 944 }, { "epoch": 3.9375, "grad_norm": 14.80299981535188, "learning_rate": 5e-05, "loss": 0.1802, "num_input_tokens_seen": 86256060, "step": 945 }, { "epoch": 3.9375, "loss": 0.2190660834312439, "loss_ce": 0.002177656628191471, "loss_iou": 0.26171875, "loss_num": 0.04345703125, "loss_xval": 0.216796875, "num_input_tokens_seen": 86256060, "step": 945 }, { "epoch": 3.9416666666666664, "grad_norm": 7.1976167399766275, "learning_rate": 5e-05, "loss": 0.1259, "num_input_tokens_seen": 86347328, "step": 946 }, { "epoch": 3.9416666666666664, "loss": 0.17285513877868652, "loss_ce": 4.935155811836012e-05, "loss_iou": 0.296875, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 86347328, "step": 946 }, { "epoch": 3.9458333333333333, "grad_norm": 11.88169149602509, "learning_rate": 5e-05, "loss": 0.1394, "num_input_tokens_seen": 86438636, "step": 947 }, { "epoch": 3.9458333333333333, "loss": 0.13551881909370422, "loss_ce": 0.001638201531022787, "loss_iou": 0.33984375, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 86438636, "step": 947 }, { "epoch": 3.95, "grad_norm": 4.718115266274853, "learning_rate": 5e-05, "loss": 0.1227, "num_input_tokens_seen": 86530304, "step": 948 }, { "epoch": 3.95, "loss": 0.11934144049882889, "loss_ce": 0.002947398694232106, "loss_iou": 0.291015625, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 86530304, "step": 948 }, { "epoch": 3.9541666666666666, "grad_norm": 2.5578899967053723, "learning_rate": 5e-05, "loss": 0.11, "num_input_tokens_seen": 86621448, "step": 949 }, { "epoch": 3.9541666666666666, "loss": 0.10856461524963379, "loss_ce": 0.0005018667434342206, "loss_iou": 0.23046875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 86621448, "step": 949 }, { "epoch": 3.9583333333333335, "grad_norm": 15.126351483779652, "learning_rate": 5e-05, "loss": 0.1009, "num_input_tokens_seen": 86713244, "step": 950 }, { "epoch": 3.9583333333333335, "loss": 0.13179825246334076, "loss_ce": 0.0014882051618769765, "loss_iou": 0.296875, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 86713244, "step": 950 }, { "epoch": 3.9625, "grad_norm": 3.3495506162086532, "learning_rate": 5e-05, "loss": 0.1148, "num_input_tokens_seen": 86805068, "step": 951 }, { "epoch": 3.9625, "loss": 0.13726088404655457, "loss_ce": 0.0015492134261876345, "loss_iou": 0.255859375, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 86805068, "step": 951 }, { "epoch": 3.966666666666667, "grad_norm": 7.1065666758190735, "learning_rate": 5e-05, "loss": 0.098, "num_input_tokens_seen": 86896444, "step": 952 }, { "epoch": 3.966666666666667, "loss": 0.09745579212903976, "loss_ce": 0.00024204922374337912, "loss_iou": 0.3515625, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 86896444, "step": 952 }, { "epoch": 3.970833333333333, "grad_norm": 4.579685209282489, "learning_rate": 5e-05, "loss": 0.1246, "num_input_tokens_seen": 86987832, "step": 953 }, { "epoch": 3.970833333333333, "loss": 0.0786675363779068, "loss_ce": 0.0010003021452575922, "loss_iou": 0.39453125, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 86987832, "step": 953 }, { "epoch": 3.975, "grad_norm": 3.83942290602781, "learning_rate": 5e-05, "loss": 0.0906, "num_input_tokens_seen": 87079384, "step": 954 }, { "epoch": 3.975, "loss": 0.08803156018257141, "loss_ce": 0.0010335702681913972, "loss_iou": 0.205078125, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 87079384, "step": 954 }, { "epoch": 3.9791666666666665, "grad_norm": 3.6696422264372415, "learning_rate": 5e-05, "loss": 0.1051, "num_input_tokens_seen": 87170740, "step": 955 }, { "epoch": 3.9791666666666665, "loss": 0.1223042756319046, "loss_ce": 0.0009816479869186878, "loss_iou": 0.287109375, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 87170740, "step": 955 }, { "epoch": 3.9833333333333334, "grad_norm": 12.134255707746513, "learning_rate": 5e-05, "loss": 0.1632, "num_input_tokens_seen": 87261932, "step": 956 }, { "epoch": 3.9833333333333334, "loss": 0.16107177734375, "loss_ce": 0.0034789997152984142, "loss_iou": 0.291015625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 87261932, "step": 956 }, { "epoch": 3.9875, "grad_norm": 5.088105032232529, "learning_rate": 5e-05, "loss": 0.1208, "num_input_tokens_seen": 87353140, "step": 957 }, { "epoch": 3.9875, "loss": 0.15737253427505493, "loss_ce": 0.004189538769423962, "loss_iou": 0.283203125, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 87353140, "step": 957 }, { "epoch": 3.9916666666666667, "grad_norm": 5.805691454909013, "learning_rate": 5e-05, "loss": 0.1199, "num_input_tokens_seen": 87444768, "step": 958 }, { "epoch": 3.9916666666666667, "loss": 0.1642824411392212, "loss_ce": 9.787664021132514e-05, "loss_iou": 0.298828125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 87444768, "step": 958 }, { "epoch": 3.9958333333333336, "grad_norm": 8.423182459422138, "learning_rate": 5e-05, "loss": 0.1529, "num_input_tokens_seen": 87536576, "step": 959 }, { "epoch": 3.9958333333333336, "loss": 0.2128945142030716, "loss_ce": 0.0014076823135837913, "loss_iou": 0.25390625, "loss_num": 0.042236328125, "loss_xval": 0.2119140625, "num_input_tokens_seen": 87536576, "step": 959 }, { "epoch": 4.0, "grad_norm": 32.96109338843066, "learning_rate": 5e-05, "loss": 0.1576, "num_input_tokens_seen": 87628096, "step": 960 }, { "epoch": 4.0, "loss": 0.07793666422367096, "loss_ce": 0.00039149654912762344, "loss_iou": 0.283203125, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 87628096, "step": 960 }, { "epoch": 4.004166666666666, "grad_norm": 4.443845311228221, "learning_rate": 5e-05, "loss": 0.1403, "num_input_tokens_seen": 87719392, "step": 961 }, { "epoch": 4.004166666666666, "loss": 0.1875435709953308, "loss_ce": 0.00010460759222041816, "loss_iou": 0.03857421875, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 87719392, "step": 961 }, { "epoch": 4.008333333333334, "grad_norm": 4.898741991070323, "learning_rate": 5e-05, "loss": 0.0962, "num_input_tokens_seen": 87810456, "step": 962 }, { "epoch": 4.008333333333334, "loss": 0.07666168361902237, "loss_ce": 0.000520326429978013, "loss_iou": 0.330078125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 87810456, "step": 962 }, { "epoch": 4.0125, "grad_norm": 6.425378915500767, "learning_rate": 5e-05, "loss": 0.1308, "num_input_tokens_seen": 87901412, "step": 963 }, { "epoch": 4.0125, "loss": 0.15114706754684448, "loss_ce": 0.00025289118639193475, "loss_iou": 0.251953125, "loss_num": 0.0301513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 87901412, "step": 963 }, { "epoch": 4.016666666666667, "grad_norm": 5.964979636005754, "learning_rate": 5e-05, "loss": 0.1322, "num_input_tokens_seen": 87992700, "step": 964 }, { "epoch": 4.016666666666667, "loss": 0.16243040561676025, "loss_ce": 0.000534652906935662, "loss_iou": 0.2734375, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 87992700, "step": 964 }, { "epoch": 4.020833333333333, "grad_norm": 8.049764208385682, "learning_rate": 5e-05, "loss": 0.0996, "num_input_tokens_seen": 88084624, "step": 965 }, { "epoch": 4.020833333333333, "loss": 0.08246070146560669, "loss_ce": 0.001886668847873807, "loss_iou": 0.2373046875, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 88084624, "step": 965 }, { "epoch": 4.025, "grad_norm": 3.960262569801235, "learning_rate": 5e-05, "loss": 0.1005, "num_input_tokens_seen": 88175828, "step": 966 }, { "epoch": 4.025, "loss": 0.13607226312160492, "loss_ce": 0.0002995551039930433, "loss_iou": 0.322265625, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 88175828, "step": 966 }, { "epoch": 4.029166666666667, "grad_norm": 19.844560449627313, "learning_rate": 5e-05, "loss": 0.1436, "num_input_tokens_seen": 88266892, "step": 967 }, { "epoch": 4.029166666666667, "loss": 0.13706375658512115, "loss_ce": 0.00025344558525830507, "loss_iou": 0.34765625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 88266892, "step": 967 }, { "epoch": 4.033333333333333, "grad_norm": 8.680644226182876, "learning_rate": 5e-05, "loss": 0.1105, "num_input_tokens_seen": 88357920, "step": 968 }, { "epoch": 4.033333333333333, "loss": 0.1243818998336792, "loss_ce": 0.0002669144596438855, "loss_iou": 0.396484375, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 88357920, "step": 968 }, { "epoch": 4.0375, "grad_norm": 7.204047305940804, "learning_rate": 5e-05, "loss": 0.0912, "num_input_tokens_seen": 88449892, "step": 969 }, { "epoch": 4.0375, "loss": 0.1099606603384018, "loss_ce": 0.003149131080135703, "loss_iou": 0.3359375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 88449892, "step": 969 }, { "epoch": 4.041666666666667, "grad_norm": 2.4152880561554833, "learning_rate": 5e-05, "loss": 0.1681, "num_input_tokens_seen": 88541204, "step": 970 }, { "epoch": 4.041666666666667, "loss": 0.11691722273826599, "loss_ce": 0.00021800363902002573, "loss_iou": 0.240234375, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 88541204, "step": 970 }, { "epoch": 4.045833333333333, "grad_norm": 12.140109984366983, "learning_rate": 5e-05, "loss": 0.1387, "num_input_tokens_seen": 88633152, "step": 971 }, { "epoch": 4.045833333333333, "loss": 0.10950451344251633, "loss_ce": 0.0027540295850485563, "loss_iou": 0.26171875, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 88633152, "step": 971 }, { "epoch": 4.05, "grad_norm": 4.865660471200905, "learning_rate": 5e-05, "loss": 0.1275, "num_input_tokens_seen": 88724328, "step": 972 }, { "epoch": 4.05, "loss": 0.10865399241447449, "loss_ce": 0.0001640023838263005, "loss_iou": 0.388671875, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 88724328, "step": 972 }, { "epoch": 4.054166666666666, "grad_norm": 3.998968587081581, "learning_rate": 5e-05, "loss": 0.0976, "num_input_tokens_seen": 88815820, "step": 973 }, { "epoch": 4.054166666666666, "loss": 0.059591155499219894, "loss_ce": 5.5868404160719365e-06, "loss_iou": 0.357421875, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 88815820, "step": 973 }, { "epoch": 4.058333333333334, "grad_norm": 3.5969518362843984, "learning_rate": 5e-05, "loss": 0.1013, "num_input_tokens_seen": 88906316, "step": 974 }, { "epoch": 4.058333333333334, "loss": 0.10994169861078262, "loss_ce": 0.00018522625032346696, "loss_iou": 0.171875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 88906316, "step": 974 }, { "epoch": 4.0625, "grad_norm": 5.148583156604257, "learning_rate": 5e-05, "loss": 0.0793, "num_input_tokens_seen": 88997732, "step": 975 }, { "epoch": 4.0625, "loss": 0.06272031366825104, "loss_ce": 0.0009985165670514107, "loss_iou": 0.34375, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 88997732, "step": 975 }, { "epoch": 4.066666666666666, "grad_norm": 5.352702192595242, "learning_rate": 5e-05, "loss": 0.1092, "num_input_tokens_seen": 89089060, "step": 976 }, { "epoch": 4.066666666666666, "loss": 0.0935366153717041, "loss_ce": 0.0006105887005105615, "loss_iou": 0.3984375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 89089060, "step": 976 }, { "epoch": 4.070833333333334, "grad_norm": 3.929697031856074, "learning_rate": 5e-05, "loss": 0.0959, "num_input_tokens_seen": 89180636, "step": 977 }, { "epoch": 4.070833333333334, "loss": 0.10896719247102737, "loss_ce": 0.0013622116530314088, "loss_iou": 0.259765625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 89180636, "step": 977 }, { "epoch": 4.075, "grad_norm": 5.665121284659802, "learning_rate": 5e-05, "loss": 0.124, "num_input_tokens_seen": 89271704, "step": 978 }, { "epoch": 4.075, "loss": 0.14936652779579163, "loss_ce": 0.0018903320888057351, "loss_iou": 0.21484375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 89271704, "step": 978 }, { "epoch": 4.079166666666667, "grad_norm": 4.147776116421283, "learning_rate": 5e-05, "loss": 0.084, "num_input_tokens_seen": 89362856, "step": 979 }, { "epoch": 4.079166666666667, "loss": 0.049897756427526474, "loss_ce": 0.001161185442470014, "loss_iou": 0.369140625, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 89362856, "step": 979 }, { "epoch": 4.083333333333333, "grad_norm": 3.9288760153563587, "learning_rate": 5e-05, "loss": 0.1276, "num_input_tokens_seen": 89454140, "step": 980 }, { "epoch": 4.083333333333333, "loss": 0.1733560562133789, "loss_ce": 0.0009012245573103428, "loss_iou": 0.2890625, "loss_num": 0.034423828125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 89454140, "step": 980 }, { "epoch": 4.0875, "grad_norm": 2.7682581278089256, "learning_rate": 5e-05, "loss": 0.1069, "num_input_tokens_seen": 89545052, "step": 981 }, { "epoch": 4.0875, "loss": 0.10385777056217194, "loss_ce": 6.45157433609711e-06, "loss_iou": 0.328125, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 89545052, "step": 981 }, { "epoch": 4.091666666666667, "grad_norm": 1.5098268804953137, "learning_rate": 5e-05, "loss": 0.1338, "num_input_tokens_seen": 89635988, "step": 982 }, { "epoch": 4.091666666666667, "loss": 0.15624697506427765, "loss_ce": 0.0008667304064147174, "loss_iou": 0.2265625, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 89635988, "step": 982 }, { "epoch": 4.095833333333333, "grad_norm": 7.695699542286283, "learning_rate": 5e-05, "loss": 0.1346, "num_input_tokens_seen": 89727320, "step": 983 }, { "epoch": 4.095833333333333, "loss": 0.17396774888038635, "loss_ce": 0.00046768668107688427, "loss_iou": 0.2890625, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 89727320, "step": 983 }, { "epoch": 4.1, "grad_norm": 2.949612298898544, "learning_rate": 5e-05, "loss": 0.1358, "num_input_tokens_seen": 89818596, "step": 984 }, { "epoch": 4.1, "loss": 0.09132670611143112, "loss_ce": 0.0010557147907093167, "loss_iou": 0.35546875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 89818596, "step": 984 }, { "epoch": 4.104166666666667, "grad_norm": 4.07795865495468, "learning_rate": 5e-05, "loss": 0.1367, "num_input_tokens_seen": 89909784, "step": 985 }, { "epoch": 4.104166666666667, "loss": 0.17781442403793335, "loss_ce": 0.0014838598435744643, "loss_iou": 0.30859375, "loss_num": 0.03515625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 89909784, "step": 985 }, { "epoch": 4.108333333333333, "grad_norm": 2.766711540092012, "learning_rate": 5e-05, "loss": 0.112, "num_input_tokens_seen": 90000876, "step": 986 }, { "epoch": 4.108333333333333, "loss": 0.13473664224147797, "loss_ce": 1.5432389091074583e-06, "loss_iou": 0.54296875, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 90000876, "step": 986 }, { "epoch": 4.1125, "grad_norm": 4.713177841079944, "learning_rate": 5e-05, "loss": 0.1128, "num_input_tokens_seen": 90092116, "step": 987 }, { "epoch": 4.1125, "loss": 0.13242650032043457, "loss_ce": 0.0015823881840333343, "loss_iou": 0.259765625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 90092116, "step": 987 }, { "epoch": 4.116666666666666, "grad_norm": 7.2006335170937055, "learning_rate": 5e-05, "loss": 0.1098, "num_input_tokens_seen": 90182952, "step": 988 }, { "epoch": 4.116666666666666, "loss": 0.13896583020687103, "loss_ce": 0.004581678658723831, "loss_iou": 0.2421875, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 90182952, "step": 988 }, { "epoch": 4.120833333333334, "grad_norm": 4.493654244976299, "learning_rate": 5e-05, "loss": 0.1583, "num_input_tokens_seen": 90274168, "step": 989 }, { "epoch": 4.120833333333334, "loss": 0.19936174154281616, "loss_ce": 0.0023478814400732517, "loss_iou": 0.333984375, "loss_num": 0.039306640625, "loss_xval": 0.197265625, "num_input_tokens_seen": 90274168, "step": 989 }, { "epoch": 4.125, "grad_norm": 4.991471041662424, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 90365976, "step": 990 }, { "epoch": 4.125, "loss": 0.08014944195747375, "loss_ce": 0.0003764900902751833, "loss_iou": 0.251953125, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 90365976, "step": 990 }, { "epoch": 4.129166666666666, "grad_norm": 6.153980458296831, "learning_rate": 5e-05, "loss": 0.1423, "num_input_tokens_seen": 90457376, "step": 991 }, { "epoch": 4.129166666666666, "loss": 0.14803236722946167, "loss_ce": 2.2120133507996798e-05, "loss_iou": 0.416015625, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 90457376, "step": 991 }, { "epoch": 4.133333333333334, "grad_norm": 6.076137039796416, "learning_rate": 5e-05, "loss": 0.1176, "num_input_tokens_seen": 90548868, "step": 992 }, { "epoch": 4.133333333333334, "loss": 0.08611531555652618, "loss_ce": 4.0487215301254764e-05, "loss_iou": 0.2041015625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 90548868, "step": 992 }, { "epoch": 4.1375, "grad_norm": 3.4031906722640737, "learning_rate": 5e-05, "loss": 0.116, "num_input_tokens_seen": 90640408, "step": 993 }, { "epoch": 4.1375, "loss": 0.14683718979358673, "loss_ce": 4.7640893171774223e-05, "loss_iou": 0.279296875, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 90640408, "step": 993 }, { "epoch": 4.141666666666667, "grad_norm": 6.529223897966937, "learning_rate": 5e-05, "loss": 0.1685, "num_input_tokens_seen": 90731516, "step": 994 }, { "epoch": 4.141666666666667, "loss": 0.18272951245307922, "loss_ce": 0.0006005996838212013, "loss_iou": 0.251953125, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 90731516, "step": 994 }, { "epoch": 4.145833333333333, "grad_norm": 2.1685415747038927, "learning_rate": 5e-05, "loss": 0.0979, "num_input_tokens_seen": 90823400, "step": 995 }, { "epoch": 4.145833333333333, "loss": 0.09621228277683258, "loss_ce": 0.002431760774925351, "loss_iou": 0.1689453125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 90823400, "step": 995 }, { "epoch": 4.15, "grad_norm": 5.302334741694656, "learning_rate": 5e-05, "loss": 0.124, "num_input_tokens_seen": 90914192, "step": 996 }, { "epoch": 4.15, "loss": 0.08803659677505493, "loss_ce": 0.001030988059937954, "loss_iou": 0.349609375, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 90914192, "step": 996 }, { "epoch": 4.154166666666667, "grad_norm": 3.955132642719237, "learning_rate": 5e-05, "loss": 0.1567, "num_input_tokens_seen": 91004840, "step": 997 }, { "epoch": 4.154166666666667, "loss": 0.15012209117412567, "loss_ce": 0.00018922274466603994, "loss_iou": 0.326171875, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 91004840, "step": 997 }, { "epoch": 4.158333333333333, "grad_norm": 3.1198611222256947, "learning_rate": 5e-05, "loss": 0.113, "num_input_tokens_seen": 91095924, "step": 998 }, { "epoch": 4.158333333333333, "loss": 0.1430675983428955, "loss_ce": 0.0012218987103551626, "loss_iou": 0.302734375, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 91095924, "step": 998 }, { "epoch": 4.1625, "grad_norm": 2.630883571061764, "learning_rate": 5e-05, "loss": 0.1122, "num_input_tokens_seen": 91186540, "step": 999 }, { "epoch": 4.1625, "loss": 0.12479518353939056, "loss_ce": 3.9328693674178794e-05, "loss_iou": 0.345703125, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 91186540, "step": 999 }, { "epoch": 4.166666666666667, "grad_norm": 6.898916642476488, "learning_rate": 5e-05, "loss": 0.1358, "num_input_tokens_seen": 91278140, "step": 1000 }, { "epoch": 4.166666666666667, "eval_seeclick_CIoU": 0.2288176789879799, "eval_seeclick_GIoU": 0.2192443385720253, "eval_seeclick_IoU": 0.34198732674121857, "eval_seeclick_MAE_all": 0.10106326639652252, "eval_seeclick_MAE_h": 0.1016768105328083, "eval_seeclick_MAE_w": 0.18323545902967453, "eval_seeclick_MAE_x_boxes": 0.2194460779428482, "eval_seeclick_MAE_y_boxes": 0.10459760949015617, "eval_seeclick_NUM_probability": 0.9999995231628418, "eval_seeclick_inside_bbox": 0.4943181872367859, "eval_seeclick_loss": 0.5763809680938721, "eval_seeclick_loss_ce": 0.12347016483545303, "eval_seeclick_loss_iou": 0.4483642578125, "eval_seeclick_loss_num": 0.08683013916015625, "eval_seeclick_loss_xval": 0.434326171875, "eval_seeclick_runtime": 74.1847, "eval_seeclick_samples_per_second": 0.58, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 91278140, "step": 1000 }, { "epoch": 4.166666666666667, "eval_icons_CIoU": 0.41445091366767883, "eval_icons_GIoU": 0.4151296019554138, "eval_icons_IoU": 0.47638723254203796, "eval_icons_MAE_all": 0.060545625165104866, "eval_icons_MAE_h": 0.12721606343984604, "eval_icons_MAE_w": 0.08871277421712875, "eval_icons_MAE_x_boxes": 0.08461445942521095, "eval_icons_MAE_y_boxes": 0.12413446977734566, "eval_icons_NUM_probability": 0.9999995529651642, "eval_icons_inside_bbox": 0.640625, "eval_icons_loss": 0.2962195873260498, "eval_icons_loss_ce": 6.803241319630615e-06, "eval_icons_loss_iou": 0.345947265625, "eval_icons_loss_num": 0.059906005859375, "eval_icons_loss_xval": 0.299713134765625, "eval_icons_runtime": 85.3334, "eval_icons_samples_per_second": 0.586, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 91278140, "step": 1000 }, { "epoch": 4.166666666666667, "eval_screenspot_CIoU": 0.39049918452898663, "eval_screenspot_GIoU": 0.37226015826066333, "eval_screenspot_IoU": 0.4547177155812581, "eval_screenspot_MAE_all": 0.0913725992043813, "eval_screenspot_MAE_h": 0.0856855387489001, "eval_screenspot_MAE_w": 0.18789143363634744, "eval_screenspot_MAE_x_boxes": 0.1800380746523539, "eval_screenspot_MAE_y_boxes": 0.07909448444843292, "eval_screenspot_NUM_probability": 0.9999943971633911, "eval_screenspot_inside_bbox": 0.6833333373069763, "eval_screenspot_loss": 0.4577127993106842, "eval_screenspot_loss_ce": 1.8053931967187964e-06, "eval_screenspot_loss_iou": 0.3567708333333333, "eval_screenspot_loss_num": 0.09382120768229167, "eval_screenspot_loss_xval": 0.46923828125, "eval_screenspot_runtime": 154.6239, "eval_screenspot_samples_per_second": 0.576, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 91278140, "step": 1000 }, { "epoch": 4.166666666666667, "eval_compot_CIoU": 0.4610436111688614, "eval_compot_GIoU": 0.451981320977211, "eval_compot_IoU": 0.5354782938957214, "eval_compot_MAE_all": 0.05896247178316116, "eval_compot_MAE_h": 0.08070729672908783, "eval_compot_MAE_w": 0.13192753866314888, "eval_compot_MAE_x_boxes": 0.12282107770442963, "eval_compot_MAE_y_boxes": 0.08394554629921913, "eval_compot_NUM_probability": 0.9999991953372955, "eval_compot_inside_bbox": 0.6892361044883728, "eval_compot_loss": 0.31611597537994385, "eval_compot_loss_ce": 0.013481661211699247, "eval_compot_loss_iou": 0.3232421875, "eval_compot_loss_num": 0.054492950439453125, "eval_compot_loss_xval": 0.27239990234375, "eval_compot_runtime": 86.7171, "eval_compot_samples_per_second": 0.577, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 91278140, "step": 1000 }, { "epoch": 4.166666666666667, "loss": 0.257731556892395, "loss_ce": 0.012309202924370766, "loss_iou": 0.306640625, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 91278140, "step": 1000 }, { "epoch": 4.170833333333333, "grad_norm": 4.775300264318875, "learning_rate": 5e-05, "loss": 0.1354, "num_input_tokens_seen": 91369144, "step": 1001 }, { "epoch": 4.170833333333333, "loss": 0.184337317943573, "loss_ce": 1.1161824659211561e-05, "loss_iou": 0.3515625, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 91369144, "step": 1001 }, { "epoch": 4.175, "grad_norm": 4.021752324374664, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 91460416, "step": 1002 }, { "epoch": 4.175, "loss": 0.08465791493654251, "loss_ce": 2.1546813968598144e-06, "loss_iou": 0.2314453125, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 91460416, "step": 1002 }, { "epoch": 4.179166666666666, "grad_norm": 5.366416397358166, "learning_rate": 5e-05, "loss": 0.0921, "num_input_tokens_seen": 91551540, "step": 1003 }, { "epoch": 4.179166666666666, "loss": 0.10360711067914963, "loss_ce": 0.0005187301430851221, "loss_iou": 0.50390625, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 91551540, "step": 1003 }, { "epoch": 4.183333333333334, "grad_norm": 3.0526868130769285, "learning_rate": 5e-05, "loss": 0.1933, "num_input_tokens_seen": 91643256, "step": 1004 }, { "epoch": 4.183333333333334, "loss": 0.17099416255950928, "loss_ce": 6.520580063806847e-05, "loss_iou": 0.376953125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 91643256, "step": 1004 }, { "epoch": 4.1875, "grad_norm": 4.153700213808706, "learning_rate": 5e-05, "loss": 0.1375, "num_input_tokens_seen": 91734420, "step": 1005 }, { "epoch": 4.1875, "loss": 0.11866825819015503, "loss_ce": 0.0002905750006902963, "loss_iou": 0.1796875, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 91734420, "step": 1005 }, { "epoch": 4.191666666666666, "grad_norm": 5.938137477817666, "learning_rate": 5e-05, "loss": 0.0948, "num_input_tokens_seen": 91825960, "step": 1006 }, { "epoch": 4.191666666666666, "loss": 0.12803258001804352, "loss_ce": 0.0012625595554709435, "loss_iou": 0.328125, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 91825960, "step": 1006 }, { "epoch": 4.195833333333334, "grad_norm": 6.701228074881766, "learning_rate": 5e-05, "loss": 0.1026, "num_input_tokens_seen": 91917024, "step": 1007 }, { "epoch": 4.195833333333334, "loss": 0.13918179273605347, "loss_ce": 0.0023714962881058455, "loss_iou": 0.31640625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 91917024, "step": 1007 }, { "epoch": 4.2, "grad_norm": 2.58968299528391, "learning_rate": 5e-05, "loss": 0.1368, "num_input_tokens_seen": 92008396, "step": 1008 }, { "epoch": 4.2, "loss": 0.18333737552165985, "loss_ce": 0.004656949080526829, "loss_iou": 0.267578125, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 92008396, "step": 1008 }, { "epoch": 4.204166666666667, "grad_norm": 4.2155909481180736, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 92099656, "step": 1009 }, { "epoch": 4.204166666666667, "loss": 0.1417737752199173, "loss_ce": 0.0001874682493507862, "loss_iou": 0.2373046875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 92099656, "step": 1009 }, { "epoch": 4.208333333333333, "grad_norm": 5.415876527285819, "learning_rate": 5e-05, "loss": 0.1509, "num_input_tokens_seen": 92190484, "step": 1010 }, { "epoch": 4.208333333333333, "loss": 0.18809230625629425, "loss_ce": 0.0008364361710846424, "loss_iou": 0.24609375, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 92190484, "step": 1010 }, { "epoch": 4.2125, "grad_norm": 11.825215048838075, "learning_rate": 5e-05, "loss": 0.1618, "num_input_tokens_seen": 92281520, "step": 1011 }, { "epoch": 4.2125, "loss": 0.10141883790493011, "loss_ce": 0.0022519633639603853, "loss_iou": 0.30859375, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 92281520, "step": 1011 }, { "epoch": 4.216666666666667, "grad_norm": 5.040315147434173, "learning_rate": 5e-05, "loss": 0.0943, "num_input_tokens_seen": 92373020, "step": 1012 }, { "epoch": 4.216666666666667, "loss": 0.1095087081193924, "loss_ce": 0.004589277319610119, "loss_iou": 0.138671875, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 92373020, "step": 1012 }, { "epoch": 4.220833333333333, "grad_norm": 11.145069595062028, "learning_rate": 5e-05, "loss": 0.0987, "num_input_tokens_seen": 92464692, "step": 1013 }, { "epoch": 4.220833333333333, "loss": 0.11222478747367859, "loss_ce": 0.0022241822443902493, "loss_iou": 0.21875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 92464692, "step": 1013 }, { "epoch": 4.225, "grad_norm": 4.100185141836065, "learning_rate": 5e-05, "loss": 0.11, "num_input_tokens_seen": 92556912, "step": 1014 }, { "epoch": 4.225, "loss": 0.0731762945652008, "loss_ce": 0.0004223883734084666, "loss_iou": 0.1552734375, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 92556912, "step": 1014 }, { "epoch": 4.229166666666667, "grad_norm": 11.782810710198497, "learning_rate": 5e-05, "loss": 0.1718, "num_input_tokens_seen": 92648544, "step": 1015 }, { "epoch": 4.229166666666667, "loss": 0.13355088233947754, "loss_ce": 0.00018905679462477565, "loss_iou": 0.10986328125, "loss_num": 0.026611328125, "loss_xval": 0.1337890625, "num_input_tokens_seen": 92648544, "step": 1015 }, { "epoch": 4.233333333333333, "grad_norm": 3.266013508735402, "learning_rate": 5e-05, "loss": 0.0879, "num_input_tokens_seen": 92740228, "step": 1016 }, { "epoch": 4.233333333333333, "loss": 0.110908642411232, "loss_ce": 9.931866952683777e-05, "loss_iou": 0.29296875, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 92740228, "step": 1016 }, { "epoch": 4.2375, "grad_norm": 12.214471155924263, "learning_rate": 5e-05, "loss": 0.1184, "num_input_tokens_seen": 92832024, "step": 1017 }, { "epoch": 4.2375, "loss": 0.09152170270681381, "loss_ce": 0.0006403519655577838, "loss_iou": 0.16796875, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 92832024, "step": 1017 }, { "epoch": 4.241666666666666, "grad_norm": 3.9020094684822277, "learning_rate": 5e-05, "loss": 0.0897, "num_input_tokens_seen": 92922932, "step": 1018 }, { "epoch": 4.241666666666666, "loss": 0.09350967407226562, "loss_ce": 3.81834479412646e-06, "loss_iou": 0.41015625, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 92922932, "step": 1018 }, { "epoch": 4.245833333333334, "grad_norm": 7.433497429425666, "learning_rate": 5e-05, "loss": 0.1451, "num_input_tokens_seen": 93013296, "step": 1019 }, { "epoch": 4.245833333333334, "loss": 0.14127758145332336, "loss_ce": 0.0014155278913676739, "loss_iou": 0.2734375, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 93013296, "step": 1019 }, { "epoch": 4.25, "grad_norm": 4.855624630655086, "learning_rate": 5e-05, "loss": 0.1653, "num_input_tokens_seen": 93104704, "step": 1020 }, { "epoch": 4.25, "loss": 0.22039823234081268, "loss_ce": 0.00012235053873155266, "loss_iou": 0.212890625, "loss_num": 0.0439453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 93104704, "step": 1020 }, { "epoch": 4.254166666666666, "grad_norm": 2.869147326672204, "learning_rate": 5e-05, "loss": 0.089, "num_input_tokens_seen": 93196180, "step": 1021 }, { "epoch": 4.254166666666666, "loss": 0.09016729146242142, "loss_ce": 0.0015747613506391644, "loss_iou": 0.2099609375, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 93196180, "step": 1021 }, { "epoch": 4.258333333333334, "grad_norm": 5.461179477967053, "learning_rate": 5e-05, "loss": 0.1149, "num_input_tokens_seen": 93287704, "step": 1022 }, { "epoch": 4.258333333333334, "loss": 0.10635490715503693, "loss_ce": 0.008027271367609501, "loss_iou": 0.248046875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 93287704, "step": 1022 }, { "epoch": 4.2625, "grad_norm": 2.559847695714954, "learning_rate": 5e-05, "loss": 0.1002, "num_input_tokens_seen": 93379140, "step": 1023 }, { "epoch": 4.2625, "loss": 0.10472606867551804, "loss_ce": 0.001118888845667243, "loss_iou": 0.287109375, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 93379140, "step": 1023 }, { "epoch": 4.266666666666667, "grad_norm": 5.012909495931835, "learning_rate": 5e-05, "loss": 0.1619, "num_input_tokens_seen": 93470312, "step": 1024 }, { "epoch": 4.266666666666667, "loss": 0.11101584136486053, "loss_ce": 0.00026755983708426356, "loss_iou": 0.3359375, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 93470312, "step": 1024 }, { "epoch": 4.270833333333333, "grad_norm": 4.205104507070826, "learning_rate": 5e-05, "loss": 0.1457, "num_input_tokens_seen": 93561672, "step": 1025 }, { "epoch": 4.270833333333333, "loss": 0.17691843211650848, "loss_ce": 8.432482718490064e-05, "loss_iou": 0.1044921875, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 93561672, "step": 1025 }, { "epoch": 4.275, "grad_norm": 3.0823987312822663, "learning_rate": 5e-05, "loss": 0.1219, "num_input_tokens_seen": 93652988, "step": 1026 }, { "epoch": 4.275, "loss": 0.11530449986457825, "loss_ce": 0.00013116124318912625, "loss_iou": 0.33984375, "loss_num": 0.02294921875, "loss_xval": 0.115234375, "num_input_tokens_seen": 93652988, "step": 1026 }, { "epoch": 4.279166666666667, "grad_norm": 41.05969313267573, "learning_rate": 5e-05, "loss": 0.1262, "num_input_tokens_seen": 93743720, "step": 1027 }, { "epoch": 4.279166666666667, "loss": 0.14659270644187927, "loss_ce": 1.6788795619504526e-05, "loss_iou": 0.2255859375, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 93743720, "step": 1027 }, { "epoch": 4.283333333333333, "grad_norm": 12.961008697360798, "learning_rate": 5e-05, "loss": 0.1056, "num_input_tokens_seen": 93834976, "step": 1028 }, { "epoch": 4.283333333333333, "loss": 0.14191699028015137, "loss_ce": 0.00031542833312414587, "loss_iou": 0.486328125, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 93834976, "step": 1028 }, { "epoch": 4.2875, "grad_norm": 3.5652944680153937, "learning_rate": 5e-05, "loss": 0.085, "num_input_tokens_seen": 93926376, "step": 1029 }, { "epoch": 4.2875, "loss": 0.07104349136352539, "loss_ce": 0.0011582336155697703, "loss_iou": 0.267578125, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 93926376, "step": 1029 }, { "epoch": 4.291666666666667, "grad_norm": 6.44765876025744, "learning_rate": 5e-05, "loss": 0.1574, "num_input_tokens_seen": 94017836, "step": 1030 }, { "epoch": 4.291666666666667, "loss": 0.17174601554870605, "loss_ce": 0.00159526988863945, "loss_iou": 0.267578125, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 94017836, "step": 1030 }, { "epoch": 4.295833333333333, "grad_norm": 5.20718161105157, "learning_rate": 5e-05, "loss": 0.1019, "num_input_tokens_seen": 94108616, "step": 1031 }, { "epoch": 4.295833333333333, "loss": 0.10829800367355347, "loss_ce": 0.00018948587239719927, "loss_iou": 0.310546875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 94108616, "step": 1031 }, { "epoch": 4.3, "grad_norm": 7.001137693637598, "learning_rate": 5e-05, "loss": 0.1759, "num_input_tokens_seen": 94199668, "step": 1032 }, { "epoch": 4.3, "loss": 0.17750316858291626, "loss_ce": 7.396183355012909e-05, "loss_iou": 0.390625, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 94199668, "step": 1032 }, { "epoch": 4.304166666666666, "grad_norm": 5.786106834808877, "learning_rate": 5e-05, "loss": 0.1418, "num_input_tokens_seen": 94291036, "step": 1033 }, { "epoch": 4.304166666666666, "loss": 0.1702231764793396, "loss_ce": 0.002376505173742771, "loss_iou": 0.13671875, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 94291036, "step": 1033 }, { "epoch": 4.308333333333334, "grad_norm": 4.057659988135845, "learning_rate": 5e-05, "loss": 0.0903, "num_input_tokens_seen": 94382240, "step": 1034 }, { "epoch": 4.308333333333334, "loss": 0.09596603363752365, "loss_ce": 0.0020329286344349384, "loss_iou": 0.3125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 94382240, "step": 1034 }, { "epoch": 4.3125, "grad_norm": 60.708773659127075, "learning_rate": 5e-05, "loss": 0.1717, "num_input_tokens_seen": 94473272, "step": 1035 }, { "epoch": 4.3125, "loss": 0.15523292124271393, "loss_ce": 0.006978522054851055, "loss_iou": 0.279296875, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 94473272, "step": 1035 }, { "epoch": 4.316666666666666, "grad_norm": 15.166310694757913, "learning_rate": 5e-05, "loss": 0.2223, "num_input_tokens_seen": 94564500, "step": 1036 }, { "epoch": 4.316666666666666, "loss": 0.2636827230453491, "loss_ce": 1.0847867088159546e-05, "loss_iou": 0.13671875, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 94564500, "step": 1036 }, { "epoch": 4.320833333333334, "grad_norm": 4.906645673402136, "learning_rate": 5e-05, "loss": 0.1672, "num_input_tokens_seen": 94655904, "step": 1037 }, { "epoch": 4.320833333333334, "loss": 0.2126345932483673, "loss_ce": 0.0011172639206051826, "loss_iou": 0.298828125, "loss_num": 0.042236328125, "loss_xval": 0.2119140625, "num_input_tokens_seen": 94655904, "step": 1037 }, { "epoch": 4.325, "grad_norm": 6.5407679231960385, "learning_rate": 5e-05, "loss": 0.1233, "num_input_tokens_seen": 94747468, "step": 1038 }, { "epoch": 4.325, "loss": 0.13433226943016052, "loss_ce": 0.001580800162628293, "loss_iou": 0.296875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 94747468, "step": 1038 }, { "epoch": 4.329166666666667, "grad_norm": 2.6249773090077264, "learning_rate": 5e-05, "loss": 0.1367, "num_input_tokens_seen": 94838488, "step": 1039 }, { "epoch": 4.329166666666667, "loss": 0.09026487916707993, "loss_ce": 0.00106199795845896, "loss_iou": 0.287109375, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 94838488, "step": 1039 }, { "epoch": 4.333333333333333, "grad_norm": 4.371515638310731, "learning_rate": 5e-05, "loss": 0.1191, "num_input_tokens_seen": 94929896, "step": 1040 }, { "epoch": 4.333333333333333, "loss": 0.10758574306964874, "loss_ce": 0.0004690401256084442, "loss_iou": 0.353515625, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 94929896, "step": 1040 }, { "epoch": 4.3375, "grad_norm": 4.839714934876044, "learning_rate": 5e-05, "loss": 0.1067, "num_input_tokens_seen": 95020620, "step": 1041 }, { "epoch": 4.3375, "loss": 0.11356394737958908, "loss_ce": 8.036779036046937e-06, "loss_iou": 0.34765625, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 95020620, "step": 1041 }, { "epoch": 4.341666666666667, "grad_norm": 2.935495078974472, "learning_rate": 5e-05, "loss": 0.0837, "num_input_tokens_seen": 95111780, "step": 1042 }, { "epoch": 4.341666666666667, "loss": 0.0978274717926979, "loss_ce": 0.00215486460365355, "loss_iou": 0.189453125, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 95111780, "step": 1042 }, { "epoch": 4.345833333333333, "grad_norm": 5.575717494937947, "learning_rate": 5e-05, "loss": 0.1329, "num_input_tokens_seen": 95202328, "step": 1043 }, { "epoch": 4.345833333333333, "loss": 0.0912865400314331, "loss_ce": 0.00106132123619318, "loss_iou": 0.2431640625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 95202328, "step": 1043 }, { "epoch": 4.35, "grad_norm": 38.64345492570206, "learning_rate": 5e-05, "loss": 0.0997, "num_input_tokens_seen": 95293976, "step": 1044 }, { "epoch": 4.35, "loss": 0.09628809988498688, "loss_ce": 0.0026449114084243774, "loss_iou": 0.361328125, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 95293976, "step": 1044 }, { "epoch": 4.354166666666667, "grad_norm": 4.982650414589755, "learning_rate": 5e-05, "loss": 0.1407, "num_input_tokens_seen": 95385772, "step": 1045 }, { "epoch": 4.354166666666667, "loss": 0.08871060609817505, "loss_ce": 0.0003011856460943818, "loss_iou": 0.32421875, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 95385772, "step": 1045 }, { "epoch": 4.358333333333333, "grad_norm": 6.46543114351235, "learning_rate": 5e-05, "loss": 0.1169, "num_input_tokens_seen": 95477428, "step": 1046 }, { "epoch": 4.358333333333333, "loss": 0.13676266372203827, "loss_ce": 0.0015850570052862167, "loss_iou": 0.337890625, "loss_num": 0.027099609375, "loss_xval": 0.134765625, "num_input_tokens_seen": 95477428, "step": 1046 }, { "epoch": 4.3625, "grad_norm": 5.489329432381536, "learning_rate": 5e-05, "loss": 0.1253, "num_input_tokens_seen": 95568880, "step": 1047 }, { "epoch": 4.3625, "loss": 0.14458361268043518, "loss_ce": 0.0009297404321841896, "loss_iou": 0.388671875, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 95568880, "step": 1047 }, { "epoch": 4.366666666666666, "grad_norm": 5.301059214254398, "learning_rate": 5e-05, "loss": 0.1523, "num_input_tokens_seen": 95659964, "step": 1048 }, { "epoch": 4.366666666666666, "loss": 0.18293695151805878, "loss_ce": 0.0017235726118087769, "loss_iou": 0.419921875, "loss_num": 0.0361328125, "loss_xval": 0.181640625, "num_input_tokens_seen": 95659964, "step": 1048 }, { "epoch": 4.370833333333334, "grad_norm": 4.682583376414269, "learning_rate": 5e-05, "loss": 0.1188, "num_input_tokens_seen": 95751316, "step": 1049 }, { "epoch": 4.370833333333334, "loss": 0.08031313121318817, "loss_ce": 0.0013336361153051257, "loss_iou": 0.2119140625, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 95751316, "step": 1049 }, { "epoch": 4.375, "grad_norm": 2.8000950158405327, "learning_rate": 5e-05, "loss": 0.1377, "num_input_tokens_seen": 95842480, "step": 1050 }, { "epoch": 4.375, "loss": 0.11660677939653397, "loss_ce": 9.066909842658788e-05, "loss_iou": 0.330078125, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 95842480, "step": 1050 }, { "epoch": 4.379166666666666, "grad_norm": 2.890628721811094, "learning_rate": 5e-05, "loss": 0.0822, "num_input_tokens_seen": 95933772, "step": 1051 }, { "epoch": 4.379166666666666, "loss": 0.07693706452846527, "loss_ce": 0.00021587421360891312, "loss_iou": 0.197265625, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 95933772, "step": 1051 }, { "epoch": 4.383333333333334, "grad_norm": 6.7502043431928715, "learning_rate": 5e-05, "loss": 0.1116, "num_input_tokens_seen": 96025156, "step": 1052 }, { "epoch": 4.383333333333334, "loss": 0.06723552942276001, "loss_ce": 0.0006766942678950727, "loss_iou": 0.31640625, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 96025156, "step": 1052 }, { "epoch": 4.3875, "grad_norm": 12.387700241419916, "learning_rate": 5e-05, "loss": 0.1287, "num_input_tokens_seen": 96116552, "step": 1053 }, { "epoch": 4.3875, "loss": 0.16309259831905365, "loss_ce": 0.0009221778018400073, "loss_iou": 0.46875, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 96116552, "step": 1053 }, { "epoch": 4.391666666666667, "grad_norm": 2.7429843825048317, "learning_rate": 5e-05, "loss": 0.1145, "num_input_tokens_seen": 96208260, "step": 1054 }, { "epoch": 4.391666666666667, "loss": 0.11623889207839966, "loss_ce": 0.00274402042850852, "loss_iou": 0.361328125, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 96208260, "step": 1054 }, { "epoch": 4.395833333333333, "grad_norm": 3.122685674128764, "learning_rate": 5e-05, "loss": 0.0835, "num_input_tokens_seen": 96299532, "step": 1055 }, { "epoch": 4.395833333333333, "loss": 0.08110110461711884, "loss_ce": 0.002625147346407175, "loss_iou": 0.1640625, "loss_num": 0.015625, "loss_xval": 0.07861328125, "num_input_tokens_seen": 96299532, "step": 1055 }, { "epoch": 4.4, "grad_norm": 3.2042392537005755, "learning_rate": 5e-05, "loss": 0.1466, "num_input_tokens_seen": 96390992, "step": 1056 }, { "epoch": 4.4, "loss": 0.162710040807724, "loss_ce": 0.0016687808092683554, "loss_iou": 0.224609375, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 96390992, "step": 1056 }, { "epoch": 4.404166666666667, "grad_norm": 12.795231372183528, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 96482032, "step": 1057 }, { "epoch": 4.404166666666667, "loss": 0.11816906929016113, "loss_ce": 0.003926511853933334, "loss_iou": 0.103515625, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 96482032, "step": 1057 }, { "epoch": 4.408333333333333, "grad_norm": 6.009806761384676, "learning_rate": 5e-05, "loss": 0.1758, "num_input_tokens_seen": 96573696, "step": 1058 }, { "epoch": 4.408333333333333, "loss": 0.140816330909729, "loss_ce": 0.004646895453333855, "loss_iou": 0.2119140625, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 96573696, "step": 1058 }, { "epoch": 4.4125, "grad_norm": 4.539913894785967, "learning_rate": 5e-05, "loss": 0.0993, "num_input_tokens_seen": 96664868, "step": 1059 }, { "epoch": 4.4125, "loss": 0.111955925822258, "loss_ce": 0.0019400569144636393, "loss_iou": 0.2021484375, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 96664868, "step": 1059 }, { "epoch": 4.416666666666667, "grad_norm": 8.195207688890294, "learning_rate": 5e-05, "loss": 0.1131, "num_input_tokens_seen": 96756356, "step": 1060 }, { "epoch": 4.416666666666667, "loss": 0.1350308656692505, "loss_ce": 0.0017300797626376152, "loss_iou": 0.23828125, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 96756356, "step": 1060 }, { "epoch": 4.420833333333333, "grad_norm": 3.0907685806253427, "learning_rate": 5e-05, "loss": 0.137, "num_input_tokens_seen": 96847764, "step": 1061 }, { "epoch": 4.420833333333333, "loss": 0.13408119976520538, "loss_ce": 0.0004905025125481188, "loss_iou": 0.41796875, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 96847764, "step": 1061 }, { "epoch": 4.425, "grad_norm": 4.23812751420282, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 96938988, "step": 1062 }, { "epoch": 4.425, "loss": 0.08144142478704453, "loss_ce": 0.0015769237652420998, "loss_iou": 0.263671875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 96938988, "step": 1062 }, { "epoch": 4.429166666666666, "grad_norm": 4.6119967415837015, "learning_rate": 5e-05, "loss": 0.0975, "num_input_tokens_seen": 97029908, "step": 1063 }, { "epoch": 4.429166666666666, "loss": 0.10544291138648987, "loss_ce": 4.6817790462228e-06, "loss_iou": 0.302734375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 97029908, "step": 1063 }, { "epoch": 4.433333333333334, "grad_norm": 20.866494495602268, "learning_rate": 5e-05, "loss": 0.1457, "num_input_tokens_seen": 97121228, "step": 1064 }, { "epoch": 4.433333333333334, "loss": 0.15473097562789917, "loss_ce": 0.00260083912871778, "loss_iou": 0.349609375, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 97121228, "step": 1064 }, { "epoch": 4.4375, "grad_norm": 8.185767026864667, "learning_rate": 5e-05, "loss": 0.1013, "num_input_tokens_seen": 97212372, "step": 1065 }, { "epoch": 4.4375, "loss": 0.08164595067501068, "loss_ce": 0.0008964404696598649, "loss_iou": 0.1953125, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 97212372, "step": 1065 }, { "epoch": 4.441666666666666, "grad_norm": 9.766264831533416, "learning_rate": 5e-05, "loss": 0.1549, "num_input_tokens_seen": 97303324, "step": 1066 }, { "epoch": 4.441666666666666, "loss": 0.1613626778125763, "loss_ce": 3.1500829209107906e-05, "loss_iou": 0.2275390625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 97303324, "step": 1066 }, { "epoch": 4.445833333333334, "grad_norm": 8.703973354918965, "learning_rate": 5e-05, "loss": 0.1389, "num_input_tokens_seen": 97394880, "step": 1067 }, { "epoch": 4.445833333333334, "loss": 0.1335448920726776, "loss_ce": 0.00012203974620206282, "loss_iou": 0.419921875, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 97394880, "step": 1067 }, { "epoch": 4.45, "grad_norm": 23.521570844410952, "learning_rate": 5e-05, "loss": 0.1463, "num_input_tokens_seen": 97486472, "step": 1068 }, { "epoch": 4.45, "loss": 0.129866823554039, "loss_ce": 0.0012657458428293467, "loss_iou": 0.390625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 97486472, "step": 1068 }, { "epoch": 4.454166666666667, "grad_norm": 5.231644248721244, "learning_rate": 5e-05, "loss": 0.1618, "num_input_tokens_seen": 97578168, "step": 1069 }, { "epoch": 4.454166666666667, "loss": 0.15577560663223267, "loss_ce": 0.0015092582907527685, "loss_iou": 0.2333984375, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 97578168, "step": 1069 }, { "epoch": 4.458333333333333, "grad_norm": 4.642798890431178, "learning_rate": 5e-05, "loss": 0.1194, "num_input_tokens_seen": 97669944, "step": 1070 }, { "epoch": 4.458333333333333, "loss": 0.10740844905376434, "loss_ce": 0.0020617651753127575, "loss_iou": 0.27734375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 97669944, "step": 1070 }, { "epoch": 4.4625, "grad_norm": 11.450914706764102, "learning_rate": 5e-05, "loss": 0.1861, "num_input_tokens_seen": 97760660, "step": 1071 }, { "epoch": 4.4625, "loss": 0.2228844165802002, "loss_ce": 0.0006554118008352816, "loss_iou": 0.275390625, "loss_num": 0.04443359375, "loss_xval": 0.22265625, "num_input_tokens_seen": 97760660, "step": 1071 }, { "epoch": 4.466666666666667, "grad_norm": 2.8459211790179264, "learning_rate": 5e-05, "loss": 0.1802, "num_input_tokens_seen": 97851884, "step": 1072 }, { "epoch": 4.466666666666667, "loss": 0.2216033786535263, "loss_ce": 4.576363062369637e-05, "loss_iou": 0.28515625, "loss_num": 0.04443359375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 97851884, "step": 1072 }, { "epoch": 4.470833333333333, "grad_norm": 8.759497913734183, "learning_rate": 5e-05, "loss": 0.1255, "num_input_tokens_seen": 97943872, "step": 1073 }, { "epoch": 4.470833333333333, "loss": 0.19304654002189636, "loss_ce": 0.00418851338326931, "loss_iou": 0.201171875, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 97943872, "step": 1073 }, { "epoch": 4.475, "grad_norm": 1.6902853797270938, "learning_rate": 5e-05, "loss": 0.1127, "num_input_tokens_seen": 98034840, "step": 1074 }, { "epoch": 4.475, "loss": 0.1364348828792572, "loss_ce": 0.0004638074606191367, "loss_iou": 0.2578125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 98034840, "step": 1074 }, { "epoch": 4.479166666666667, "grad_norm": 5.3041756834731135, "learning_rate": 5e-05, "loss": 0.2545, "num_input_tokens_seen": 98126216, "step": 1075 }, { "epoch": 4.479166666666667, "loss": 0.22662073373794556, "loss_ce": 0.000882212829310447, "loss_iou": 0.291015625, "loss_num": 0.045166015625, "loss_xval": 0.2255859375, "num_input_tokens_seen": 98126216, "step": 1075 }, { "epoch": 4.483333333333333, "grad_norm": 5.693385201390032, "learning_rate": 5e-05, "loss": 0.0939, "num_input_tokens_seen": 98217784, "step": 1076 }, { "epoch": 4.483333333333333, "loss": 0.08883555233478546, "loss_ce": 0.0010975135955959558, "loss_iou": 0.3984375, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 98217784, "step": 1076 }, { "epoch": 4.4875, "grad_norm": 11.974885328346211, "learning_rate": 5e-05, "loss": 0.111, "num_input_tokens_seen": 98308892, "step": 1077 }, { "epoch": 4.4875, "loss": 0.08810891956090927, "loss_ce": 0.0007065777899697423, "loss_iou": 0.31640625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 98308892, "step": 1077 }, { "epoch": 4.491666666666666, "grad_norm": 5.405136544371594, "learning_rate": 5e-05, "loss": 0.182, "num_input_tokens_seen": 98400664, "step": 1078 }, { "epoch": 4.491666666666666, "loss": 0.1772213578224182, "loss_ce": 0.0016232000198215246, "loss_iou": 0.27734375, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 98400664, "step": 1078 }, { "epoch": 4.495833333333334, "grad_norm": 15.460965408014841, "learning_rate": 5e-05, "loss": 0.1202, "num_input_tokens_seen": 98492180, "step": 1079 }, { "epoch": 4.495833333333334, "loss": 0.11177276074886322, "loss_ce": 0.0016958509804680943, "loss_iou": 0.294921875, "loss_num": 0.0220947265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 98492180, "step": 1079 }, { "epoch": 4.5, "grad_norm": 1.8566096703105832, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 98583616, "step": 1080 }, { "epoch": 4.5, "loss": 0.07147879153490067, "loss_ce": 0.0007237876998260617, "loss_iou": 0.1484375, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 98583616, "step": 1080 }, { "epoch": 4.504166666666666, "grad_norm": 4.0890400559643, "learning_rate": 5e-05, "loss": 0.0996, "num_input_tokens_seen": 98673480, "step": 1081 }, { "epoch": 4.504166666666666, "loss": 0.1101948693394661, "loss_ce": 0.0014302213676273823, "loss_iou": 0.296875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 98673480, "step": 1081 }, { "epoch": 4.508333333333333, "grad_norm": 3.0362109072862378, "learning_rate": 5e-05, "loss": 0.1142, "num_input_tokens_seen": 98764280, "step": 1082 }, { "epoch": 4.508333333333333, "loss": 0.1345566213130951, "loss_ce": 9.617566684028134e-05, "loss_iou": 0.34765625, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 98764280, "step": 1082 }, { "epoch": 4.5125, "grad_norm": 2.074294944015263, "learning_rate": 5e-05, "loss": 0.0802, "num_input_tokens_seen": 98855312, "step": 1083 }, { "epoch": 4.5125, "loss": 0.10959449410438538, "loss_ce": 0.005514297168701887, "loss_iou": 0.439453125, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 98855312, "step": 1083 }, { "epoch": 4.516666666666667, "grad_norm": 6.223919956572017, "learning_rate": 5e-05, "loss": 0.126, "num_input_tokens_seen": 98946616, "step": 1084 }, { "epoch": 4.516666666666667, "loss": 0.0991659164428711, "loss_ce": 0.0009603450307622552, "loss_iou": 0.2578125, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 98946616, "step": 1084 }, { "epoch": 4.520833333333333, "grad_norm": 4.230394093817779, "learning_rate": 5e-05, "loss": 0.0991, "num_input_tokens_seen": 99037472, "step": 1085 }, { "epoch": 4.520833333333333, "loss": 0.0747312381863594, "loss_ce": 0.0007413700805045664, "loss_iou": 0.26953125, "loss_num": 0.0147705078125, "loss_xval": 0.07421875, "num_input_tokens_seen": 99037472, "step": 1085 }, { "epoch": 4.525, "grad_norm": 3.7165027957165955, "learning_rate": 5e-05, "loss": 0.1435, "num_input_tokens_seen": 99128828, "step": 1086 }, { "epoch": 4.525, "loss": 0.1280263066291809, "loss_ce": 0.00021869037300348282, "loss_iou": 0.455078125, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 99128828, "step": 1086 }, { "epoch": 4.529166666666667, "grad_norm": 14.01206249592369, "learning_rate": 5e-05, "loss": 0.1471, "num_input_tokens_seen": 99220736, "step": 1087 }, { "epoch": 4.529166666666667, "loss": 0.16491644084453583, "loss_ce": 0.0023187866900116205, "loss_iou": 0.328125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 99220736, "step": 1087 }, { "epoch": 4.533333333333333, "grad_norm": 2.599111310139979, "learning_rate": 5e-05, "loss": 0.1357, "num_input_tokens_seen": 99311436, "step": 1088 }, { "epoch": 4.533333333333333, "loss": 0.16733185946941376, "loss_ce": 0.0006448504282161593, "loss_iou": 0.220703125, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 99311436, "step": 1088 }, { "epoch": 4.5375, "grad_norm": 4.24205043514949, "learning_rate": 5e-05, "loss": 0.0905, "num_input_tokens_seen": 99403160, "step": 1089 }, { "epoch": 4.5375, "loss": 0.07050883769989014, "loss_ce": 0.001424671383574605, "loss_iou": 0.2216796875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 99403160, "step": 1089 }, { "epoch": 4.541666666666667, "grad_norm": 3.7078569700241553, "learning_rate": 5e-05, "loss": 0.1323, "num_input_tokens_seen": 99494308, "step": 1090 }, { "epoch": 4.541666666666667, "loss": 0.1405109465122223, "loss_ce": 0.0011218992294743657, "loss_iou": 0.1826171875, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 99494308, "step": 1090 }, { "epoch": 4.545833333333333, "grad_norm": 4.602171652582107, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 99586328, "step": 1091 }, { "epoch": 4.545833333333333, "loss": 0.07130880653858185, "loss_ce": 0.002194119617342949, "loss_iou": 0.265625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 99586328, "step": 1091 }, { "epoch": 4.55, "grad_norm": 5.519884250024296, "learning_rate": 5e-05, "loss": 0.1428, "num_input_tokens_seen": 99677508, "step": 1092 }, { "epoch": 4.55, "loss": 0.19089868664741516, "loss_ce": 0.0008352050790563226, "loss_iou": 0.37109375, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 99677508, "step": 1092 }, { "epoch": 4.554166666666667, "grad_norm": 3.085873838630324, "learning_rate": 5e-05, "loss": 0.1394, "num_input_tokens_seen": 99768852, "step": 1093 }, { "epoch": 4.554166666666667, "loss": 0.18020425736904144, "loss_ce": 0.00039468033355660737, "loss_iou": 0.408203125, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 99768852, "step": 1093 }, { "epoch": 4.558333333333334, "grad_norm": 7.984248544548892, "learning_rate": 5e-05, "loss": 0.1766, "num_input_tokens_seen": 99860088, "step": 1094 }, { "epoch": 4.558333333333334, "loss": 0.2613310217857361, "loss_ce": 3.948756420868449e-05, "loss_iou": 0.32421875, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 99860088, "step": 1094 }, { "epoch": 4.5625, "grad_norm": 14.35350456740094, "learning_rate": 5e-05, "loss": 0.125, "num_input_tokens_seen": 99951820, "step": 1095 }, { "epoch": 4.5625, "loss": 0.1256924569606781, "loss_ce": 0.00035675818799063563, "loss_iou": 0.333984375, "loss_num": 0.025146484375, "loss_xval": 0.125, "num_input_tokens_seen": 99951820, "step": 1095 }, { "epoch": 4.566666666666666, "grad_norm": 15.883834122510626, "learning_rate": 5e-05, "loss": 0.135, "num_input_tokens_seen": 100043104, "step": 1096 }, { "epoch": 4.566666666666666, "loss": 0.11116228997707367, "loss_ce": 0.0041066245175898075, "loss_iou": 0.296875, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 100043104, "step": 1096 }, { "epoch": 4.570833333333333, "grad_norm": 4.523964712573911, "learning_rate": 5e-05, "loss": 0.109, "num_input_tokens_seen": 100134216, "step": 1097 }, { "epoch": 4.570833333333333, "loss": 0.07000759243965149, "loss_ce": 0.0021212399005889893, "loss_iou": 0.30078125, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 100134216, "step": 1097 }, { "epoch": 4.575, "grad_norm": 3.061116023954208, "learning_rate": 5e-05, "loss": 0.1407, "num_input_tokens_seen": 100225776, "step": 1098 }, { "epoch": 4.575, "loss": 0.11010673642158508, "loss_ce": 0.001220025704242289, "loss_iou": 0.234375, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 100225776, "step": 1098 }, { "epoch": 4.579166666666667, "grad_norm": 5.851281585800328, "learning_rate": 5e-05, "loss": 0.1402, "num_input_tokens_seen": 100316984, "step": 1099 }, { "epoch": 4.579166666666667, "loss": 0.16490298509597778, "loss_ce": 0.00032168958568945527, "loss_iou": 0.294921875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 100316984, "step": 1099 }, { "epoch": 4.583333333333333, "grad_norm": 2.198330942761651, "learning_rate": 5e-05, "loss": 0.1685, "num_input_tokens_seen": 100408276, "step": 1100 }, { "epoch": 4.583333333333333, "loss": 0.10341347754001617, "loss_ce": 0.00046242796815931797, "loss_iou": 0.06787109375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 100408276, "step": 1100 }, { "epoch": 4.5875, "grad_norm": 8.432062934184843, "learning_rate": 5e-05, "loss": 0.149, "num_input_tokens_seen": 100499856, "step": 1101 }, { "epoch": 4.5875, "loss": 0.14112815260887146, "loss_ce": 0.002944562118500471, "loss_iou": 0.294921875, "loss_num": 0.027587890625, "loss_xval": 0.138671875, "num_input_tokens_seen": 100499856, "step": 1101 }, { "epoch": 4.591666666666667, "grad_norm": 9.177777201547373, "learning_rate": 5e-05, "loss": 0.1512, "num_input_tokens_seen": 100591332, "step": 1102 }, { "epoch": 4.591666666666667, "loss": 0.1141015812754631, "loss_ce": 0.0024530200753360987, "loss_iou": 0.37109375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 100591332, "step": 1102 }, { "epoch": 4.595833333333333, "grad_norm": 7.531965106745693, "learning_rate": 5e-05, "loss": 0.0928, "num_input_tokens_seen": 100682972, "step": 1103 }, { "epoch": 4.595833333333333, "loss": 0.09467661380767822, "loss_ce": 0.0016590356826782227, "loss_iou": 0.359375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 100682972, "step": 1103 }, { "epoch": 4.6, "grad_norm": 3.007683190875576, "learning_rate": 5e-05, "loss": 0.0963, "num_input_tokens_seen": 100774672, "step": 1104 }, { "epoch": 4.6, "loss": 0.14586971700191498, "loss_ce": 0.006160246208310127, "loss_iou": 0.25390625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 100774672, "step": 1104 }, { "epoch": 4.604166666666667, "grad_norm": 6.5108841230106, "learning_rate": 5e-05, "loss": 0.1634, "num_input_tokens_seen": 100865852, "step": 1105 }, { "epoch": 4.604166666666667, "loss": 0.08255942910909653, "loss_ce": 0.0003298107476439327, "loss_iou": 0.25, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 100865852, "step": 1105 }, { "epoch": 4.608333333333333, "grad_norm": 1.9635394555090955, "learning_rate": 5e-05, "loss": 0.1689, "num_input_tokens_seen": 100956540, "step": 1106 }, { "epoch": 4.608333333333333, "loss": 0.22228175401687622, "loss_ce": 2.223448973381892e-05, "loss_iou": 0.1953125, "loss_num": 0.04443359375, "loss_xval": 0.22265625, "num_input_tokens_seen": 100956540, "step": 1106 }, { "epoch": 4.6125, "grad_norm": 3.526058451584953, "learning_rate": 5e-05, "loss": 0.1048, "num_input_tokens_seen": 101047388, "step": 1107 }, { "epoch": 4.6125, "loss": 0.0747670978307724, "loss_ce": 0.0007314551039598882, "loss_iou": 0.2890625, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 101047388, "step": 1107 }, { "epoch": 4.616666666666667, "grad_norm": 11.893244447837684, "learning_rate": 5e-05, "loss": 0.1028, "num_input_tokens_seen": 101138732, "step": 1108 }, { "epoch": 4.616666666666667, "loss": 0.12535637617111206, "loss_ce": 0.0013024121290072799, "loss_iou": 0.251953125, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 101138732, "step": 1108 }, { "epoch": 4.620833333333334, "grad_norm": 5.773796067589656, "learning_rate": 5e-05, "loss": 0.1282, "num_input_tokens_seen": 101230184, "step": 1109 }, { "epoch": 4.620833333333334, "loss": 0.10866034775972366, "loss_ce": 0.0015436523826792836, "loss_iou": 0.21484375, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 101230184, "step": 1109 }, { "epoch": 4.625, "grad_norm": 4.518275709839977, "learning_rate": 5e-05, "loss": 0.123, "num_input_tokens_seen": 101321308, "step": 1110 }, { "epoch": 4.625, "loss": 0.1575690656900406, "loss_ce": 6.810591457906412e-06, "loss_iou": 0.41796875, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 101321308, "step": 1110 }, { "epoch": 4.629166666666666, "grad_norm": 4.7529760698914485, "learning_rate": 5e-05, "loss": 0.1158, "num_input_tokens_seen": 101412848, "step": 1111 }, { "epoch": 4.629166666666666, "loss": 0.08711521327495575, "loss_ce": 0.0008115016971714795, "loss_iou": 0.29296875, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 101412848, "step": 1111 }, { "epoch": 4.633333333333333, "grad_norm": 3.132943307581427, "learning_rate": 5e-05, "loss": 0.127, "num_input_tokens_seen": 101504576, "step": 1112 }, { "epoch": 4.633333333333333, "loss": 0.15530680119991302, "loss_ce": 0.0011930357431992888, "loss_iou": 0.283203125, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 101504576, "step": 1112 }, { "epoch": 4.6375, "grad_norm": 2.886742399910248, "learning_rate": 5e-05, "loss": 0.1289, "num_input_tokens_seen": 101595300, "step": 1113 }, { "epoch": 4.6375, "loss": 0.12066149711608887, "loss_ce": 2.550972203607671e-05, "loss_iou": 0.404296875, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 101595300, "step": 1113 }, { "epoch": 4.641666666666667, "grad_norm": 13.523813010513921, "learning_rate": 5e-05, "loss": 0.2184, "num_input_tokens_seen": 101686740, "step": 1114 }, { "epoch": 4.641666666666667, "loss": 0.29297542572021484, "loss_ce": 0.0011052797781303525, "loss_iou": 0.2080078125, "loss_num": 0.058349609375, "loss_xval": 0.291015625, "num_input_tokens_seen": 101686740, "step": 1114 }, { "epoch": 4.645833333333333, "grad_norm": 3.3219089109226085, "learning_rate": 5e-05, "loss": 0.1614, "num_input_tokens_seen": 101778172, "step": 1115 }, { "epoch": 4.645833333333333, "loss": 0.14340245723724365, "loss_ce": 6.140043842606246e-05, "loss_iou": 0.330078125, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 101778172, "step": 1115 }, { "epoch": 4.65, "grad_norm": 10.960525792775918, "learning_rate": 5e-05, "loss": 0.1611, "num_input_tokens_seen": 101869384, "step": 1116 }, { "epoch": 4.65, "loss": 0.19843502342700958, "loss_ce": 0.0028860135935246944, "loss_iou": 0.373046875, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 101869384, "step": 1116 }, { "epoch": 4.654166666666667, "grad_norm": 4.007868167346531, "learning_rate": 5e-05, "loss": 0.1422, "num_input_tokens_seen": 101960296, "step": 1117 }, { "epoch": 4.654166666666667, "loss": 0.1601758450269699, "loss_ce": 0.0002637325960677117, "loss_iou": 0.310546875, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 101960296, "step": 1117 }, { "epoch": 4.658333333333333, "grad_norm": 9.30897934729109, "learning_rate": 5e-05, "loss": 0.1561, "num_input_tokens_seen": 102051264, "step": 1118 }, { "epoch": 4.658333333333333, "loss": 0.1870819479227066, "loss_ce": 0.004830969497561455, "loss_iou": 0.326171875, "loss_num": 0.036376953125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 102051264, "step": 1118 }, { "epoch": 4.6625, "grad_norm": 5.973730886667036, "learning_rate": 5e-05, "loss": 0.1121, "num_input_tokens_seen": 102142604, "step": 1119 }, { "epoch": 4.6625, "loss": 0.1165546178817749, "loss_ce": 0.004310957621783018, "loss_iou": 0.37109375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 102142604, "step": 1119 }, { "epoch": 4.666666666666667, "grad_norm": 4.216800416560698, "learning_rate": 5e-05, "loss": 0.165, "num_input_tokens_seen": 102234036, "step": 1120 }, { "epoch": 4.666666666666667, "loss": 0.16760239005088806, "loss_ce": 0.0006101946346461773, "loss_iou": 0.3046875, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 102234036, "step": 1120 }, { "epoch": 4.670833333333333, "grad_norm": 10.564228271071244, "learning_rate": 5e-05, "loss": 0.0977, "num_input_tokens_seen": 102325188, "step": 1121 }, { "epoch": 4.670833333333333, "loss": 0.07861147820949554, "loss_ce": 0.00027285737451165915, "loss_iou": 0.29296875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 102325188, "step": 1121 }, { "epoch": 4.675, "grad_norm": 5.613473524594385, "learning_rate": 5e-05, "loss": 0.1335, "num_input_tokens_seen": 102415764, "step": 1122 }, { "epoch": 4.675, "loss": 0.11580031365156174, "loss_ce": 3.188900154782459e-05, "loss_iou": 0.2001953125, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 102415764, "step": 1122 }, { "epoch": 4.679166666666667, "grad_norm": 3.4497010574149303, "learning_rate": 5e-05, "loss": 0.1118, "num_input_tokens_seen": 102507248, "step": 1123 }, { "epoch": 4.679166666666667, "loss": 0.12468966841697693, "loss_ce": 0.00011691106192301959, "loss_iou": 0.3203125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 102507248, "step": 1123 }, { "epoch": 4.683333333333334, "grad_norm": 2.406253546576618, "learning_rate": 5e-05, "loss": 0.0764, "num_input_tokens_seen": 102598672, "step": 1124 }, { "epoch": 4.683333333333334, "loss": 0.0875929445028305, "loss_ce": 0.001060347887687385, "loss_iou": 0.2421875, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 102598672, "step": 1124 }, { "epoch": 4.6875, "grad_norm": 13.838199882433146, "learning_rate": 5e-05, "loss": 0.1055, "num_input_tokens_seen": 102689792, "step": 1125 }, { "epoch": 4.6875, "loss": 0.10166600346565247, "loss_ce": 2.7204778234590776e-05, "loss_iou": 0.2119140625, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 102689792, "step": 1125 }, { "epoch": 4.691666666666666, "grad_norm": 3.0925421228060834, "learning_rate": 5e-05, "loss": 0.1917, "num_input_tokens_seen": 102781628, "step": 1126 }, { "epoch": 4.691666666666666, "loss": 0.17679978907108307, "loss_ce": 0.0005302638746798038, "loss_iou": 0.2412109375, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 102781628, "step": 1126 }, { "epoch": 4.695833333333333, "grad_norm": 4.711952688437384, "learning_rate": 5e-05, "loss": 0.1452, "num_input_tokens_seen": 102873092, "step": 1127 }, { "epoch": 4.695833333333333, "loss": 0.11696916073560715, "loss_ce": 0.0010328851640224457, "loss_iou": 0.365234375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 102873092, "step": 1127 }, { "epoch": 4.7, "grad_norm": 12.698922137838567, "learning_rate": 5e-05, "loss": 0.1822, "num_input_tokens_seen": 102964416, "step": 1128 }, { "epoch": 4.7, "loss": 0.18701621890068054, "loss_ce": 0.005451895762234926, "loss_iou": 0.28125, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 102964416, "step": 1128 }, { "epoch": 4.704166666666667, "grad_norm": 10.40043243339498, "learning_rate": 5e-05, "loss": 0.1573, "num_input_tokens_seen": 103055708, "step": 1129 }, { "epoch": 4.704166666666667, "loss": 0.16252252459526062, "loss_ce": 1.1628793572526774e-06, "loss_iou": 0.46484375, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 103055708, "step": 1129 }, { "epoch": 4.708333333333333, "grad_norm": 2.184086695791655, "learning_rate": 5e-05, "loss": 0.116, "num_input_tokens_seen": 103147032, "step": 1130 }, { "epoch": 4.708333333333333, "loss": 0.13449756801128387, "loss_ce": 0.0019291974604129791, "loss_iou": 0.390625, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 103147032, "step": 1130 }, { "epoch": 4.7125, "grad_norm": 7.032209184530382, "learning_rate": 5e-05, "loss": 0.1126, "num_input_tokens_seen": 103238204, "step": 1131 }, { "epoch": 4.7125, "loss": 0.09560714662075043, "loss_ce": 0.0012162767816334963, "loss_iou": 0.44140625, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 103238204, "step": 1131 }, { "epoch": 4.716666666666667, "grad_norm": 2.8403002406090687, "learning_rate": 5e-05, "loss": 0.1821, "num_input_tokens_seen": 103329796, "step": 1132 }, { "epoch": 4.716666666666667, "loss": 0.1839357614517212, "loss_ce": 0.0002504565636627376, "loss_iou": 0.2080078125, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 103329796, "step": 1132 }, { "epoch": 4.720833333333333, "grad_norm": 6.8795684491862525, "learning_rate": 5e-05, "loss": 0.0855, "num_input_tokens_seen": 103421468, "step": 1133 }, { "epoch": 4.720833333333333, "loss": 0.0764055848121643, "loss_ce": 0.0008440621895715594, "loss_iou": 0.380859375, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 103421468, "step": 1133 }, { "epoch": 4.725, "grad_norm": 10.919233351301784, "learning_rate": 5e-05, "loss": 0.0997, "num_input_tokens_seen": 103513172, "step": 1134 }, { "epoch": 4.725, "loss": 0.056897103786468506, "loss_ce": 0.0006684675463475287, "loss_iou": 0.232421875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 103513172, "step": 1134 }, { "epoch": 4.729166666666667, "grad_norm": 3.647598629703392, "learning_rate": 5e-05, "loss": 0.1303, "num_input_tokens_seen": 103604216, "step": 1135 }, { "epoch": 4.729166666666667, "loss": 0.11557944864034653, "loss_ce": 0.0004213701467961073, "loss_iou": 0.322265625, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 103604216, "step": 1135 }, { "epoch": 4.733333333333333, "grad_norm": 2.3437484820197847, "learning_rate": 5e-05, "loss": 0.11, "num_input_tokens_seen": 103695572, "step": 1136 }, { "epoch": 4.733333333333333, "loss": 0.07198523730039597, "loss_ce": 0.0001773782423697412, "loss_iou": 0.22265625, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 103695572, "step": 1136 }, { "epoch": 4.7375, "grad_norm": 4.234167936321321, "learning_rate": 5e-05, "loss": 0.1169, "num_input_tokens_seen": 103787040, "step": 1137 }, { "epoch": 4.7375, "loss": 0.12047646939754486, "loss_ce": 0.0014884258853271604, "loss_iou": 0.30859375, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 103787040, "step": 1137 }, { "epoch": 4.741666666666667, "grad_norm": 16.781877619157644, "learning_rate": 5e-05, "loss": 0.1287, "num_input_tokens_seen": 103878008, "step": 1138 }, { "epoch": 4.741666666666667, "loss": 0.14299070835113525, "loss_ce": 0.003189687617123127, "loss_iou": 0.353515625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 103878008, "step": 1138 }, { "epoch": 4.745833333333334, "grad_norm": 4.72457382985536, "learning_rate": 5e-05, "loss": 0.0992, "num_input_tokens_seen": 103969652, "step": 1139 }, { "epoch": 4.745833333333334, "loss": 0.040563084185123444, "loss_ce": 0.000646095082629472, "loss_iou": 0.2177734375, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 103969652, "step": 1139 }, { "epoch": 4.75, "grad_norm": 3.0250129729368194, "learning_rate": 5e-05, "loss": 0.0826, "num_input_tokens_seen": 104061028, "step": 1140 }, { "epoch": 4.75, "loss": 0.07690826058387756, "loss_ce": 0.001011039363220334, "loss_iou": 0.18359375, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 104061028, "step": 1140 }, { "epoch": 4.754166666666666, "grad_norm": 2.2427258239010905, "learning_rate": 5e-05, "loss": 0.0678, "num_input_tokens_seen": 104152516, "step": 1141 }, { "epoch": 4.754166666666666, "loss": 0.07243698090314865, "loss_ce": 0.00030868116300553083, "loss_iou": 0.1845703125, "loss_num": 0.014404296875, "loss_xval": 0.072265625, "num_input_tokens_seen": 104152516, "step": 1141 }, { "epoch": 4.758333333333333, "grad_norm": 6.676180269717597, "learning_rate": 5e-05, "loss": 0.1645, "num_input_tokens_seen": 104244052, "step": 1142 }, { "epoch": 4.758333333333333, "loss": 0.12318507581949234, "loss_ce": 0.00019923440413549542, "loss_iou": 0.15625, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 104244052, "step": 1142 }, { "epoch": 4.7625, "grad_norm": 7.079291954745069, "learning_rate": 5e-05, "loss": 0.1527, "num_input_tokens_seen": 104333724, "step": 1143 }, { "epoch": 4.7625, "loss": 0.19446733593940735, "loss_ce": 0.0006807069876231253, "loss_iou": 0.2431640625, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 104333724, "step": 1143 }, { "epoch": 4.766666666666667, "grad_norm": 3.550546756408287, "learning_rate": 5e-05, "loss": 0.1273, "num_input_tokens_seen": 104425440, "step": 1144 }, { "epoch": 4.766666666666667, "loss": 0.12909270823001862, "loss_ce": 0.0006137005402706563, "loss_iou": 0.28515625, "loss_num": 0.025634765625, "loss_xval": 0.12890625, "num_input_tokens_seen": 104425440, "step": 1144 }, { "epoch": 4.770833333333333, "grad_norm": 2.192891962766182, "learning_rate": 5e-05, "loss": 0.0826, "num_input_tokens_seen": 104516736, "step": 1145 }, { "epoch": 4.770833333333333, "loss": 0.09015144407749176, "loss_ce": 0.00015510247612837702, "loss_iou": 0.298828125, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 104516736, "step": 1145 }, { "epoch": 4.775, "grad_norm": 3.1296938404560852, "learning_rate": 5e-05, "loss": 0.0988, "num_input_tokens_seen": 104608108, "step": 1146 }, { "epoch": 4.775, "loss": 0.10480667650699615, "loss_ce": 0.001443637884221971, "loss_iou": 0.23046875, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 104608108, "step": 1146 }, { "epoch": 4.779166666666667, "grad_norm": 3.6372144037935232, "learning_rate": 5e-05, "loss": 0.1935, "num_input_tokens_seen": 104699640, "step": 1147 }, { "epoch": 4.779166666666667, "loss": 0.1913982331752777, "loss_ce": 0.004050817806273699, "loss_iou": 0.408203125, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 104699640, "step": 1147 }, { "epoch": 4.783333333333333, "grad_norm": 2.6064847950760606, "learning_rate": 5e-05, "loss": 0.0681, "num_input_tokens_seen": 104790628, "step": 1148 }, { "epoch": 4.783333333333333, "loss": 0.054977696388959885, "loss_ce": 3.0795123166171834e-05, "loss_iou": 0.30078125, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 104790628, "step": 1148 }, { "epoch": 4.7875, "grad_norm": 2.9820181209674432, "learning_rate": 5e-05, "loss": 0.1189, "num_input_tokens_seen": 104882404, "step": 1149 }, { "epoch": 4.7875, "loss": 0.1582833230495453, "loss_ce": 0.001056756591424346, "loss_iou": 0.2109375, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 104882404, "step": 1149 }, { "epoch": 4.791666666666667, "grad_norm": 3.9393256885276404, "learning_rate": 5e-05, "loss": 0.1714, "num_input_tokens_seen": 104973680, "step": 1150 }, { "epoch": 4.791666666666667, "loss": 0.16472193598747253, "loss_ce": 0.0041842274367809296, "loss_iou": 0.2578125, "loss_num": 0.0322265625, "loss_xval": 0.16015625, "num_input_tokens_seen": 104973680, "step": 1150 }, { "epoch": 4.795833333333333, "grad_norm": 5.190100418108923, "learning_rate": 5e-05, "loss": 0.096, "num_input_tokens_seen": 105064840, "step": 1151 }, { "epoch": 4.795833333333333, "loss": 0.09066504240036011, "loss_ce": 0.002286137081682682, "loss_iou": 0.26171875, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 105064840, "step": 1151 }, { "epoch": 4.8, "grad_norm": 7.303473496948488, "learning_rate": 5e-05, "loss": 0.1512, "num_input_tokens_seen": 105157112, "step": 1152 }, { "epoch": 4.8, "loss": 0.18908609449863434, "loss_ce": 0.002074366668239236, "loss_iou": 0.232421875, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 105157112, "step": 1152 }, { "epoch": 4.804166666666667, "grad_norm": 3.060594925978094, "learning_rate": 5e-05, "loss": 0.1526, "num_input_tokens_seen": 105248432, "step": 1153 }, { "epoch": 4.804166666666667, "loss": 0.10005295276641846, "loss_ce": 0.0025035091675817966, "loss_iou": 0.189453125, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 105248432, "step": 1153 }, { "epoch": 4.808333333333334, "grad_norm": 4.609051925692657, "learning_rate": 5e-05, "loss": 0.1391, "num_input_tokens_seen": 105339660, "step": 1154 }, { "epoch": 4.808333333333334, "loss": 0.21614710986614227, "loss_ce": 0.0007693012012168765, "loss_iou": 0.236328125, "loss_num": 0.04296875, "loss_xval": 0.2158203125, "num_input_tokens_seen": 105339660, "step": 1154 }, { "epoch": 4.8125, "grad_norm": 7.8704766368309, "learning_rate": 5e-05, "loss": 0.1957, "num_input_tokens_seen": 105431824, "step": 1155 }, { "epoch": 4.8125, "loss": 0.24834512174129486, "loss_ce": 0.0006339406245388091, "loss_iou": 0.2734375, "loss_num": 0.049560546875, "loss_xval": 0.248046875, "num_input_tokens_seen": 105431824, "step": 1155 }, { "epoch": 4.816666666666666, "grad_norm": 3.1825568683650016, "learning_rate": 5e-05, "loss": 0.0872, "num_input_tokens_seen": 105522832, "step": 1156 }, { "epoch": 4.816666666666666, "loss": 0.08022044599056244, "loss_ce": 0.0006611213320866227, "loss_iou": 0.333984375, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 105522832, "step": 1156 }, { "epoch": 4.820833333333333, "grad_norm": 4.876646511952777, "learning_rate": 5e-05, "loss": 0.1709, "num_input_tokens_seen": 105614292, "step": 1157 }, { "epoch": 4.820833333333333, "loss": 0.13954287767410278, "loss_ce": 0.0021374865900725126, "loss_iou": 0.291015625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 105614292, "step": 1157 }, { "epoch": 4.825, "grad_norm": 8.168819966830162, "learning_rate": 5e-05, "loss": 0.1076, "num_input_tokens_seen": 105705452, "step": 1158 }, { "epoch": 4.825, "loss": 0.12230473011732101, "loss_ce": 2.0796111130039208e-05, "loss_iou": 0.3515625, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 105705452, "step": 1158 }, { "epoch": 4.829166666666667, "grad_norm": 5.778774136351113, "learning_rate": 5e-05, "loss": 0.1174, "num_input_tokens_seen": 105796620, "step": 1159 }, { "epoch": 4.829166666666667, "loss": 0.09682287275791168, "loss_ce": 0.0014249193482100964, "loss_iou": 0.32421875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 105796620, "step": 1159 }, { "epoch": 4.833333333333333, "grad_norm": 3.618191034070861, "learning_rate": 5e-05, "loss": 0.1313, "num_input_tokens_seen": 105887976, "step": 1160 }, { "epoch": 4.833333333333333, "loss": 0.10715027898550034, "loss_ce": 0.0015594599535688758, "loss_iou": 0.4296875, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 105887976, "step": 1160 }, { "epoch": 4.8375, "grad_norm": 3.0934691829656247, "learning_rate": 5e-05, "loss": 0.0743, "num_input_tokens_seen": 105979552, "step": 1161 }, { "epoch": 4.8375, "loss": 0.05586852878332138, "loss_ce": 0.002844237256795168, "loss_iou": 0.267578125, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 105979552, "step": 1161 }, { "epoch": 4.841666666666667, "grad_norm": 2.761479297129452, "learning_rate": 5e-05, "loss": 0.1769, "num_input_tokens_seen": 106071192, "step": 1162 }, { "epoch": 4.841666666666667, "loss": 0.12410786747932434, "loss_ce": 0.002434288617223501, "loss_iou": 0.27734375, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 106071192, "step": 1162 }, { "epoch": 4.845833333333333, "grad_norm": 7.136811974389668, "learning_rate": 5e-05, "loss": 0.0975, "num_input_tokens_seen": 106162500, "step": 1163 }, { "epoch": 4.845833333333333, "loss": 0.08225230872631073, "loss_ce": 0.0001600257819518447, "loss_iou": 0.35546875, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 106162500, "step": 1163 }, { "epoch": 4.85, "grad_norm": 7.198517497297793, "learning_rate": 5e-05, "loss": 0.1575, "num_input_tokens_seen": 106254376, "step": 1164 }, { "epoch": 4.85, "loss": 0.1532648205757141, "loss_ce": 0.0005853786133229733, "loss_iou": 0.34375, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 106254376, "step": 1164 }, { "epoch": 4.854166666666667, "grad_norm": 4.436200013135143, "learning_rate": 5e-05, "loss": 0.1463, "num_input_tokens_seen": 106345416, "step": 1165 }, { "epoch": 4.854166666666667, "loss": 0.13062620162963867, "loss_ce": 0.0002245925134047866, "loss_iou": 0.37109375, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 106345416, "step": 1165 }, { "epoch": 4.858333333333333, "grad_norm": 2.2125295380449472, "learning_rate": 5e-05, "loss": 0.0956, "num_input_tokens_seen": 106437532, "step": 1166 }, { "epoch": 4.858333333333333, "loss": 0.12548190355300903, "loss_ce": 0.0007413043058477342, "loss_iou": 0.26171875, "loss_num": 0.0250244140625, "loss_xval": 0.12451171875, "num_input_tokens_seen": 106437532, "step": 1166 }, { "epoch": 4.8625, "grad_norm": 26.07955525963802, "learning_rate": 5e-05, "loss": 0.1577, "num_input_tokens_seen": 106528440, "step": 1167 }, { "epoch": 4.8625, "loss": 0.18524158000946045, "loss_ce": 0.000976437411736697, "loss_iou": 0.185546875, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 106528440, "step": 1167 }, { "epoch": 4.866666666666667, "grad_norm": 3.417899391433817, "learning_rate": 5e-05, "loss": 0.1398, "num_input_tokens_seen": 106620244, "step": 1168 }, { "epoch": 4.866666666666667, "loss": 0.16416826844215393, "loss_ce": 0.0014638010179623961, "loss_iou": 0.259765625, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 106620244, "step": 1168 }, { "epoch": 4.870833333333334, "grad_norm": 2.36998391465569, "learning_rate": 5e-05, "loss": 0.1123, "num_input_tokens_seen": 106711812, "step": 1169 }, { "epoch": 4.870833333333334, "loss": 0.1460573673248291, "loss_ce": 0.0003512083785608411, "loss_iou": 0.23828125, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 106711812, "step": 1169 }, { "epoch": 4.875, "grad_norm": 4.6140129234293985, "learning_rate": 5e-05, "loss": 0.1436, "num_input_tokens_seen": 106803516, "step": 1170 }, { "epoch": 4.875, "loss": 0.1088951975107193, "loss_ce": 0.001198665937408805, "loss_iou": 0.318359375, "loss_num": 0.021484375, "loss_xval": 0.10791015625, "num_input_tokens_seen": 106803516, "step": 1170 }, { "epoch": 4.879166666666666, "grad_norm": 4.106721008433326, "learning_rate": 5e-05, "loss": 0.1061, "num_input_tokens_seen": 106895244, "step": 1171 }, { "epoch": 4.879166666666666, "loss": 0.11301624774932861, "loss_ce": 0.0005284602520987391, "loss_iou": 0.37890625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 106895244, "step": 1171 }, { "epoch": 4.883333333333333, "grad_norm": 3.3142334922220362, "learning_rate": 5e-05, "loss": 0.1314, "num_input_tokens_seen": 106986652, "step": 1172 }, { "epoch": 4.883333333333333, "loss": 0.13002213835716248, "loss_ce": 0.0013600302627310157, "loss_iou": 0.2734375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 106986652, "step": 1172 }, { "epoch": 4.8875, "grad_norm": 3.2706971322529648, "learning_rate": 5e-05, "loss": 0.1275, "num_input_tokens_seen": 107077508, "step": 1173 }, { "epoch": 4.8875, "loss": 0.08167026191949844, "loss_ce": 0.0008597183041274548, "loss_iou": 0.23046875, "loss_num": 0.01611328125, "loss_xval": 0.0810546875, "num_input_tokens_seen": 107077508, "step": 1173 }, { "epoch": 4.891666666666667, "grad_norm": 5.044103138964741, "learning_rate": 5e-05, "loss": 0.1026, "num_input_tokens_seen": 107169048, "step": 1174 }, { "epoch": 4.891666666666667, "loss": 0.09915536642074585, "loss_ce": 0.0022925687953829765, "loss_iou": 0.318359375, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 107169048, "step": 1174 }, { "epoch": 4.895833333333333, "grad_norm": 47.56655776407188, "learning_rate": 5e-05, "loss": 0.179, "num_input_tokens_seen": 107260840, "step": 1175 }, { "epoch": 4.895833333333333, "loss": 0.14901116490364075, "loss_ce": 0.0027861865237355232, "loss_iou": 0.25390625, "loss_num": 0.0291748046875, "loss_xval": 0.146484375, "num_input_tokens_seen": 107260840, "step": 1175 }, { "epoch": 4.9, "grad_norm": 14.474975416203984, "learning_rate": 5e-05, "loss": 0.1438, "num_input_tokens_seen": 107352072, "step": 1176 }, { "epoch": 4.9, "loss": 0.12309698760509491, "loss_ce": 1.9596440324676223e-05, "loss_iou": 0.35546875, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 107352072, "step": 1176 }, { "epoch": 4.904166666666667, "grad_norm": 4.597554419096973, "learning_rate": 5e-05, "loss": 0.1488, "num_input_tokens_seen": 107443000, "step": 1177 }, { "epoch": 4.904166666666667, "loss": 0.1479775458574295, "loss_ce": 0.00024195160949602723, "loss_iou": 0.30078125, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 107443000, "step": 1177 }, { "epoch": 4.908333333333333, "grad_norm": 17.63616065243701, "learning_rate": 5e-05, "loss": 0.1815, "num_input_tokens_seen": 107534172, "step": 1178 }, { "epoch": 4.908333333333333, "loss": 0.16808560490608215, "loss_ce": 5.581736331805587e-05, "loss_iou": 0.296875, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 107534172, "step": 1178 }, { "epoch": 4.9125, "grad_norm": 4.532270987973431, "learning_rate": 5e-05, "loss": 0.1264, "num_input_tokens_seen": 107625528, "step": 1179 }, { "epoch": 4.9125, "loss": 0.16151559352874756, "loss_ce": 0.0037702254485338926, "loss_iou": 0.19140625, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 107625528, "step": 1179 }, { "epoch": 4.916666666666667, "grad_norm": 29.418004624772323, "learning_rate": 5e-05, "loss": 0.1425, "num_input_tokens_seen": 107717188, "step": 1180 }, { "epoch": 4.916666666666667, "loss": 0.11543691903352737, "loss_ce": 0.0002941017155535519, "loss_iou": 0.283203125, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 107717188, "step": 1180 }, { "epoch": 4.920833333333333, "grad_norm": 5.9605390657692, "learning_rate": 5e-05, "loss": 0.1664, "num_input_tokens_seen": 107808660, "step": 1181 }, { "epoch": 4.920833333333333, "loss": 0.1978244185447693, "loss_ce": 0.002908646594733, "loss_iou": 0.404296875, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 107808660, "step": 1181 }, { "epoch": 4.925, "grad_norm": 6.279078096263478, "learning_rate": 5e-05, "loss": 0.0919, "num_input_tokens_seen": 107900216, "step": 1182 }, { "epoch": 4.925, "loss": 0.10716290026903152, "loss_ce": 0.00039715541061013937, "loss_iou": 0.275390625, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 107900216, "step": 1182 }, { "epoch": 4.929166666666667, "grad_norm": 3.9856423529113973, "learning_rate": 5e-05, "loss": 0.1249, "num_input_tokens_seen": 107989820, "step": 1183 }, { "epoch": 4.929166666666667, "loss": 0.11589328199625015, "loss_ce": 1.8040238501271233e-05, "loss_iou": 0.44921875, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 107989820, "step": 1183 }, { "epoch": 4.933333333333334, "grad_norm": 4.7845682379324534, "learning_rate": 5e-05, "loss": 0.1555, "num_input_tokens_seen": 108080832, "step": 1184 }, { "epoch": 4.933333333333334, "loss": 0.12583021819591522, "loss_ce": 6.233003659872338e-06, "loss_iou": 0.3671875, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 108080832, "step": 1184 }, { "epoch": 4.9375, "grad_norm": 3.950777986773663, "learning_rate": 5e-05, "loss": 0.1059, "num_input_tokens_seen": 108172540, "step": 1185 }, { "epoch": 4.9375, "loss": 0.10127120465040207, "loss_ce": 0.0010362147586420178, "loss_iou": 0.189453125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 108172540, "step": 1185 }, { "epoch": 4.941666666666666, "grad_norm": 2.1953353587311546, "learning_rate": 5e-05, "loss": 0.0987, "num_input_tokens_seen": 108263896, "step": 1186 }, { "epoch": 4.941666666666666, "loss": 0.13001351058483124, "loss_ce": 0.0009699350339360535, "loss_iou": 0.232421875, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 108263896, "step": 1186 }, { "epoch": 4.945833333333333, "grad_norm": 11.310572081689427, "learning_rate": 5e-05, "loss": 0.1665, "num_input_tokens_seen": 108355512, "step": 1187 }, { "epoch": 4.945833333333333, "loss": 0.21164894104003906, "loss_ce": 0.0024204296059906483, "loss_iou": 0.283203125, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 108355512, "step": 1187 }, { "epoch": 4.95, "grad_norm": 12.34823239005613, "learning_rate": 5e-05, "loss": 0.111, "num_input_tokens_seen": 108447188, "step": 1188 }, { "epoch": 4.95, "loss": 0.08706867694854736, "loss_ce": 0.002885938622057438, "loss_iou": 0.2421875, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 108447188, "step": 1188 }, { "epoch": 4.954166666666667, "grad_norm": 4.008203850158953, "learning_rate": 5e-05, "loss": 0.0917, "num_input_tokens_seen": 108538840, "step": 1189 }, { "epoch": 4.954166666666667, "loss": 0.046880945563316345, "loss_ce": 0.0006162988720461726, "loss_iou": 0.26171875, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 108538840, "step": 1189 }, { "epoch": 4.958333333333333, "grad_norm": 8.730872913036187, "learning_rate": 5e-05, "loss": 0.1671, "num_input_tokens_seen": 108629984, "step": 1190 }, { "epoch": 4.958333333333333, "loss": 0.14688724279403687, "loss_ce": 3.66496060451027e-05, "loss_iou": 0.267578125, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 108629984, "step": 1190 }, { "epoch": 4.9625, "grad_norm": 6.948890913958782, "learning_rate": 5e-05, "loss": 0.0737, "num_input_tokens_seen": 108721364, "step": 1191 }, { "epoch": 4.9625, "loss": 0.07616404443979263, "loss_ce": 0.0006025217589922249, "loss_iou": 0.306640625, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 108721364, "step": 1191 }, { "epoch": 4.966666666666667, "grad_norm": 12.006868126801029, "learning_rate": 5e-05, "loss": 0.1201, "num_input_tokens_seen": 108812772, "step": 1192 }, { "epoch": 4.966666666666667, "loss": 0.12916617095470428, "loss_ce": 0.0026250318624079227, "loss_iou": 0.400390625, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 108812772, "step": 1192 }, { "epoch": 4.970833333333333, "grad_norm": 11.075487431256267, "learning_rate": 5e-05, "loss": 0.1683, "num_input_tokens_seen": 108903696, "step": 1193 }, { "epoch": 4.970833333333333, "loss": 0.18713349103927612, "loss_ce": 0.00021332701726350933, "loss_iou": 0.37109375, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 108903696, "step": 1193 }, { "epoch": 4.975, "grad_norm": 5.9047766973597335, "learning_rate": 5e-05, "loss": 0.1342, "num_input_tokens_seen": 108995264, "step": 1194 }, { "epoch": 4.975, "loss": 0.1575443148612976, "loss_ce": 0.021435918286442757, "loss_iou": 0.38671875, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 108995264, "step": 1194 }, { "epoch": 4.979166666666667, "grad_norm": 7.209285987756478, "learning_rate": 5e-05, "loss": 0.0953, "num_input_tokens_seen": 109086616, "step": 1195 }, { "epoch": 4.979166666666667, "loss": 0.08984746783971786, "loss_ce": 0.008487604558467865, "loss_iou": 0.171875, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 109086616, "step": 1195 }, { "epoch": 4.983333333333333, "grad_norm": 4.959710692273525, "learning_rate": 5e-05, "loss": 0.1384, "num_input_tokens_seen": 109177672, "step": 1196 }, { "epoch": 4.983333333333333, "loss": 0.06564254313707352, "loss_ce": 0.0005638079019263387, "loss_iou": 0.412109375, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 109177672, "step": 1196 }, { "epoch": 4.9875, "grad_norm": 4.4038327446744425, "learning_rate": 5e-05, "loss": 0.0954, "num_input_tokens_seen": 109268552, "step": 1197 }, { "epoch": 4.9875, "loss": 0.07659703493118286, "loss_ce": 0.00037938207970000803, "loss_iou": 0.287109375, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 109268552, "step": 1197 }, { "epoch": 4.991666666666667, "grad_norm": 2.5176914705181375, "learning_rate": 5e-05, "loss": 0.1254, "num_input_tokens_seen": 109359828, "step": 1198 }, { "epoch": 4.991666666666667, "loss": 0.11878697574138641, "loss_ce": 1.2563883501570672e-05, "loss_iou": 0.1484375, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 109359828, "step": 1198 }, { "epoch": 4.995833333333334, "grad_norm": 7.96050941330993, "learning_rate": 5e-05, "loss": 0.1744, "num_input_tokens_seen": 109451164, "step": 1199 }, { "epoch": 4.995833333333334, "loss": 0.10806751996278763, "loss_ce": 4.776245532411849e-06, "loss_iou": 0.279296875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 109451164, "step": 1199 }, { "epoch": 5.0, "grad_norm": 20.87607456136375, "learning_rate": 5e-05, "loss": 0.1382, "num_input_tokens_seen": 109542924, "step": 1200 }, { "epoch": 5.0, "loss": 0.1367817372083664, "loss_ce": 0.0009785225847736, "loss_iou": 0.35546875, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 109542924, "step": 1200 }, { "epoch": 5.004166666666666, "grad_norm": 3.8787681830586753, "learning_rate": 5e-05, "loss": 0.1366, "num_input_tokens_seen": 109633248, "step": 1201 }, { "epoch": 5.004166666666666, "loss": 0.19319066405296326, "loss_ce": 0.0005331888678483665, "loss_iou": 0.049560546875, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 109633248, "step": 1201 }, { "epoch": 5.008333333333334, "grad_norm": 2.8024044597653504, "learning_rate": 5e-05, "loss": 0.1, "num_input_tokens_seen": 109724012, "step": 1202 }, { "epoch": 5.008333333333334, "loss": 0.11142734438180923, "loss_ce": 6.869970820844173e-05, "loss_iou": 0.2109375, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 109724012, "step": 1202 }, { "epoch": 5.0125, "grad_norm": 11.504232933050812, "learning_rate": 5e-05, "loss": 0.1348, "num_input_tokens_seen": 109814980, "step": 1203 }, { "epoch": 5.0125, "loss": 0.14389806985855103, "loss_ce": 0.00040442385943606496, "loss_iou": 0.32421875, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 109814980, "step": 1203 }, { "epoch": 5.016666666666667, "grad_norm": 3.7370252268604296, "learning_rate": 5e-05, "loss": 0.1411, "num_input_tokens_seen": 109905940, "step": 1204 }, { "epoch": 5.016666666666667, "loss": 0.13649845123291016, "loss_ce": 9.59103772402159e-07, "loss_iou": 0.392578125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 109905940, "step": 1204 }, { "epoch": 5.020833333333333, "grad_norm": 4.4689807688140135, "learning_rate": 5e-05, "loss": 0.0909, "num_input_tokens_seen": 109997520, "step": 1205 }, { "epoch": 5.020833333333333, "loss": 0.08278882503509521, "loss_ce": 0.0015357693191617727, "loss_iou": 0.380859375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 109997520, "step": 1205 }, { "epoch": 5.025, "grad_norm": 2.4383737085153823, "learning_rate": 5e-05, "loss": 0.1215, "num_input_tokens_seen": 110088568, "step": 1206 }, { "epoch": 5.025, "loss": 0.12168996036052704, "loss_ce": 0.0015422508586198092, "loss_iou": 0.310546875, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 110088568, "step": 1206 }, { "epoch": 5.029166666666667, "grad_norm": 1.789251070742093, "learning_rate": 5e-05, "loss": 0.0993, "num_input_tokens_seen": 110178884, "step": 1207 }, { "epoch": 5.029166666666667, "loss": 0.0567280575633049, "loss_ce": 0.0019642619881778955, "loss_iou": 0.30078125, "loss_num": 0.010986328125, "loss_xval": 0.0546875, "num_input_tokens_seen": 110178884, "step": 1207 }, { "epoch": 5.033333333333333, "grad_norm": 4.0877162405297724, "learning_rate": 5e-05, "loss": 0.1192, "num_input_tokens_seen": 110270332, "step": 1208 }, { "epoch": 5.033333333333333, "loss": 0.14080800116062164, "loss_ce": 0.00018300242663826793, "loss_iou": 0.326171875, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 110270332, "step": 1208 }, { "epoch": 5.0375, "grad_norm": 15.93284051558836, "learning_rate": 5e-05, "loss": 0.16, "num_input_tokens_seen": 110361576, "step": 1209 }, { "epoch": 5.0375, "loss": 0.167129784822464, "loss_ce": 0.0004122618702240288, "loss_iou": 0.32421875, "loss_num": 0.033203125, "loss_xval": 0.1669921875, "num_input_tokens_seen": 110361576, "step": 1209 }, { "epoch": 5.041666666666667, "grad_norm": 12.944063658397615, "learning_rate": 5e-05, "loss": 0.1346, "num_input_tokens_seen": 110452884, "step": 1210 }, { "epoch": 5.041666666666667, "loss": 0.10224296152591705, "loss_ce": 0.00028373015811666846, "loss_iou": 0.408203125, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 110452884, "step": 1210 }, { "epoch": 5.045833333333333, "grad_norm": 23.60951585784412, "learning_rate": 5e-05, "loss": 0.1252, "num_input_tokens_seen": 110544128, "step": 1211 }, { "epoch": 5.045833333333333, "loss": 0.07057714462280273, "loss_ce": 0.00034093711292371154, "loss_iou": 0.2734375, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 110544128, "step": 1211 }, { "epoch": 5.05, "grad_norm": 3.8865840347157503, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 110634764, "step": 1212 }, { "epoch": 5.05, "loss": 0.05204097181558609, "loss_ce": 0.0005578203708864748, "loss_iou": 0.35546875, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 110634764, "step": 1212 }, { "epoch": 5.054166666666666, "grad_norm": 4.8249299659894245, "learning_rate": 5e-05, "loss": 0.1227, "num_input_tokens_seen": 110726256, "step": 1213 }, { "epoch": 5.054166666666666, "loss": 0.09966389834880829, "loss_ce": 2.4005919840419665e-05, "loss_iou": 0.302734375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 110726256, "step": 1213 }, { "epoch": 5.058333333333334, "grad_norm": 7.718292424470949, "learning_rate": 5e-05, "loss": 0.11, "num_input_tokens_seen": 110817604, "step": 1214 }, { "epoch": 5.058333333333334, "loss": 0.06570029258728027, "loss_ce": 0.0001027620310196653, "loss_iou": 0.49609375, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 110817604, "step": 1214 }, { "epoch": 5.0625, "grad_norm": 5.757078059025428, "learning_rate": 5e-05, "loss": 0.0944, "num_input_tokens_seen": 110908712, "step": 1215 }, { "epoch": 5.0625, "loss": 0.10337609797716141, "loss_ce": 0.002912231022492051, "loss_iou": 0.279296875, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 110908712, "step": 1215 }, { "epoch": 5.066666666666666, "grad_norm": 4.350185436094405, "learning_rate": 5e-05, "loss": 0.1392, "num_input_tokens_seen": 110999572, "step": 1216 }, { "epoch": 5.066666666666666, "loss": 0.16903723776340485, "loss_ce": 0.0002445149584673345, "loss_iou": 0.3359375, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 110999572, "step": 1216 }, { "epoch": 5.070833333333334, "grad_norm": 2.9290239729778995, "learning_rate": 5e-05, "loss": 0.1382, "num_input_tokens_seen": 111091312, "step": 1217 }, { "epoch": 5.070833333333334, "loss": 0.14560575783252716, "loss_ce": 0.00298185832798481, "loss_iou": 0.2197265625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 111091312, "step": 1217 }, { "epoch": 5.075, "grad_norm": 3.0035295725462503, "learning_rate": 5e-05, "loss": 0.0717, "num_input_tokens_seen": 111182248, "step": 1218 }, { "epoch": 5.075, "loss": 0.08869168907403946, "loss_ce": 6.864196620881557e-05, "loss_iou": 0.326171875, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 111182248, "step": 1218 }, { "epoch": 5.079166666666667, "grad_norm": 3.3324160340032756, "learning_rate": 5e-05, "loss": 0.1397, "num_input_tokens_seen": 111274184, "step": 1219 }, { "epoch": 5.079166666666667, "loss": 0.10855446010828018, "loss_ce": 0.001727680442854762, "loss_iou": 0.3984375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 111274184, "step": 1219 }, { "epoch": 5.083333333333333, "grad_norm": 3.8266739457670855, "learning_rate": 5e-05, "loss": 0.0748, "num_input_tokens_seen": 111365224, "step": 1220 }, { "epoch": 5.083333333333333, "loss": 0.04669380933046341, "loss_ce": 1.9105934825347504e-06, "loss_iou": 0.35546875, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 111365224, "step": 1220 }, { "epoch": 5.0875, "grad_norm": 4.20819565496716, "learning_rate": 5e-05, "loss": 0.126, "num_input_tokens_seen": 111456556, "step": 1221 }, { "epoch": 5.0875, "loss": 0.13568758964538574, "loss_ce": 6.435318482544972e-06, "loss_iou": 0.4453125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 111456556, "step": 1221 }, { "epoch": 5.091666666666667, "grad_norm": 3.9317513583556125, "learning_rate": 5e-05, "loss": 0.0995, "num_input_tokens_seen": 111547324, "step": 1222 }, { "epoch": 5.091666666666667, "loss": 0.11891846358776093, "loss_ce": 0.0023718271404504776, "loss_iou": 0.2216796875, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 111547324, "step": 1222 }, { "epoch": 5.095833333333333, "grad_norm": 4.900769539292761, "learning_rate": 5e-05, "loss": 0.1248, "num_input_tokens_seen": 111638688, "step": 1223 }, { "epoch": 5.095833333333333, "loss": 0.12108991295099258, "loss_ce": 0.0013694557128474116, "loss_iou": 0.384765625, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 111638688, "step": 1223 }, { "epoch": 5.1, "grad_norm": 6.628289053873807, "learning_rate": 5e-05, "loss": 0.1211, "num_input_tokens_seen": 111730696, "step": 1224 }, { "epoch": 5.1, "loss": 0.12536606192588806, "loss_ce": 0.0016783210448920727, "loss_iou": 0.220703125, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 111730696, "step": 1224 }, { "epoch": 5.104166666666667, "grad_norm": 9.234334670863234, "learning_rate": 5e-05, "loss": 0.126, "num_input_tokens_seen": 111822212, "step": 1225 }, { "epoch": 5.104166666666667, "loss": 0.0658029243350029, "loss_ce": 1.4655664926976897e-05, "loss_iou": 0.29296875, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 111822212, "step": 1225 }, { "epoch": 5.108333333333333, "grad_norm": 2.4145462349780464, "learning_rate": 5e-05, "loss": 0.0894, "num_input_tokens_seen": 111913296, "step": 1226 }, { "epoch": 5.108333333333333, "loss": 0.06669288128614426, "loss_ce": 0.0002713756402954459, "loss_iou": 0.2177734375, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 111913296, "step": 1226 }, { "epoch": 5.1125, "grad_norm": 11.584890739030651, "learning_rate": 5e-05, "loss": 0.0991, "num_input_tokens_seen": 112004356, "step": 1227 }, { "epoch": 5.1125, "loss": 0.11644387245178223, "loss_ce": 4.057878868479747e-06, "loss_iou": 0.306640625, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 112004356, "step": 1227 }, { "epoch": 5.116666666666666, "grad_norm": 5.636659565390342, "learning_rate": 5e-05, "loss": 0.0992, "num_input_tokens_seen": 112095520, "step": 1228 }, { "epoch": 5.116666666666666, "loss": 0.1413094401359558, "loss_ce": 0.00028771322104148567, "loss_iou": 0.150390625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 112095520, "step": 1228 }, { "epoch": 5.120833333333334, "grad_norm": 11.177885073008182, "learning_rate": 5e-05, "loss": 0.1417, "num_input_tokens_seen": 112186296, "step": 1229 }, { "epoch": 5.120833333333334, "loss": 0.21080881357192993, "loss_ce": 0.018044522032141685, "loss_iou": 0.10693359375, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 112186296, "step": 1229 }, { "epoch": 5.125, "grad_norm": 2.7526024291809055, "learning_rate": 5e-05, "loss": 0.0795, "num_input_tokens_seen": 112277736, "step": 1230 }, { "epoch": 5.125, "loss": 0.05874582752585411, "loss_ce": 0.0004496269393712282, "loss_iou": 0.3203125, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 112277736, "step": 1230 }, { "epoch": 5.129166666666666, "grad_norm": 2.251815283862548, "learning_rate": 5e-05, "loss": 0.1232, "num_input_tokens_seen": 112369748, "step": 1231 }, { "epoch": 5.129166666666666, "loss": 0.11877487599849701, "loss_ce": 0.003067473880946636, "loss_iou": 0.349609375, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 112369748, "step": 1231 }, { "epoch": 5.133333333333334, "grad_norm": 2.578317507730831, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 112461016, "step": 1232 }, { "epoch": 5.133333333333334, "loss": 0.06011161953210831, "loss_ce": 5.3028885304229334e-05, "loss_iou": 0.2314453125, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 112461016, "step": 1232 }, { "epoch": 5.1375, "grad_norm": 1.360840339322072, "learning_rate": 5e-05, "loss": 0.0871, "num_input_tokens_seen": 112552472, "step": 1233 }, { "epoch": 5.1375, "loss": 0.11113837361335754, "loss_ce": 0.000931774964556098, "loss_iou": 0.240234375, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 112552472, "step": 1233 }, { "epoch": 5.141666666666667, "grad_norm": 1.7276905893923984, "learning_rate": 5e-05, "loss": 0.0733, "num_input_tokens_seen": 112643440, "step": 1234 }, { "epoch": 5.141666666666667, "loss": 0.06363178789615631, "loss_ce": 9.4195143901743e-05, "loss_iou": 0.2890625, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 112643440, "step": 1234 }, { "epoch": 5.145833333333333, "grad_norm": 3.4148272040861287, "learning_rate": 5e-05, "loss": 0.1149, "num_input_tokens_seen": 112734996, "step": 1235 }, { "epoch": 5.145833333333333, "loss": 0.14283618330955505, "loss_ce": 1.392368631059071e-05, "loss_iou": 0.431640625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 112734996, "step": 1235 }, { "epoch": 5.15, "grad_norm": 3.4329962925971365, "learning_rate": 5e-05, "loss": 0.1615, "num_input_tokens_seen": 112826176, "step": 1236 }, { "epoch": 5.15, "loss": 0.1847839653491974, "loss_ce": 0.0009460803703404963, "loss_iou": 0.2421875, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 112826176, "step": 1236 }, { "epoch": 5.154166666666667, "grad_norm": 4.912752549896548, "learning_rate": 5e-05, "loss": 0.0925, "num_input_tokens_seen": 112917236, "step": 1237 }, { "epoch": 5.154166666666667, "loss": 0.10758166015148163, "loss_ce": 7.199274023150792e-06, "loss_iou": 0.2314453125, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 112917236, "step": 1237 }, { "epoch": 5.158333333333333, "grad_norm": 3.057128424442674, "learning_rate": 5e-05, "loss": 0.093, "num_input_tokens_seen": 113008216, "step": 1238 }, { "epoch": 5.158333333333333, "loss": 0.08614519983530045, "loss_ce": 0.0007417544256895781, "loss_iou": 0.298828125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 113008216, "step": 1238 }, { "epoch": 5.1625, "grad_norm": 3.202517671389756, "learning_rate": 5e-05, "loss": 0.0882, "num_input_tokens_seen": 113099600, "step": 1239 }, { "epoch": 5.1625, "loss": 0.10517486184835434, "loss_ce": 0.00429900549352169, "loss_iou": 0.2451171875, "loss_num": 0.0201416015625, "loss_xval": 0.10107421875, "num_input_tokens_seen": 113099600, "step": 1239 }, { "epoch": 5.166666666666667, "grad_norm": 8.732261105764604, "learning_rate": 5e-05, "loss": 0.1477, "num_input_tokens_seen": 113190864, "step": 1240 }, { "epoch": 5.166666666666667, "loss": 0.16263793408870697, "loss_ce": 0.00010131551243830472, "loss_iou": 0.4453125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 113190864, "step": 1240 }, { "epoch": 5.170833333333333, "grad_norm": 1.9516385622557737, "learning_rate": 5e-05, "loss": 0.0973, "num_input_tokens_seen": 113281976, "step": 1241 }, { "epoch": 5.170833333333333, "loss": 0.10548153519630432, "loss_ce": 0.000592618715018034, "loss_iou": 0.2890625, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 113281976, "step": 1241 }, { "epoch": 5.175, "grad_norm": 1.6857417263901389, "learning_rate": 5e-05, "loss": 0.083, "num_input_tokens_seen": 113373120, "step": 1242 }, { "epoch": 5.175, "loss": 0.08540096133947372, "loss_ce": 0.0014318418689072132, "loss_iou": 0.2255859375, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 113373120, "step": 1242 }, { "epoch": 5.179166666666666, "grad_norm": 3.5131015850856047, "learning_rate": 5e-05, "loss": 0.1122, "num_input_tokens_seen": 113464092, "step": 1243 }, { "epoch": 5.179166666666666, "loss": 0.08137943595647812, "loss_ce": 0.001087690470740199, "loss_iou": 0.2099609375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 113464092, "step": 1243 }, { "epoch": 5.183333333333334, "grad_norm": 2.7252061609552136, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 113555832, "step": 1244 }, { "epoch": 5.183333333333334, "loss": 0.07820607721805573, "loss_ce": 0.0004015131271444261, "loss_iou": 0.28125, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 113555832, "step": 1244 }, { "epoch": 5.1875, "grad_norm": 9.974603644228377, "learning_rate": 5e-05, "loss": 0.0804, "num_input_tokens_seen": 113647152, "step": 1245 }, { "epoch": 5.1875, "loss": 0.05682176351547241, "loss_ce": 0.0007914903690107167, "loss_iou": 0.3671875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 113647152, "step": 1245 }, { "epoch": 5.191666666666666, "grad_norm": 3.461158509656596, "learning_rate": 5e-05, "loss": 0.1451, "num_input_tokens_seen": 113738720, "step": 1246 }, { "epoch": 5.191666666666666, "loss": 0.21678170561790466, "loss_ce": 0.001754840137436986, "loss_iou": 0.263671875, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 113738720, "step": 1246 }, { "epoch": 5.195833333333334, "grad_norm": 3.8199671948919143, "learning_rate": 5e-05, "loss": 0.121, "num_input_tokens_seen": 113830052, "step": 1247 }, { "epoch": 5.195833333333334, "loss": 0.16566026210784912, "loss_ce": 0.0017808763077482581, "loss_iou": 0.361328125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 113830052, "step": 1247 }, { "epoch": 5.2, "grad_norm": 11.496281960766611, "learning_rate": 5e-05, "loss": 0.134, "num_input_tokens_seen": 113919636, "step": 1248 }, { "epoch": 5.2, "loss": 0.15728822350502014, "loss_ce": 6.198486062203301e-07, "loss_iou": 0.34765625, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 113919636, "step": 1248 }, { "epoch": 5.204166666666667, "grad_norm": 4.603555009446634, "learning_rate": 5e-05, "loss": 0.1175, "num_input_tokens_seen": 114010788, "step": 1249 }, { "epoch": 5.204166666666667, "loss": 0.14147533476352692, "loss_ce": 0.0010486957617104053, "loss_iou": 0.333984375, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 114010788, "step": 1249 }, { "epoch": 5.208333333333333, "grad_norm": 3.3103145760410957, "learning_rate": 5e-05, "loss": 0.1269, "num_input_tokens_seen": 114102008, "step": 1250 }, { "epoch": 5.208333333333333, "eval_seeclick_CIoU": 0.19745288416743279, "eval_seeclick_GIoU": 0.1804250180721283, "eval_seeclick_IoU": 0.3141836002469063, "eval_seeclick_MAE_all": 0.10292381420731544, "eval_seeclick_MAE_h": 0.07238547503948212, "eval_seeclick_MAE_w": 0.22456881403923035, "eval_seeclick_MAE_x_boxes": 0.24006878584623337, "eval_seeclick_MAE_y_boxes": 0.07802290096879005, "eval_seeclick_NUM_probability": 0.999999463558197, "eval_seeclick_inside_bbox": 0.4630681872367859, "eval_seeclick_loss": 0.5826772451400757, "eval_seeclick_loss_ce": 0.13239652663469315, "eval_seeclick_loss_iou": 0.36871337890625, "eval_seeclick_loss_num": 0.0897979736328125, "eval_seeclick_loss_xval": 0.4488525390625, "eval_seeclick_runtime": 73.2552, "eval_seeclick_samples_per_second": 0.587, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 114102008, "step": 1250 }, { "epoch": 5.208333333333333, "eval_icons_CIoU": 0.3539682552218437, "eval_icons_GIoU": 0.3729694336652756, "eval_icons_IoU": 0.43295738101005554, "eval_icons_MAE_all": 0.06616230122745037, "eval_icons_MAE_h": 0.14156471192836761, "eval_icons_MAE_w": 0.09367327392101288, "eval_icons_MAE_x_boxes": 0.09583292528986931, "eval_icons_MAE_y_boxes": 0.14096488058567047, "eval_icons_NUM_probability": 0.9999997913837433, "eval_icons_inside_bbox": 0.6006944477558136, "eval_icons_loss": 0.3076817989349365, "eval_icons_loss_ce": 2.005231726798229e-05, "eval_icons_loss_iou": 0.34320068359375, "eval_icons_loss_num": 0.06174468994140625, "eval_icons_loss_xval": 0.30865478515625, "eval_icons_runtime": 86.1107, "eval_icons_samples_per_second": 0.581, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 114102008, "step": 1250 }, { "epoch": 5.208333333333333, "eval_screenspot_CIoU": 0.3847437302271525, "eval_screenspot_GIoU": 0.3658294876416524, "eval_screenspot_IoU": 0.4502400855223338, "eval_screenspot_MAE_all": 0.09436274568239848, "eval_screenspot_MAE_h": 0.08610273400942485, "eval_screenspot_MAE_w": 0.1881504605213801, "eval_screenspot_MAE_x_boxes": 0.17523721357186636, "eval_screenspot_MAE_y_boxes": 0.07575235267480214, "eval_screenspot_NUM_probability": 0.9999992450078329, "eval_screenspot_inside_bbox": 0.7116666634877523, "eval_screenspot_loss": 0.468685507774353, "eval_screenspot_loss_ce": 1.2692903320991415e-06, "eval_screenspot_loss_iou": 0.406005859375, "eval_screenspot_loss_num": 0.09428914388020833, "eval_screenspot_loss_xval": 0.47119140625, "eval_screenspot_runtime": 146.7117, "eval_screenspot_samples_per_second": 0.607, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 114102008, "step": 1250 }, { "epoch": 5.208333333333333, "eval_compot_CIoU": 0.49966710805892944, "eval_compot_GIoU": 0.48504847288131714, "eval_compot_IoU": 0.5533573031425476, "eval_compot_MAE_all": 0.0510424617677927, "eval_compot_MAE_h": 0.05651161074638367, "eval_compot_MAE_w": 0.12595685943961143, "eval_compot_MAE_x_boxes": 0.12238426506519318, "eval_compot_MAE_y_boxes": 0.05694563314318657, "eval_compot_NUM_probability": 0.9999992847442627, "eval_compot_inside_bbox": 0.8072916567325592, "eval_compot_loss": 0.2954633831977844, "eval_compot_loss_ce": 0.023688997142016888, "eval_compot_loss_iou": 0.37640380859375, "eval_compot_loss_num": 0.048618316650390625, "eval_compot_loss_xval": 0.2430572509765625, "eval_compot_runtime": 85.198, "eval_compot_samples_per_second": 0.587, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 114102008, "step": 1250 }, { "epoch": 5.208333333333333, "loss": 0.2156691700220108, "loss_ce": 0.026307594031095505, "loss_iou": 0.37890625, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 114102008, "step": 1250 }, { "epoch": 5.2125, "grad_norm": 2.4482462710704627, "learning_rate": 5e-05, "loss": 0.0686, "num_input_tokens_seen": 114193780, "step": 1251 }, { "epoch": 5.2125, "loss": 0.0852910578250885, "loss_ce": 0.0006963338819332421, "loss_iou": 0.296875, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 114193780, "step": 1251 }, { "epoch": 5.216666666666667, "grad_norm": 2.7907731585969113, "learning_rate": 5e-05, "loss": 0.0948, "num_input_tokens_seen": 114285468, "step": 1252 }, { "epoch": 5.216666666666667, "loss": 0.12171787023544312, "loss_ce": 0.0014786121901124716, "loss_iou": 0.26953125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 114285468, "step": 1252 }, { "epoch": 5.220833333333333, "grad_norm": 3.1560111385127017, "learning_rate": 5e-05, "loss": 0.1098, "num_input_tokens_seen": 114377144, "step": 1253 }, { "epoch": 5.220833333333333, "loss": 0.0772915780544281, "loss_ce": 0.0006314230267889798, "loss_iou": 0.205078125, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 114377144, "step": 1253 }, { "epoch": 5.225, "grad_norm": 4.287537330574922, "learning_rate": 5e-05, "loss": 0.1235, "num_input_tokens_seen": 114468928, "step": 1254 }, { "epoch": 5.225, "loss": 0.15133850276470184, "loss_ce": 9.337875235360116e-05, "loss_iou": 0.2314453125, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 114468928, "step": 1254 }, { "epoch": 5.229166666666667, "grad_norm": 4.213109821782446, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 114560320, "step": 1255 }, { "epoch": 5.229166666666667, "loss": 0.05044609308242798, "loss_ce": 0.0013280524872243404, "loss_iou": 0.3046875, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 114560320, "step": 1255 }, { "epoch": 5.233333333333333, "grad_norm": 4.437664875584358, "learning_rate": 5e-05, "loss": 0.1562, "num_input_tokens_seen": 114651584, "step": 1256 }, { "epoch": 5.233333333333333, "loss": 0.07865123450756073, "loss_ce": 0.0002515834057703614, "loss_iou": 0.392578125, "loss_num": 0.015625, "loss_xval": 0.07861328125, "num_input_tokens_seen": 114651584, "step": 1256 }, { "epoch": 5.2375, "grad_norm": 5.187559708651044, "learning_rate": 5e-05, "loss": 0.0946, "num_input_tokens_seen": 114743208, "step": 1257 }, { "epoch": 5.2375, "loss": 0.055650509893894196, "loss_ce": 0.0018937942804768682, "loss_iou": 0.10888671875, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 114743208, "step": 1257 }, { "epoch": 5.241666666666666, "grad_norm": 5.861157534137002, "learning_rate": 5e-05, "loss": 0.1179, "num_input_tokens_seen": 114834960, "step": 1258 }, { "epoch": 5.241666666666666, "loss": 0.08185291290283203, "loss_ce": 0.0008287417003884912, "loss_iou": 0.27734375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 114834960, "step": 1258 }, { "epoch": 5.245833333333334, "grad_norm": 3.0543935312914456, "learning_rate": 5e-05, "loss": 0.0786, "num_input_tokens_seen": 114926684, "step": 1259 }, { "epoch": 5.245833333333334, "loss": 0.07390153408050537, "loss_ce": 0.001361246220767498, "loss_iou": 0.267578125, "loss_num": 0.01446533203125, "loss_xval": 0.07275390625, "num_input_tokens_seen": 114926684, "step": 1259 }, { "epoch": 5.25, "grad_norm": 3.741079964142253, "learning_rate": 5e-05, "loss": 0.1447, "num_input_tokens_seen": 115017888, "step": 1260 }, { "epoch": 5.25, "loss": 0.1192452535033226, "loss_ce": 0.0007760171429254115, "loss_iou": 0.2216796875, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 115017888, "step": 1260 }, { "epoch": 5.254166666666666, "grad_norm": 11.260506478568418, "learning_rate": 5e-05, "loss": 0.0746, "num_input_tokens_seen": 115108324, "step": 1261 }, { "epoch": 5.254166666666666, "loss": 0.06887489557266235, "loss_ce": 1.197768870042637e-05, "loss_iou": 0.2890625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 115108324, "step": 1261 }, { "epoch": 5.258333333333334, "grad_norm": 5.874110416519585, "learning_rate": 5e-05, "loss": 0.1323, "num_input_tokens_seen": 115200368, "step": 1262 }, { "epoch": 5.258333333333334, "loss": 0.19069834053516388, "loss_ce": 0.002992353169247508, "loss_iou": 0.177734375, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 115200368, "step": 1262 }, { "epoch": 5.2625, "grad_norm": 2.3060382178106633, "learning_rate": 5e-05, "loss": 0.0757, "num_input_tokens_seen": 115291888, "step": 1263 }, { "epoch": 5.2625, "loss": 0.0695885717868805, "loss_ce": 0.0011071269400417805, "loss_iou": 0.34765625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 115291888, "step": 1263 }, { "epoch": 5.266666666666667, "grad_norm": 4.184750546982303, "learning_rate": 5e-05, "loss": 0.169, "num_input_tokens_seen": 115383440, "step": 1264 }, { "epoch": 5.266666666666667, "loss": 0.14805006980895996, "loss_ce": 0.0004365481436252594, "loss_iou": 0.2373046875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 115383440, "step": 1264 }, { "epoch": 5.270833333333333, "grad_norm": 5.722882309669123, "learning_rate": 5e-05, "loss": 0.1043, "num_input_tokens_seen": 115474844, "step": 1265 }, { "epoch": 5.270833333333333, "loss": 0.11676155775785446, "loss_ce": 0.00027596583822742105, "loss_iou": 0.15625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 115474844, "step": 1265 }, { "epoch": 5.275, "grad_norm": 2.1351602820682096, "learning_rate": 5e-05, "loss": 0.1252, "num_input_tokens_seen": 115565996, "step": 1266 }, { "epoch": 5.275, "loss": 0.061716049909591675, "loss_ce": 0.00014683924382552505, "loss_iou": 0.318359375, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 115565996, "step": 1266 }, { "epoch": 5.279166666666667, "grad_norm": 2.9318578257220147, "learning_rate": 5e-05, "loss": 0.1345, "num_input_tokens_seen": 115657736, "step": 1267 }, { "epoch": 5.279166666666667, "loss": 0.12814414501190186, "loss_ce": 0.0016793095273897052, "loss_iou": 0.2060546875, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 115657736, "step": 1267 }, { "epoch": 5.283333333333333, "grad_norm": 5.993788064184745, "learning_rate": 5e-05, "loss": 0.1007, "num_input_tokens_seen": 115749500, "step": 1268 }, { "epoch": 5.283333333333333, "loss": 0.1011967882514, "loss_ce": 0.001953623490408063, "loss_iou": 0.201171875, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 115749500, "step": 1268 }, { "epoch": 5.2875, "grad_norm": 1.805664621328046, "learning_rate": 5e-05, "loss": 0.1322, "num_input_tokens_seen": 115839948, "step": 1269 }, { "epoch": 5.2875, "loss": 0.0894617959856987, "loss_ce": 3.003023266501259e-05, "loss_iou": 0.32421875, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 115839948, "step": 1269 }, { "epoch": 5.291666666666667, "grad_norm": 4.496194953161926, "learning_rate": 5e-05, "loss": 0.1543, "num_input_tokens_seen": 115930852, "step": 1270 }, { "epoch": 5.291666666666667, "loss": 0.1023472398519516, "loss_ce": 0.00047956418711692095, "loss_iou": 0.1982421875, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 115930852, "step": 1270 }, { "epoch": 5.295833333333333, "grad_norm": 7.9884847357892195, "learning_rate": 5e-05, "loss": 0.1254, "num_input_tokens_seen": 116022452, "step": 1271 }, { "epoch": 5.295833333333333, "loss": 0.18090632557868958, "loss_ce": 0.0018902214942499995, "loss_iou": 0.42578125, "loss_num": 0.035888671875, "loss_xval": 0.1787109375, "num_input_tokens_seen": 116022452, "step": 1271 }, { "epoch": 5.3, "grad_norm": 4.888515317396117, "learning_rate": 5e-05, "loss": 0.0957, "num_input_tokens_seen": 116114104, "step": 1272 }, { "epoch": 5.3, "loss": 0.09846118092536926, "loss_ce": 0.0011406235862523317, "loss_iou": 0.251953125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 116114104, "step": 1272 }, { "epoch": 5.304166666666666, "grad_norm": 13.270427957057752, "learning_rate": 5e-05, "loss": 0.1048, "num_input_tokens_seen": 116205948, "step": 1273 }, { "epoch": 5.304166666666666, "loss": 0.07233616709709167, "loss_ce": 0.0012302087852731347, "loss_iou": 0.32421875, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 116205948, "step": 1273 }, { "epoch": 5.308333333333334, "grad_norm": 11.445246012507008, "learning_rate": 5e-05, "loss": 0.1139, "num_input_tokens_seen": 116296720, "step": 1274 }, { "epoch": 5.308333333333334, "loss": 0.10334327071905136, "loss_ce": 0.0006210966967046261, "loss_iou": 0.265625, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 116296720, "step": 1274 }, { "epoch": 5.3125, "grad_norm": 3.9569976986556195, "learning_rate": 5e-05, "loss": 0.0976, "num_input_tokens_seen": 116388268, "step": 1275 }, { "epoch": 5.3125, "loss": 0.08903685212135315, "loss_ce": 0.0008868263103067875, "loss_iou": 0.228515625, "loss_num": 0.017578125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 116388268, "step": 1275 }, { "epoch": 5.316666666666666, "grad_norm": 3.683810789503949, "learning_rate": 5e-05, "loss": 0.0943, "num_input_tokens_seen": 116479860, "step": 1276 }, { "epoch": 5.316666666666666, "loss": 0.07193230837583542, "loss_ce": 3.289574306108989e-05, "loss_iou": 0.32421875, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 116479860, "step": 1276 }, { "epoch": 5.320833333333334, "grad_norm": 4.154684216190813, "learning_rate": 5e-05, "loss": 0.1087, "num_input_tokens_seen": 116571556, "step": 1277 }, { "epoch": 5.320833333333334, "loss": 0.11861756443977356, "loss_ce": 0.0005298026371747255, "loss_iou": 0.474609375, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 116571556, "step": 1277 }, { "epoch": 5.325, "grad_norm": 3.5111047164996703, "learning_rate": 5e-05, "loss": 0.1377, "num_input_tokens_seen": 116662712, "step": 1278 }, { "epoch": 5.325, "loss": 0.16763438284397125, "loss_ce": 0.001313579734414816, "loss_iou": 0.279296875, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 116662712, "step": 1278 }, { "epoch": 5.329166666666667, "grad_norm": 2.4882705059093624, "learning_rate": 5e-05, "loss": 0.1037, "num_input_tokens_seen": 116754268, "step": 1279 }, { "epoch": 5.329166666666667, "loss": 0.1483645886182785, "loss_ce": 0.002421897603198886, "loss_iou": 0.1328125, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 116754268, "step": 1279 }, { "epoch": 5.333333333333333, "grad_norm": 5.29046572456457, "learning_rate": 5e-05, "loss": 0.1165, "num_input_tokens_seen": 116845356, "step": 1280 }, { "epoch": 5.333333333333333, "loss": 0.08341895788908005, "loss_ce": 0.00021278511849232018, "loss_iou": 0.244140625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 116845356, "step": 1280 }, { "epoch": 5.3375, "grad_norm": 9.806375446060992, "learning_rate": 5e-05, "loss": 0.1046, "num_input_tokens_seen": 116936416, "step": 1281 }, { "epoch": 5.3375, "loss": 0.09760526567697525, "loss_ce": 0.0004372965486254543, "loss_iou": 0.28515625, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 116936416, "step": 1281 }, { "epoch": 5.341666666666667, "grad_norm": 3.2928939554918055, "learning_rate": 5e-05, "loss": 0.1307, "num_input_tokens_seen": 117027856, "step": 1282 }, { "epoch": 5.341666666666667, "loss": 0.1211928129196167, "loss_ce": 0.0005873381742276251, "loss_iou": 0.333984375, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 117027856, "step": 1282 }, { "epoch": 5.345833333333333, "grad_norm": 5.52683288897043, "learning_rate": 5e-05, "loss": 0.108, "num_input_tokens_seen": 117118588, "step": 1283 }, { "epoch": 5.345833333333333, "loss": 0.10166649520397186, "loss_ce": 1.2437561963452026e-05, "loss_iou": 0.3203125, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 117118588, "step": 1283 }, { "epoch": 5.35, "grad_norm": 2.816179778442025, "learning_rate": 5e-05, "loss": 0.1203, "num_input_tokens_seen": 117209744, "step": 1284 }, { "epoch": 5.35, "loss": 0.08381817489862442, "loss_ce": 0.000688286847434938, "loss_iou": 0.2099609375, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 117209744, "step": 1284 }, { "epoch": 5.354166666666667, "grad_norm": 7.291796604136836, "learning_rate": 5e-05, "loss": 0.0903, "num_input_tokens_seen": 117301596, "step": 1285 }, { "epoch": 5.354166666666667, "loss": 0.11099517345428467, "loss_ce": 0.0013302592560648918, "loss_iou": 0.2138671875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 117301596, "step": 1285 }, { "epoch": 5.358333333333333, "grad_norm": 3.210137899677224, "learning_rate": 5e-05, "loss": 0.0939, "num_input_tokens_seen": 117393500, "step": 1286 }, { "epoch": 5.358333333333333, "loss": 0.10765822231769562, "loss_ce": 0.0013960100477561355, "loss_iou": 0.291015625, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 117393500, "step": 1286 }, { "epoch": 5.3625, "grad_norm": 8.119859245564339, "learning_rate": 5e-05, "loss": 0.0997, "num_input_tokens_seen": 117484348, "step": 1287 }, { "epoch": 5.3625, "loss": 0.07025125622749329, "loss_ce": 0.00021341571118682623, "loss_iou": 0.3359375, "loss_num": 0.0140380859375, "loss_xval": 0.06982421875, "num_input_tokens_seen": 117484348, "step": 1287 }, { "epoch": 5.366666666666666, "grad_norm": 8.109501712065667, "learning_rate": 5e-05, "loss": 0.091, "num_input_tokens_seen": 117575352, "step": 1288 }, { "epoch": 5.366666666666666, "loss": 0.08108506351709366, "loss_ce": 0.0003202911466360092, "loss_iou": 0.294921875, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 117575352, "step": 1288 }, { "epoch": 5.370833333333334, "grad_norm": 9.906258072505857, "learning_rate": 5e-05, "loss": 0.1012, "num_input_tokens_seen": 117666916, "step": 1289 }, { "epoch": 5.370833333333334, "loss": 0.09506266564130783, "loss_ce": 0.0005954969674348831, "loss_iou": 0.318359375, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 117666916, "step": 1289 }, { "epoch": 5.375, "grad_norm": 4.899701709085185, "learning_rate": 5e-05, "loss": 0.1321, "num_input_tokens_seen": 117758368, "step": 1290 }, { "epoch": 5.375, "loss": 0.13716095685958862, "loss_ce": 0.0009304783889092505, "loss_iou": 0.283203125, "loss_num": 0.0272216796875, "loss_xval": 0.13671875, "num_input_tokens_seen": 117758368, "step": 1290 }, { "epoch": 5.379166666666666, "grad_norm": 3.6447385189588575, "learning_rate": 5e-05, "loss": 0.1184, "num_input_tokens_seen": 117849536, "step": 1291 }, { "epoch": 5.379166666666666, "loss": 0.08016116917133331, "loss_ce": 0.0001440807245671749, "loss_iou": 0.353515625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 117849536, "step": 1291 }, { "epoch": 5.383333333333334, "grad_norm": 9.279877811881972, "learning_rate": 5e-05, "loss": 0.1479, "num_input_tokens_seen": 117941236, "step": 1292 }, { "epoch": 5.383333333333334, "loss": 0.15879233181476593, "loss_ce": 0.0013216282241046429, "loss_iou": 0.33203125, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 117941236, "step": 1292 }, { "epoch": 5.3875, "grad_norm": 1.947697669110079, "learning_rate": 5e-05, "loss": 0.0772, "num_input_tokens_seen": 118032712, "step": 1293 }, { "epoch": 5.3875, "loss": 0.10585369169712067, "loss_ce": 0.0014683237532153726, "loss_iou": 0.4296875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 118032712, "step": 1293 }, { "epoch": 5.391666666666667, "grad_norm": 3.4046967806826474, "learning_rate": 5e-05, "loss": 0.1321, "num_input_tokens_seen": 118123684, "step": 1294 }, { "epoch": 5.391666666666667, "loss": 0.12018641829490662, "loss_ce": 8.188269930542447e-06, "loss_iou": 0.37890625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 118123684, "step": 1294 }, { "epoch": 5.395833333333333, "grad_norm": 4.557620525229239, "learning_rate": 5e-05, "loss": 0.1296, "num_input_tokens_seen": 118214100, "step": 1295 }, { "epoch": 5.395833333333333, "loss": 0.1263495683670044, "loss_ce": 0.00028145581018179655, "loss_iou": 0.16015625, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 118214100, "step": 1295 }, { "epoch": 5.4, "grad_norm": 5.160410407604447, "learning_rate": 5e-05, "loss": 0.1075, "num_input_tokens_seen": 118305364, "step": 1296 }, { "epoch": 5.4, "loss": 0.09206394851207733, "loss_ce": 0.0007553547620773315, "loss_iou": 0.345703125, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 118305364, "step": 1296 }, { "epoch": 5.404166666666667, "grad_norm": 3.2523841062609904, "learning_rate": 5e-05, "loss": 0.0924, "num_input_tokens_seen": 118396576, "step": 1297 }, { "epoch": 5.404166666666667, "loss": 0.10512672364711761, "loss_ce": 0.002694476395845413, "loss_iou": 0.1748046875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 118396576, "step": 1297 }, { "epoch": 5.408333333333333, "grad_norm": 3.821223046739121, "learning_rate": 5e-05, "loss": 0.0895, "num_input_tokens_seen": 118487648, "step": 1298 }, { "epoch": 5.408333333333333, "loss": 0.10508648306131363, "loss_ce": 0.0013572323368862271, "loss_iou": 0.2578125, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 118487648, "step": 1298 }, { "epoch": 5.4125, "grad_norm": 2.9015572422467995, "learning_rate": 5e-05, "loss": 0.1482, "num_input_tokens_seen": 118578664, "step": 1299 }, { "epoch": 5.4125, "loss": 0.1781851053237915, "loss_ce": 0.0006948804948478937, "loss_iou": 0.26171875, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 118578664, "step": 1299 }, { "epoch": 5.416666666666667, "grad_norm": 2.1261865505838276, "learning_rate": 5e-05, "loss": 0.0958, "num_input_tokens_seen": 118668848, "step": 1300 }, { "epoch": 5.416666666666667, "loss": 0.07364241033792496, "loss_ce": 3.492744099276024e-06, "loss_iou": 0.25390625, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 118668848, "step": 1300 }, { "epoch": 5.420833333333333, "grad_norm": 10.712939128925962, "learning_rate": 5e-05, "loss": 0.1183, "num_input_tokens_seen": 118759692, "step": 1301 }, { "epoch": 5.420833333333333, "loss": 0.1656116545200348, "loss_ce": 0.0014881201786920428, "loss_iou": 0.2138671875, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 118759692, "step": 1301 }, { "epoch": 5.425, "grad_norm": 4.153906106819758, "learning_rate": 5e-05, "loss": 0.1514, "num_input_tokens_seen": 118851164, "step": 1302 }, { "epoch": 5.425, "loss": 0.12954509258270264, "loss_ce": 0.0023173135705292225, "loss_iou": 0.388671875, "loss_num": 0.0255126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 118851164, "step": 1302 }, { "epoch": 5.429166666666666, "grad_norm": 18.213877327103127, "learning_rate": 5e-05, "loss": 0.1049, "num_input_tokens_seen": 118942860, "step": 1303 }, { "epoch": 5.429166666666666, "loss": 0.08647345751523972, "loss_ce": 0.0007343197357840836, "loss_iou": 0.283203125, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 118942860, "step": 1303 }, { "epoch": 5.433333333333334, "grad_norm": 3.8800971543706892, "learning_rate": 5e-05, "loss": 0.1463, "num_input_tokens_seen": 119034176, "step": 1304 }, { "epoch": 5.433333333333334, "loss": 0.14520961046218872, "loss_ce": 0.0018990678945556283, "loss_iou": 0.263671875, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 119034176, "step": 1304 }, { "epoch": 5.4375, "grad_norm": 3.602987749501208, "learning_rate": 5e-05, "loss": 0.1382, "num_input_tokens_seen": 119125428, "step": 1305 }, { "epoch": 5.4375, "loss": 0.1354275494813919, "loss_ce": 0.0009976228466257453, "loss_iou": 0.2734375, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 119125428, "step": 1305 }, { "epoch": 5.441666666666666, "grad_norm": 4.353646821760044, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 119216000, "step": 1306 }, { "epoch": 5.441666666666666, "loss": 0.06598338484764099, "loss_ce": 4.378542598715285e-06, "loss_iou": 0.3828125, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 119216000, "step": 1306 }, { "epoch": 5.445833333333334, "grad_norm": 4.755370196621049, "learning_rate": 5e-05, "loss": 0.141, "num_input_tokens_seen": 119307532, "step": 1307 }, { "epoch": 5.445833333333334, "loss": 0.17292292416095734, "loss_ce": 1.032604995998554e-05, "loss_iou": 0.498046875, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 119307532, "step": 1307 }, { "epoch": 5.45, "grad_norm": 2.3095424606443493, "learning_rate": 5e-05, "loss": 0.1128, "num_input_tokens_seen": 119399076, "step": 1308 }, { "epoch": 5.45, "loss": 0.10023734718561172, "loss_ce": 9.391548519488424e-05, "loss_iou": 0.33203125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 119399076, "step": 1308 }, { "epoch": 5.454166666666667, "grad_norm": 5.580918206205799, "learning_rate": 5e-05, "loss": 0.1014, "num_input_tokens_seen": 119489568, "step": 1309 }, { "epoch": 5.454166666666667, "loss": 0.10514950007200241, "loss_ce": 3.170152194797993e-05, "loss_iou": 0.283203125, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 119489568, "step": 1309 }, { "epoch": 5.458333333333333, "grad_norm": 3.5980309370300323, "learning_rate": 5e-05, "loss": 0.1116, "num_input_tokens_seen": 119579068, "step": 1310 }, { "epoch": 5.458333333333333, "loss": 0.1334247887134552, "loss_ce": 0.000612288189586252, "loss_iou": 0.2275390625, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 119579068, "step": 1310 }, { "epoch": 5.4625, "grad_norm": 2.3323074278017764, "learning_rate": 5e-05, "loss": 0.126, "num_input_tokens_seen": 119669940, "step": 1311 }, { "epoch": 5.4625, "loss": 0.12619194388389587, "loss_ce": 0.0022600588854402304, "loss_iou": 0.2041015625, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 119669940, "step": 1311 }, { "epoch": 5.466666666666667, "grad_norm": 9.77799078595796, "learning_rate": 5e-05, "loss": 0.1282, "num_input_tokens_seen": 119760884, "step": 1312 }, { "epoch": 5.466666666666667, "loss": 0.12309600412845612, "loss_ce": 0.0017733740387484431, "loss_iou": 0.248046875, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 119760884, "step": 1312 }, { "epoch": 5.470833333333333, "grad_norm": 29.41909642915102, "learning_rate": 5e-05, "loss": 0.1175, "num_input_tokens_seen": 119852328, "step": 1313 }, { "epoch": 5.470833333333333, "loss": 0.10807901620864868, "loss_ce": 0.001252235728316009, "loss_iou": 0.1904296875, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 119852328, "step": 1313 }, { "epoch": 5.475, "grad_norm": 2.809070152447759, "learning_rate": 5e-05, "loss": 0.1081, "num_input_tokens_seen": 119943524, "step": 1314 }, { "epoch": 5.475, "loss": 0.05476412922143936, "loss_ce": 0.00011477663065306842, "loss_iou": 0.11669921875, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 119943524, "step": 1314 }, { "epoch": 5.479166666666667, "grad_norm": 4.520838147968077, "learning_rate": 5e-05, "loss": 0.1286, "num_input_tokens_seen": 120034676, "step": 1315 }, { "epoch": 5.479166666666667, "loss": 0.11469803005456924, "loss_ce": 0.0011726388474926353, "loss_iou": 0.3984375, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 120034676, "step": 1315 }, { "epoch": 5.483333333333333, "grad_norm": 3.9476202456045812, "learning_rate": 5e-05, "loss": 0.1512, "num_input_tokens_seen": 120126436, "step": 1316 }, { "epoch": 5.483333333333333, "loss": 0.16531720757484436, "loss_ce": 0.003909740597009659, "loss_iou": 0.2421875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 120126436, "step": 1316 }, { "epoch": 5.4875, "grad_norm": 5.073469551742587, "learning_rate": 5e-05, "loss": 0.1701, "num_input_tokens_seen": 120218940, "step": 1317 }, { "epoch": 5.4875, "loss": 0.16629727184772491, "loss_ce": 0.0010369562078267336, "loss_iou": 0.173828125, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 120218940, "step": 1317 }, { "epoch": 5.491666666666666, "grad_norm": 14.568072862075656, "learning_rate": 5e-05, "loss": 0.1424, "num_input_tokens_seen": 120310016, "step": 1318 }, { "epoch": 5.491666666666666, "loss": 0.1581239104270935, "loss_ce": 0.00014966003072913736, "loss_iou": 0.259765625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 120310016, "step": 1318 }, { "epoch": 5.495833333333334, "grad_norm": 4.039395395304905, "learning_rate": 5e-05, "loss": 0.132, "num_input_tokens_seen": 120401500, "step": 1319 }, { "epoch": 5.495833333333334, "loss": 0.07492822408676147, "loss_ce": 0.00043481256579980254, "loss_iou": 0.2412109375, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 120401500, "step": 1319 }, { "epoch": 5.5, "grad_norm": 4.834606208319437, "learning_rate": 5e-05, "loss": 0.1196, "num_input_tokens_seen": 120493428, "step": 1320 }, { "epoch": 5.5, "loss": 0.10158014297485352, "loss_ce": 0.00023125787265598774, "loss_iou": 0.443359375, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 120493428, "step": 1320 }, { "epoch": 5.504166666666666, "grad_norm": 3.745125674672733, "learning_rate": 5e-05, "loss": 0.1399, "num_input_tokens_seen": 120584860, "step": 1321 }, { "epoch": 5.504166666666666, "loss": 0.14215955138206482, "loss_ce": 0.00031384555040858686, "loss_iou": 0.296875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 120584860, "step": 1321 }, { "epoch": 5.508333333333333, "grad_norm": 4.699836077217276, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 120676340, "step": 1322 }, { "epoch": 5.508333333333333, "loss": 0.09844175726175308, "loss_ce": 0.0008465431164950132, "loss_iou": 0.25, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 120676340, "step": 1322 }, { "epoch": 5.5125, "grad_norm": 9.791064961882169, "learning_rate": 5e-05, "loss": 0.1098, "num_input_tokens_seen": 120767704, "step": 1323 }, { "epoch": 5.5125, "loss": 0.10201099514961243, "loss_ce": 0.00017383776139467955, "loss_iou": 0.392578125, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 120767704, "step": 1323 }, { "epoch": 5.516666666666667, "grad_norm": 2.11375573286059, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 120858992, "step": 1324 }, { "epoch": 5.516666666666667, "loss": 0.08306519687175751, "loss_ce": 0.000332036754116416, "loss_iou": 0.2734375, "loss_num": 0.0166015625, "loss_xval": 0.08251953125, "num_input_tokens_seen": 120858992, "step": 1324 }, { "epoch": 5.520833333333333, "grad_norm": 3.817167031522216, "learning_rate": 5e-05, "loss": 0.1287, "num_input_tokens_seen": 120949912, "step": 1325 }, { "epoch": 5.520833333333333, "loss": 0.16249600052833557, "loss_ce": 0.0034231427125632763, "loss_iou": 0.140625, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 120949912, "step": 1325 }, { "epoch": 5.525, "grad_norm": 2.8508465367847236, "learning_rate": 5e-05, "loss": 0.1103, "num_input_tokens_seen": 121041228, "step": 1326 }, { "epoch": 5.525, "loss": 0.11468237638473511, "loss_ce": 0.0015842285938560963, "loss_iou": 0.2578125, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 121041228, "step": 1326 }, { "epoch": 5.529166666666667, "grad_norm": 3.405898668083595, "learning_rate": 5e-05, "loss": 0.1379, "num_input_tokens_seen": 121133032, "step": 1327 }, { "epoch": 5.529166666666667, "loss": 0.16581670939922333, "loss_ce": 0.001906805788166821, "loss_iou": 0.2578125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 121133032, "step": 1327 }, { "epoch": 5.533333333333333, "grad_norm": 14.186518262591795, "learning_rate": 5e-05, "loss": 0.137, "num_input_tokens_seen": 121223960, "step": 1328 }, { "epoch": 5.533333333333333, "loss": 0.18396437168121338, "loss_ce": 4.412940143083688e-06, "loss_iou": 0.171875, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 121223960, "step": 1328 }, { "epoch": 5.5375, "grad_norm": 3.8342764663678093, "learning_rate": 5e-05, "loss": 0.1694, "num_input_tokens_seen": 121315624, "step": 1329 }, { "epoch": 5.5375, "loss": 0.2375406175851822, "loss_ce": 0.00963535811752081, "loss_iou": 0.236328125, "loss_num": 0.045654296875, "loss_xval": 0.2275390625, "num_input_tokens_seen": 121315624, "step": 1329 }, { "epoch": 5.541666666666667, "grad_norm": 7.0100846416572, "learning_rate": 5e-05, "loss": 0.1295, "num_input_tokens_seen": 121406800, "step": 1330 }, { "epoch": 5.541666666666667, "loss": 0.1506330966949463, "loss_ce": 0.0028669715393334627, "loss_iou": 0.26171875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 121406800, "step": 1330 }, { "epoch": 5.545833333333333, "grad_norm": 6.519639379928086, "learning_rate": 5e-05, "loss": 0.1107, "num_input_tokens_seen": 121498668, "step": 1331 }, { "epoch": 5.545833333333333, "loss": 0.11283191293478012, "loss_ce": 0.0007713669911026955, "loss_iou": 0.42578125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 121498668, "step": 1331 }, { "epoch": 5.55, "grad_norm": 3.8741606092338543, "learning_rate": 5e-05, "loss": 0.0984, "num_input_tokens_seen": 121590040, "step": 1332 }, { "epoch": 5.55, "loss": 0.09328826516866684, "loss_ce": 0.000362245220458135, "loss_iou": 0.25390625, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 121590040, "step": 1332 }, { "epoch": 5.554166666666667, "grad_norm": 3.3976582814650373, "learning_rate": 5e-05, "loss": 0.1116, "num_input_tokens_seen": 121681272, "step": 1333 }, { "epoch": 5.554166666666667, "loss": 0.09696759283542633, "loss_ce": 0.00013531387958209962, "loss_iou": 0.408203125, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 121681272, "step": 1333 }, { "epoch": 5.558333333333334, "grad_norm": 5.950849140468419, "learning_rate": 5e-05, "loss": 0.1367, "num_input_tokens_seen": 121771680, "step": 1334 }, { "epoch": 5.558333333333334, "loss": 0.1399446427822113, "loss_ce": 0.0008150078938342631, "loss_iou": 0.40234375, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 121771680, "step": 1334 }, { "epoch": 5.5625, "grad_norm": 4.336659565505447, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 121862724, "step": 1335 }, { "epoch": 5.5625, "loss": 0.08848069608211517, "loss_ce": 1.0237114111077972e-05, "loss_iou": 0.1884765625, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 121862724, "step": 1335 }, { "epoch": 5.566666666666666, "grad_norm": 3.9108249628072014, "learning_rate": 5e-05, "loss": 0.0876, "num_input_tokens_seen": 121954404, "step": 1336 }, { "epoch": 5.566666666666666, "loss": 0.0976998582482338, "loss_ce": 0.00010464760998729616, "loss_iou": 0.2470703125, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 121954404, "step": 1336 }, { "epoch": 5.570833333333333, "grad_norm": 2.2674158318428037, "learning_rate": 5e-05, "loss": 0.1293, "num_input_tokens_seen": 122044452, "step": 1337 }, { "epoch": 5.570833333333333, "loss": 0.18020781874656677, "loss_ce": 0.0006118802120909095, "loss_iou": 0.08984375, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 122044452, "step": 1337 }, { "epoch": 5.575, "grad_norm": 3.6274714851944436, "learning_rate": 5e-05, "loss": 0.0759, "num_input_tokens_seen": 122135744, "step": 1338 }, { "epoch": 5.575, "loss": 0.09790819138288498, "loss_ce": 7.802974323567469e-06, "loss_iou": 0.240234375, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 122135744, "step": 1338 }, { "epoch": 5.579166666666667, "grad_norm": 6.199970319443036, "learning_rate": 5e-05, "loss": 0.1686, "num_input_tokens_seen": 122226308, "step": 1339 }, { "epoch": 5.579166666666667, "loss": 0.232774555683136, "loss_ce": 0.0005968232871964574, "loss_iou": 0.35546875, "loss_num": 0.04638671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 122226308, "step": 1339 }, { "epoch": 5.583333333333333, "grad_norm": 3.0077795954194295, "learning_rate": 5e-05, "loss": 0.1475, "num_input_tokens_seen": 122317768, "step": 1340 }, { "epoch": 5.583333333333333, "loss": 0.1635025143623352, "loss_ce": 5.0357546570012346e-05, "loss_iou": 0.279296875, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 122317768, "step": 1340 }, { "epoch": 5.5875, "grad_norm": 3.7756357130247578, "learning_rate": 5e-05, "loss": 0.1144, "num_input_tokens_seen": 122409612, "step": 1341 }, { "epoch": 5.5875, "loss": 0.12036249041557312, "loss_ce": 7.745936454739422e-05, "loss_iou": 0.330078125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 122409612, "step": 1341 }, { "epoch": 5.591666666666667, "grad_norm": 3.0500457168651973, "learning_rate": 5e-05, "loss": 0.0844, "num_input_tokens_seen": 122500640, "step": 1342 }, { "epoch": 5.591666666666667, "loss": 0.06776072829961777, "loss_ce": 1.1704881217156071e-05, "loss_iou": 0.33203125, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 122500640, "step": 1342 }, { "epoch": 5.595833333333333, "grad_norm": 2.9487248334348326, "learning_rate": 5e-05, "loss": 0.1064, "num_input_tokens_seen": 122591824, "step": 1343 }, { "epoch": 5.595833333333333, "loss": 0.13929034769535065, "loss_ce": 0.0026021194644272327, "loss_iou": 0.2421875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 122591824, "step": 1343 }, { "epoch": 5.6, "grad_norm": 9.735746598486841, "learning_rate": 5e-05, "loss": 0.1138, "num_input_tokens_seen": 122683248, "step": 1344 }, { "epoch": 5.6, "loss": 0.12929841876029968, "loss_ce": 0.00048372356104664505, "loss_iou": 0.365234375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 122683248, "step": 1344 }, { "epoch": 5.604166666666667, "grad_norm": 6.662715988668031, "learning_rate": 5e-05, "loss": 0.1206, "num_input_tokens_seen": 122774692, "step": 1345 }, { "epoch": 5.604166666666667, "loss": 0.16683092713356018, "loss_ce": 0.001517201540991664, "loss_iou": 0.41796875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 122774692, "step": 1345 }, { "epoch": 5.608333333333333, "grad_norm": 13.980924043996321, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 122865452, "step": 1346 }, { "epoch": 5.608333333333333, "loss": 0.04694174975156784, "loss_ce": 0.00043295894283801317, "loss_iou": 0.173828125, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 122865452, "step": 1346 }, { "epoch": 5.6125, "grad_norm": 2.274423403196373, "learning_rate": 5e-05, "loss": 0.0814, "num_input_tokens_seen": 122956992, "step": 1347 }, { "epoch": 5.6125, "loss": 0.08316200226545334, "loss_ce": 0.0005814348114654422, "loss_iou": 0.30078125, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 122956992, "step": 1347 }, { "epoch": 5.616666666666667, "grad_norm": 1.7727502604459684, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 123048252, "step": 1348 }, { "epoch": 5.616666666666667, "loss": 0.07918908447027206, "loss_ce": 0.00014855354675091803, "loss_iou": 0.271484375, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 123048252, "step": 1348 }, { "epoch": 5.620833333333334, "grad_norm": 6.512267768507208, "learning_rate": 5e-05, "loss": 0.1754, "num_input_tokens_seen": 123139824, "step": 1349 }, { "epoch": 5.620833333333334, "loss": 0.16340044140815735, "loss_ce": 0.001107958611100912, "loss_iou": 0.32421875, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 123139824, "step": 1349 }, { "epoch": 5.625, "grad_norm": 2.7742319380800087, "learning_rate": 5e-05, "loss": 0.114, "num_input_tokens_seen": 123231200, "step": 1350 }, { "epoch": 5.625, "loss": 0.13310708105564117, "loss_ce": 0.0017289024544879794, "loss_iou": 0.3203125, "loss_num": 0.0262451171875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 123231200, "step": 1350 }, { "epoch": 5.629166666666666, "grad_norm": 2.7421581868421154, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 123322200, "step": 1351 }, { "epoch": 5.629166666666666, "loss": 0.08086127042770386, "loss_ce": 0.0007373740081675351, "loss_iou": 0.224609375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 123322200, "step": 1351 }, { "epoch": 5.633333333333333, "grad_norm": 12.181092060389654, "learning_rate": 5e-05, "loss": 0.1288, "num_input_tokens_seen": 123413700, "step": 1352 }, { "epoch": 5.633333333333333, "loss": 0.1216948851943016, "loss_ce": 2.892697011702694e-05, "loss_iou": 0.326171875, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 123413700, "step": 1352 }, { "epoch": 5.6375, "grad_norm": 2.804866864367334, "learning_rate": 5e-05, "loss": 0.1766, "num_input_tokens_seen": 123505476, "step": 1353 }, { "epoch": 5.6375, "loss": 0.19010448455810547, "loss_ce": 0.0001630748447496444, "loss_iou": 0.203125, "loss_num": 0.0380859375, "loss_xval": 0.189453125, "num_input_tokens_seen": 123505476, "step": 1353 }, { "epoch": 5.641666666666667, "grad_norm": 2.970795203086947, "learning_rate": 5e-05, "loss": 0.1032, "num_input_tokens_seen": 123597304, "step": 1354 }, { "epoch": 5.641666666666667, "loss": 0.10757936537265778, "loss_ce": 0.001179831917397678, "loss_iou": 0.234375, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 123597304, "step": 1354 }, { "epoch": 5.645833333333333, "grad_norm": 4.862489334267388, "learning_rate": 5e-05, "loss": 0.0743, "num_input_tokens_seen": 123688472, "step": 1355 }, { "epoch": 5.645833333333333, "loss": 0.08013699948787689, "loss_ce": 2.8353737434372306e-05, "loss_iou": 0.310546875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 123688472, "step": 1355 }, { "epoch": 5.65, "grad_norm": 6.869654964868086, "learning_rate": 5e-05, "loss": 0.1065, "num_input_tokens_seen": 123779376, "step": 1356 }, { "epoch": 5.65, "loss": 0.10132080316543579, "loss_ce": 4.822035043616779e-05, "loss_iou": 0.15234375, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 123779376, "step": 1356 }, { "epoch": 5.654166666666667, "grad_norm": 2.983921317394553, "learning_rate": 5e-05, "loss": 0.096, "num_input_tokens_seen": 123870148, "step": 1357 }, { "epoch": 5.654166666666667, "loss": 0.09474675357341766, "loss_ce": 0.000355890195351094, "loss_iou": 0.37109375, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 123870148, "step": 1357 }, { "epoch": 5.658333333333333, "grad_norm": 5.16639456092189, "learning_rate": 5e-05, "loss": 0.1203, "num_input_tokens_seen": 123961500, "step": 1358 }, { "epoch": 5.658333333333333, "loss": 0.15206214785575867, "loss_ce": 0.00020666493219323456, "loss_iou": 0.40234375, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 123961500, "step": 1358 }, { "epoch": 5.6625, "grad_norm": 2.583455558358417, "learning_rate": 5e-05, "loss": 0.125, "num_input_tokens_seen": 124052488, "step": 1359 }, { "epoch": 5.6625, "loss": 0.15860290825366974, "loss_ce": 3.058829406654695e-06, "loss_iou": 0.359375, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 124052488, "step": 1359 }, { "epoch": 5.666666666666667, "grad_norm": 5.218075262894594, "learning_rate": 5e-05, "loss": 0.119, "num_input_tokens_seen": 124142904, "step": 1360 }, { "epoch": 5.666666666666667, "loss": 0.13689467310905457, "loss_ce": 8.085868103080429e-06, "loss_iou": 0.21484375, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 124142904, "step": 1360 }, { "epoch": 5.670833333333333, "grad_norm": 4.067380297859276, "learning_rate": 5e-05, "loss": 0.1156, "num_input_tokens_seen": 124234728, "step": 1361 }, { "epoch": 5.670833333333333, "loss": 0.1417345404624939, "loss_ce": 0.0017504148418083787, "loss_iou": 0.361328125, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 124234728, "step": 1361 }, { "epoch": 5.675, "grad_norm": 5.526560302845275, "learning_rate": 5e-05, "loss": 0.0803, "num_input_tokens_seen": 124325384, "step": 1362 }, { "epoch": 5.675, "loss": 0.05790679529309273, "loss_ce": 0.0008694432908669114, "loss_iou": 0.3203125, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 124325384, "step": 1362 }, { "epoch": 5.679166666666667, "grad_norm": 4.522456855446742, "learning_rate": 5e-05, "loss": 0.134, "num_input_tokens_seen": 124417208, "step": 1363 }, { "epoch": 5.679166666666667, "loss": 0.14377330243587494, "loss_ce": 0.00235484610311687, "loss_iou": 0.06103515625, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 124417208, "step": 1363 }, { "epoch": 5.683333333333334, "grad_norm": 5.136114884030797, "learning_rate": 5e-05, "loss": 0.2137, "num_input_tokens_seen": 124508272, "step": 1364 }, { "epoch": 5.683333333333334, "loss": 0.3197259306907654, "loss_ce": 0.0006646495312452316, "loss_iou": 0.2421875, "loss_num": 0.06396484375, "loss_xval": 0.318359375, "num_input_tokens_seen": 124508272, "step": 1364 }, { "epoch": 5.6875, "grad_norm": 4.363438357339952, "learning_rate": 5e-05, "loss": 0.0901, "num_input_tokens_seen": 124599716, "step": 1365 }, { "epoch": 5.6875, "loss": 0.07296618819236755, "loss_ce": 0.0005632347892969847, "loss_iou": 0.333984375, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 124599716, "step": 1365 }, { "epoch": 5.691666666666666, "grad_norm": 5.8186678053413114, "learning_rate": 5e-05, "loss": 0.1762, "num_input_tokens_seen": 124691168, "step": 1366 }, { "epoch": 5.691666666666666, "loss": 0.1794736683368683, "loss_ce": 0.00012184677325421944, "loss_iou": 0.365234375, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 124691168, "step": 1366 }, { "epoch": 5.695833333333333, "grad_norm": 5.472687379691303, "learning_rate": 5e-05, "loss": 0.1672, "num_input_tokens_seen": 124782024, "step": 1367 }, { "epoch": 5.695833333333333, "loss": 0.20980660617351532, "loss_ce": 5.9296260587871075e-05, "loss_iou": 0.32421875, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 124782024, "step": 1367 }, { "epoch": 5.7, "grad_norm": 6.9917419649008785, "learning_rate": 5e-05, "loss": 0.1356, "num_input_tokens_seen": 124873252, "step": 1368 }, { "epoch": 5.7, "loss": 0.12677843868732452, "loss_ce": 0.002281972672790289, "loss_iou": 0.26953125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 124873252, "step": 1368 }, { "epoch": 5.704166666666667, "grad_norm": 5.814616735385409, "learning_rate": 5e-05, "loss": 0.1259, "num_input_tokens_seen": 124964664, "step": 1369 }, { "epoch": 5.704166666666667, "loss": 0.09749096632003784, "loss_ce": 0.007006343454122543, "loss_iou": 0.2890625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 124964664, "step": 1369 }, { "epoch": 5.708333333333333, "grad_norm": 3.0660984423798148, "learning_rate": 5e-05, "loss": 0.0671, "num_input_tokens_seen": 125055940, "step": 1370 }, { "epoch": 5.708333333333333, "loss": 0.06529150158166885, "loss_ce": 0.001647092285566032, "loss_iou": 0.259765625, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 125055940, "step": 1370 }, { "epoch": 5.7125, "grad_norm": 6.218628239573136, "learning_rate": 5e-05, "loss": 0.1184, "num_input_tokens_seen": 125146680, "step": 1371 }, { "epoch": 5.7125, "loss": 0.09907136857509613, "loss_ce": 0.0005148549098521471, "loss_iou": 0.32421875, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 125146680, "step": 1371 }, { "epoch": 5.716666666666667, "grad_norm": 3.549855326087419, "learning_rate": 5e-05, "loss": 0.1178, "num_input_tokens_seen": 125237192, "step": 1372 }, { "epoch": 5.716666666666667, "loss": 0.147294819355011, "loss_ce": 0.0002000824606511742, "loss_iou": 0.3359375, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 125237192, "step": 1372 }, { "epoch": 5.720833333333333, "grad_norm": 3.438669447358349, "learning_rate": 5e-05, "loss": 0.1373, "num_input_tokens_seen": 125328156, "step": 1373 }, { "epoch": 5.720833333333333, "loss": 0.1804531067609787, "loss_ce": 4.8438068915857e-05, "loss_iou": 0.263671875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 125328156, "step": 1373 }, { "epoch": 5.725, "grad_norm": 4.258688165607683, "learning_rate": 5e-05, "loss": 0.1213, "num_input_tokens_seen": 125419616, "step": 1374 }, { "epoch": 5.725, "loss": 0.1245311051607132, "loss_ce": 4.9905396735994145e-05, "loss_iou": 0.365234375, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 125419616, "step": 1374 }, { "epoch": 5.729166666666667, "grad_norm": 11.280003735303778, "learning_rate": 5e-05, "loss": 0.0804, "num_input_tokens_seen": 125511372, "step": 1375 }, { "epoch": 5.729166666666667, "loss": 0.08632014691829681, "loss_ce": 9.273333853343502e-05, "loss_iou": 0.375, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 125511372, "step": 1375 }, { "epoch": 5.733333333333333, "grad_norm": 4.239119005263746, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 125602504, "step": 1376 }, { "epoch": 5.733333333333333, "loss": 0.07424075156450272, "loss_ce": 3.726166323758662e-05, "loss_iou": 0.283203125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 125602504, "step": 1376 }, { "epoch": 5.7375, "grad_norm": 10.669632714655261, "learning_rate": 5e-05, "loss": 0.1424, "num_input_tokens_seen": 125693512, "step": 1377 }, { "epoch": 5.7375, "loss": 0.14643409848213196, "loss_ce": 0.0008347236434929073, "loss_iou": 0.1611328125, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 125693512, "step": 1377 }, { "epoch": 5.741666666666667, "grad_norm": 1.2401994437594337, "learning_rate": 5e-05, "loss": 0.1135, "num_input_tokens_seen": 125784776, "step": 1378 }, { "epoch": 5.741666666666667, "loss": 0.12819138169288635, "loss_ce": 0.00024643377400934696, "loss_iou": 0.09814453125, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 125784776, "step": 1378 }, { "epoch": 5.745833333333334, "grad_norm": 6.072454227835264, "learning_rate": 5e-05, "loss": 0.1155, "num_input_tokens_seen": 125875244, "step": 1379 }, { "epoch": 5.745833333333334, "loss": 0.10392003506422043, "loss_ce": 7.674920198041946e-06, "loss_iou": 0.279296875, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 125875244, "step": 1379 }, { "epoch": 5.75, "grad_norm": 4.779781538803755, "learning_rate": 5e-05, "loss": 0.1221, "num_input_tokens_seen": 125966560, "step": 1380 }, { "epoch": 5.75, "loss": 0.13277241587638855, "loss_ce": 0.006856895983219147, "loss_iou": 0.26953125, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 125966560, "step": 1380 }, { "epoch": 5.754166666666666, "grad_norm": 3.803003348193671, "learning_rate": 5e-05, "loss": 0.0758, "num_input_tokens_seen": 126058024, "step": 1381 }, { "epoch": 5.754166666666666, "loss": 0.08865412324666977, "loss_ce": 0.000580394989810884, "loss_iou": 0.322265625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 126058024, "step": 1381 }, { "epoch": 5.758333333333333, "grad_norm": 7.029219413765709, "learning_rate": 5e-05, "loss": 0.1119, "num_input_tokens_seen": 126149752, "step": 1382 }, { "epoch": 5.758333333333333, "loss": 0.11876152455806732, "loss_ce": 0.0031609362922608852, "loss_iou": 0.345703125, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 126149752, "step": 1382 }, { "epoch": 5.7625, "grad_norm": 4.803804465593587, "learning_rate": 5e-05, "loss": 0.1242, "num_input_tokens_seen": 126240872, "step": 1383 }, { "epoch": 5.7625, "loss": 0.14519238471984863, "loss_ce": 2.025286630669143e-05, "loss_iou": 0.41015625, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 126240872, "step": 1383 }, { "epoch": 5.766666666666667, "grad_norm": 3.753139448773272, "learning_rate": 5e-05, "loss": 0.0959, "num_input_tokens_seen": 126331408, "step": 1384 }, { "epoch": 5.766666666666667, "loss": 0.08953894674777985, "loss_ce": 0.0019229742465540767, "loss_iou": 0.271484375, "loss_num": 0.017578125, "loss_xval": 0.08740234375, "num_input_tokens_seen": 126331408, "step": 1384 }, { "epoch": 5.770833333333333, "grad_norm": 6.9389629332481215, "learning_rate": 5e-05, "loss": 0.1218, "num_input_tokens_seen": 126422480, "step": 1385 }, { "epoch": 5.770833333333333, "loss": 0.10029729455709457, "loss_ce": 0.0013287862529978156, "loss_iou": 0.2421875, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 126422480, "step": 1385 }, { "epoch": 5.775, "grad_norm": 2.935179544076038, "learning_rate": 5e-05, "loss": 0.1264, "num_input_tokens_seen": 126514056, "step": 1386 }, { "epoch": 5.775, "loss": 0.10370725393295288, "loss_ce": 8.521106792613864e-06, "loss_iou": 0.2734375, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 126514056, "step": 1386 }, { "epoch": 5.779166666666667, "grad_norm": 3.8311792865692285, "learning_rate": 5e-05, "loss": 0.0775, "num_input_tokens_seen": 126605832, "step": 1387 }, { "epoch": 5.779166666666667, "loss": 0.08688151091337204, "loss_ce": 0.0016306517645716667, "loss_iou": 0.259765625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 126605832, "step": 1387 }, { "epoch": 5.783333333333333, "grad_norm": 5.511775875742109, "learning_rate": 5e-05, "loss": 0.1236, "num_input_tokens_seen": 126697332, "step": 1388 }, { "epoch": 5.783333333333333, "loss": 0.13793256878852844, "loss_ce": 0.0012443342711776495, "loss_iou": 0.294921875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 126697332, "step": 1388 }, { "epoch": 5.7875, "grad_norm": 3.1588293585554132, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 126788452, "step": 1389 }, { "epoch": 5.7875, "loss": 0.05696623772382736, "loss_ce": 2.0436800696188584e-05, "loss_iou": 0.1669921875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 126788452, "step": 1389 }, { "epoch": 5.791666666666667, "grad_norm": 1.7719242686496692, "learning_rate": 5e-05, "loss": 0.1047, "num_input_tokens_seen": 126879952, "step": 1390 }, { "epoch": 5.791666666666667, "loss": 0.08999593555927277, "loss_ce": 0.0005336473695933819, "loss_iou": 0.30859375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 126879952, "step": 1390 }, { "epoch": 5.795833333333333, "grad_norm": 4.821880653146912, "learning_rate": 5e-05, "loss": 0.0908, "num_input_tokens_seen": 126970880, "step": 1391 }, { "epoch": 5.795833333333333, "loss": 0.09289488196372986, "loss_ce": 0.00030455196974799037, "loss_iou": 0.31640625, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 126970880, "step": 1391 }, { "epoch": 5.8, "grad_norm": 19.571271712860046, "learning_rate": 5e-05, "loss": 0.1938, "num_input_tokens_seen": 127062744, "step": 1392 }, { "epoch": 5.8, "loss": 0.207724928855896, "loss_ce": 0.000693686306476593, "loss_iou": 0.298828125, "loss_num": 0.04150390625, "loss_xval": 0.20703125, "num_input_tokens_seen": 127062744, "step": 1392 }, { "epoch": 5.804166666666667, "grad_norm": 9.288593008201802, "learning_rate": 5e-05, "loss": 0.1026, "num_input_tokens_seen": 127154420, "step": 1393 }, { "epoch": 5.804166666666667, "loss": 0.11515213549137115, "loss_ce": 0.0022065802477300167, "loss_iou": 0.3125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 127154420, "step": 1393 }, { "epoch": 5.808333333333334, "grad_norm": 4.990670848828062, "learning_rate": 5e-05, "loss": 0.152, "num_input_tokens_seen": 127246160, "step": 1394 }, { "epoch": 5.808333333333334, "loss": 0.13062453269958496, "loss_ce": 0.001825102255679667, "loss_iou": 0.3359375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 127246160, "step": 1394 }, { "epoch": 5.8125, "grad_norm": 9.611045071208759, "learning_rate": 5e-05, "loss": 0.1137, "num_input_tokens_seen": 127337672, "step": 1395 }, { "epoch": 5.8125, "loss": 0.13898390531539917, "loss_ce": 6.788775499444455e-05, "loss_iou": 0.3046875, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 127337672, "step": 1395 }, { "epoch": 5.816666666666666, "grad_norm": 6.6446252308609655, "learning_rate": 5e-05, "loss": 0.0853, "num_input_tokens_seen": 127428936, "step": 1396 }, { "epoch": 5.816666666666666, "loss": 0.09065217524766922, "loss_ce": 0.0022732634097337723, "loss_iou": 0.21484375, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 127428936, "step": 1396 }, { "epoch": 5.820833333333333, "grad_norm": 5.360873250355205, "learning_rate": 5e-05, "loss": 0.161, "num_input_tokens_seen": 127520300, "step": 1397 }, { "epoch": 5.820833333333333, "loss": 0.16661198437213898, "loss_ce": 0.0024426807649433613, "loss_iou": 0.392578125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 127520300, "step": 1397 }, { "epoch": 5.825, "grad_norm": 3.458595941170449, "learning_rate": 5e-05, "loss": 0.1549, "num_input_tokens_seen": 127611440, "step": 1398 }, { "epoch": 5.825, "loss": 0.10185273736715317, "loss_ce": 0.00010713595838751644, "loss_iou": 0.298828125, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 127611440, "step": 1398 }, { "epoch": 5.829166666666667, "grad_norm": 5.753849664087916, "learning_rate": 5e-05, "loss": 0.1105, "num_input_tokens_seen": 127702876, "step": 1399 }, { "epoch": 5.829166666666667, "loss": 0.12810850143432617, "loss_ce": 0.00014066360017750412, "loss_iou": 0.265625, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 127702876, "step": 1399 }, { "epoch": 5.833333333333333, "grad_norm": 7.649382509419218, "learning_rate": 5e-05, "loss": 0.136, "num_input_tokens_seen": 127794236, "step": 1400 }, { "epoch": 5.833333333333333, "loss": 0.1401953399181366, "loss_ce": 0.0010351943783462048, "loss_iou": 0.23046875, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 127794236, "step": 1400 }, { "epoch": 5.8375, "grad_norm": 3.7614483806142522, "learning_rate": 5e-05, "loss": 0.1117, "num_input_tokens_seen": 127885452, "step": 1401 }, { "epoch": 5.8375, "loss": 0.08002236485481262, "loss_ce": 0.00020363648945931345, "loss_iou": 0.26171875, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 127885452, "step": 1401 }, { "epoch": 5.841666666666667, "grad_norm": 4.4975016940302615, "learning_rate": 5e-05, "loss": 0.2282, "num_input_tokens_seen": 127976184, "step": 1402 }, { "epoch": 5.841666666666667, "loss": 0.15244199335575104, "loss_ce": 0.0026617220137268305, "loss_iou": 0.310546875, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 127976184, "step": 1402 }, { "epoch": 5.845833333333333, "grad_norm": 2.8847976439767526, "learning_rate": 5e-05, "loss": 0.1286, "num_input_tokens_seen": 128067436, "step": 1403 }, { "epoch": 5.845833333333333, "loss": 0.08370509743690491, "loss_ce": 0.010707048699259758, "loss_iou": 0.267578125, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 128067436, "step": 1403 }, { "epoch": 5.85, "grad_norm": 3.791157871723593, "learning_rate": 5e-05, "loss": 0.1328, "num_input_tokens_seen": 128158428, "step": 1404 }, { "epoch": 5.85, "loss": 0.1484401822090149, "loss_ce": 0.0034969523549079895, "loss_iou": 0.1865234375, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 128158428, "step": 1404 }, { "epoch": 5.854166666666667, "grad_norm": 3.90694991816708, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 128249980, "step": 1405 }, { "epoch": 5.854166666666667, "loss": 0.080207958817482, "loss_ce": 0.0010301051661372185, "loss_iou": 0.224609375, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 128249980, "step": 1405 }, { "epoch": 5.858333333333333, "grad_norm": 8.05090517798076, "learning_rate": 5e-05, "loss": 0.0943, "num_input_tokens_seen": 128341360, "step": 1406 }, { "epoch": 5.858333333333333, "loss": 0.09763146936893463, "loss_ce": 0.0026912896428257227, "loss_iou": 0.259765625, "loss_num": 0.01904296875, "loss_xval": 0.0947265625, "num_input_tokens_seen": 128341360, "step": 1406 }, { "epoch": 5.8625, "grad_norm": 5.301622433216056, "learning_rate": 5e-05, "loss": 0.1137, "num_input_tokens_seen": 128432648, "step": 1407 }, { "epoch": 5.8625, "loss": 0.15396666526794434, "loss_ce": 0.0016381873283535242, "loss_iou": 0.28125, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 128432648, "step": 1407 }, { "epoch": 5.866666666666667, "grad_norm": 4.992514994151027, "learning_rate": 5e-05, "loss": 0.0979, "num_input_tokens_seen": 128524052, "step": 1408 }, { "epoch": 5.866666666666667, "loss": 0.09603699296712875, "loss_ce": 0.0002728329855017364, "loss_iou": 0.40234375, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 128524052, "step": 1408 }, { "epoch": 5.870833333333334, "grad_norm": 4.992700817112639, "learning_rate": 5e-05, "loss": 0.157, "num_input_tokens_seen": 128615208, "step": 1409 }, { "epoch": 5.870833333333334, "loss": 0.14660075306892395, "loss_ce": 0.00158121925778687, "loss_iou": 0.3359375, "loss_num": 0.029052734375, "loss_xval": 0.14453125, "num_input_tokens_seen": 128615208, "step": 1409 }, { "epoch": 5.875, "grad_norm": 2.618801455878482, "learning_rate": 5e-05, "loss": 0.1197, "num_input_tokens_seen": 128706640, "step": 1410 }, { "epoch": 5.875, "loss": 0.09924958646297455, "loss_ce": 0.0009219488129019737, "loss_iou": 0.376953125, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 128706640, "step": 1410 }, { "epoch": 5.879166666666666, "grad_norm": 2.0525356578434777, "learning_rate": 5e-05, "loss": 0.1074, "num_input_tokens_seen": 128798064, "step": 1411 }, { "epoch": 5.879166666666666, "loss": 0.12654848396778107, "loss_ce": 0.0030896144453436136, "loss_iou": 0.28125, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 128798064, "step": 1411 }, { "epoch": 5.883333333333333, "grad_norm": 4.246914326681091, "learning_rate": 5e-05, "loss": 0.1115, "num_input_tokens_seen": 128889052, "step": 1412 }, { "epoch": 5.883333333333333, "loss": 0.10726828873157501, "loss_ce": 9.055174450622872e-05, "loss_iou": 0.1689453125, "loss_num": 0.0213623046875, "loss_xval": 0.107421875, "num_input_tokens_seen": 128889052, "step": 1412 }, { "epoch": 5.8875, "grad_norm": 10.112082124521148, "learning_rate": 5e-05, "loss": 0.1061, "num_input_tokens_seen": 128980372, "step": 1413 }, { "epoch": 5.8875, "loss": 0.1580641269683838, "loss_ce": 0.0011732680723071098, "loss_iou": 0.2412109375, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 128980372, "step": 1413 }, { "epoch": 5.891666666666667, "grad_norm": 2.956973856175846, "learning_rate": 5e-05, "loss": 0.0793, "num_input_tokens_seen": 129071964, "step": 1414 }, { "epoch": 5.891666666666667, "loss": 0.09260989725589752, "loss_ce": 0.0021863128058612347, "loss_iou": 0.138671875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 129071964, "step": 1414 }, { "epoch": 5.895833333333333, "grad_norm": 7.539293561220471, "learning_rate": 5e-05, "loss": 0.087, "num_input_tokens_seen": 129162712, "step": 1415 }, { "epoch": 5.895833333333333, "loss": 0.11663861572742462, "loss_ce": 0.0017780864145606756, "loss_iou": 0.234375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 129162712, "step": 1415 }, { "epoch": 5.9, "grad_norm": 2.354926479183944, "learning_rate": 5e-05, "loss": 0.0929, "num_input_tokens_seen": 129254180, "step": 1416 }, { "epoch": 5.9, "loss": 0.14426201581954956, "loss_ce": 0.00046319590182974935, "loss_iou": 0.369140625, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 129254180, "step": 1416 }, { "epoch": 5.904166666666667, "grad_norm": 3.0284601028360214, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 129345164, "step": 1417 }, { "epoch": 5.904166666666667, "loss": 0.048389360308647156, "loss_ce": 0.0005377948982641101, "loss_iou": 0.3046875, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 129345164, "step": 1417 }, { "epoch": 5.908333333333333, "grad_norm": 11.571107906423846, "learning_rate": 5e-05, "loss": 0.1213, "num_input_tokens_seen": 129436412, "step": 1418 }, { "epoch": 5.908333333333333, "loss": 0.08516918122768402, "loss_ce": 2.514522202545777e-05, "loss_iou": 0.12158203125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 129436412, "step": 1418 }, { "epoch": 5.9125, "grad_norm": 3.321284229778174, "learning_rate": 5e-05, "loss": 0.1184, "num_input_tokens_seen": 129528112, "step": 1419 }, { "epoch": 5.9125, "loss": 0.07697541266679764, "loss_ce": 0.0007577605429105461, "loss_iou": 0.32421875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 129528112, "step": 1419 }, { "epoch": 5.916666666666667, "grad_norm": 2.8526600238132858, "learning_rate": 5e-05, "loss": 0.1374, "num_input_tokens_seen": 129618984, "step": 1420 }, { "epoch": 5.916666666666667, "loss": 0.13690005242824554, "loss_ce": 0.0009137189481407404, "loss_iou": 0.29296875, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 129618984, "step": 1420 }, { "epoch": 5.920833333333333, "grad_norm": 3.1659884378062655, "learning_rate": 5e-05, "loss": 0.1042, "num_input_tokens_seen": 129710432, "step": 1421 }, { "epoch": 5.920833333333333, "loss": 0.07005725800991058, "loss_ce": 0.0004619219107553363, "loss_iou": 0.26171875, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 129710432, "step": 1421 }, { "epoch": 5.925, "grad_norm": 4.322000714896495, "learning_rate": 5e-05, "loss": 0.1226, "num_input_tokens_seen": 129801128, "step": 1422 }, { "epoch": 5.925, "loss": 0.13157625496387482, "loss_ce": 1.4974492842156906e-05, "loss_iou": 0.408203125, "loss_num": 0.0262451171875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 129801128, "step": 1422 }, { "epoch": 5.929166666666667, "grad_norm": 4.908923880940535, "learning_rate": 5e-05, "loss": 0.1494, "num_input_tokens_seen": 129892840, "step": 1423 }, { "epoch": 5.929166666666667, "loss": 0.17185944318771362, "loss_ce": 0.002425836632028222, "loss_iou": 0.369140625, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 129892840, "step": 1423 }, { "epoch": 5.933333333333334, "grad_norm": 3.4893970706080477, "learning_rate": 5e-05, "loss": 0.0859, "num_input_tokens_seen": 129983844, "step": 1424 }, { "epoch": 5.933333333333334, "loss": 0.11811276525259018, "loss_ce": 0.0005285373190417886, "loss_iou": 0.365234375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 129983844, "step": 1424 }, { "epoch": 5.9375, "grad_norm": 2.676893704577933, "learning_rate": 5e-05, "loss": 0.107, "num_input_tokens_seen": 130075308, "step": 1425 }, { "epoch": 5.9375, "loss": 0.09480118751525879, "loss_ce": 0.006071331910789013, "loss_iou": 0.341796875, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 130075308, "step": 1425 }, { "epoch": 5.941666666666666, "grad_norm": 8.114316288839373, "learning_rate": 5e-05, "loss": 0.1014, "num_input_tokens_seen": 130166500, "step": 1426 }, { "epoch": 5.941666666666666, "loss": 0.08116979897022247, "loss_ce": 0.0008933134377002716, "loss_iou": 0.208984375, "loss_num": 0.01611328125, "loss_xval": 0.080078125, "num_input_tokens_seen": 130166500, "step": 1426 }, { "epoch": 5.945833333333333, "grad_norm": 4.24149996907433, "learning_rate": 5e-05, "loss": 0.0887, "num_input_tokens_seen": 130257872, "step": 1427 }, { "epoch": 5.945833333333333, "loss": 0.09025895595550537, "loss_ce": 7.9511315561831e-05, "loss_iou": 0.2890625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 130257872, "step": 1427 }, { "epoch": 5.95, "grad_norm": 4.695269924066833, "learning_rate": 5e-05, "loss": 0.0995, "num_input_tokens_seen": 130349204, "step": 1428 }, { "epoch": 5.95, "loss": 0.09523400664329529, "loss_ce": 0.000553212477825582, "loss_iou": 0.228515625, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 130349204, "step": 1428 }, { "epoch": 5.954166666666667, "grad_norm": 7.200325980740724, "learning_rate": 5e-05, "loss": 0.1506, "num_input_tokens_seen": 130440384, "step": 1429 }, { "epoch": 5.954166666666667, "loss": 0.15087425708770752, "loss_ce": 0.0002699988253880292, "loss_iou": 0.3046875, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 130440384, "step": 1429 }, { "epoch": 5.958333333333333, "grad_norm": 2.0693370843787076, "learning_rate": 5e-05, "loss": 0.0823, "num_input_tokens_seen": 130531700, "step": 1430 }, { "epoch": 5.958333333333333, "loss": 0.062041543424129486, "loss_ce": 0.0005791393341496587, "loss_iou": 0.291015625, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 130531700, "step": 1430 }, { "epoch": 5.9625, "grad_norm": 3.6766062364680234, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 130623984, "step": 1431 }, { "epoch": 5.9625, "loss": 0.06569721549749374, "loss_ce": 0.0022969457786530256, "loss_iou": 0.44921875, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 130623984, "step": 1431 }, { "epoch": 5.966666666666667, "grad_norm": 4.2509085286253905, "learning_rate": 5e-05, "loss": 0.1284, "num_input_tokens_seen": 130715068, "step": 1432 }, { "epoch": 5.966666666666667, "loss": 0.11964157968759537, "loss_ce": 1.26781924336683e-05, "loss_iou": 0.3046875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 130715068, "step": 1432 }, { "epoch": 5.970833333333333, "grad_norm": 5.827467890808661, "learning_rate": 5e-05, "loss": 0.1208, "num_input_tokens_seen": 130806308, "step": 1433 }, { "epoch": 5.970833333333333, "loss": 0.14763817191123962, "loss_ce": 0.0007112891180440784, "loss_iou": 0.142578125, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 130806308, "step": 1433 }, { "epoch": 5.975, "grad_norm": 5.04126021723349, "learning_rate": 5e-05, "loss": 0.1002, "num_input_tokens_seen": 130897584, "step": 1434 }, { "epoch": 5.975, "loss": 0.0726742222905159, "loss_ce": 0.004558989778161049, "loss_iou": 0.2109375, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 130897584, "step": 1434 }, { "epoch": 5.979166666666667, "grad_norm": 19.47772705817985, "learning_rate": 5e-05, "loss": 0.1388, "num_input_tokens_seen": 130989156, "step": 1435 }, { "epoch": 5.979166666666667, "loss": 0.12499289214611053, "loss_ce": 0.0004506511613726616, "loss_iou": 0.298828125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 130989156, "step": 1435 }, { "epoch": 5.983333333333333, "grad_norm": 5.331301563357194, "learning_rate": 5e-05, "loss": 0.0831, "num_input_tokens_seen": 131080592, "step": 1436 }, { "epoch": 5.983333333333333, "loss": 0.07300704717636108, "loss_ce": 0.0007414190331473947, "loss_iou": 0.2119140625, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 131080592, "step": 1436 }, { "epoch": 5.9875, "grad_norm": 3.104241540677376, "learning_rate": 5e-05, "loss": 0.1283, "num_input_tokens_seen": 131171984, "step": 1437 }, { "epoch": 5.9875, "loss": 0.057797543704509735, "loss_ce": 5.828931898577139e-05, "loss_iou": 0.380859375, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 131171984, "step": 1437 }, { "epoch": 5.991666666666667, "grad_norm": 57.36883699022541, "learning_rate": 5e-05, "loss": 0.0925, "num_input_tokens_seen": 131263136, "step": 1438 }, { "epoch": 5.991666666666667, "loss": 0.09304441511631012, "loss_ce": 0.0006066667847335339, "loss_iou": 0.28125, "loss_num": 0.0185546875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 131263136, "step": 1438 }, { "epoch": 5.995833333333334, "grad_norm": 3.85331435872134, "learning_rate": 5e-05, "loss": 0.2113, "num_input_tokens_seen": 131355128, "step": 1439 }, { "epoch": 5.995833333333334, "loss": 0.20464988052845, "loss_ce": 0.0018910972867161036, "loss_iou": 0.427734375, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 131355128, "step": 1439 }, { "epoch": 6.0, "grad_norm": 4.926323185216086, "learning_rate": 5e-05, "loss": 0.0946, "num_input_tokens_seen": 131446204, "step": 1440 }, { "epoch": 6.0, "loss": 0.06529416143894196, "loss_ce": 0.0008562928414903581, "loss_iou": 0.169921875, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 131446204, "step": 1440 }, { "epoch": 6.004166666666666, "grad_norm": 7.437614331691922, "learning_rate": 5e-05, "loss": 0.1004, "num_input_tokens_seen": 131536320, "step": 1441 }, { "epoch": 6.004166666666666, "loss": 0.0987076535820961, "loss_ce": 0.000807262200396508, "loss_iou": 0.31640625, "loss_num": 0.0196533203125, "loss_xval": 0.09765625, "num_input_tokens_seen": 131536320, "step": 1441 }, { "epoch": 6.008333333333334, "grad_norm": 6.646119688252158, "learning_rate": 5e-05, "loss": 0.1103, "num_input_tokens_seen": 131626852, "step": 1442 }, { "epoch": 6.008333333333334, "loss": 0.13375121355056763, "loss_ce": 2.317709368071519e-05, "loss_iou": 0.357421875, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 131626852, "step": 1442 }, { "epoch": 6.0125, "grad_norm": 3.237665159225561, "learning_rate": 5e-05, "loss": 0.0688, "num_input_tokens_seen": 131717908, "step": 1443 }, { "epoch": 6.0125, "loss": 0.06890007853507996, "loss_ce": 0.0015630427515134215, "loss_iou": 0.46875, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 131717908, "step": 1443 }, { "epoch": 6.016666666666667, "grad_norm": 6.813543578042571, "learning_rate": 5e-05, "loss": 0.0959, "num_input_tokens_seen": 131809160, "step": 1444 }, { "epoch": 6.016666666666667, "loss": 0.07589545100927353, "loss_ce": 0.00024237479374278337, "loss_iou": 0.390625, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 131809160, "step": 1444 }, { "epoch": 6.020833333333333, "grad_norm": 3.849074999437511, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 131900668, "step": 1445 }, { "epoch": 6.020833333333333, "loss": 0.07924774289131165, "loss_ce": 0.00011566528701223433, "loss_iou": 0.35546875, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 131900668, "step": 1445 }, { "epoch": 6.025, "grad_norm": 10.093369529116636, "learning_rate": 5e-05, "loss": 0.1565, "num_input_tokens_seen": 131991300, "step": 1446 }, { "epoch": 6.025, "loss": 0.11482731252908707, "loss_ce": 0.0002643281768541783, "loss_iou": 0.2021484375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 131991300, "step": 1446 }, { "epoch": 6.029166666666667, "grad_norm": 7.328155770517095, "learning_rate": 5e-05, "loss": 0.1191, "num_input_tokens_seen": 132082604, "step": 1447 }, { "epoch": 6.029166666666667, "loss": 0.14458438754081726, "loss_ce": 0.0007397783920168877, "loss_iou": 0.3046875, "loss_num": 0.02880859375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 132082604, "step": 1447 }, { "epoch": 6.033333333333333, "grad_norm": 2.1194428935112812, "learning_rate": 5e-05, "loss": 0.0854, "num_input_tokens_seen": 132173368, "step": 1448 }, { "epoch": 6.033333333333333, "loss": 0.10437037795782089, "loss_ce": 0.008209485560655594, "loss_iou": 0.328125, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 132173368, "step": 1448 }, { "epoch": 6.0375, "grad_norm": 2.977298408978728, "learning_rate": 5e-05, "loss": 0.1087, "num_input_tokens_seen": 132265116, "step": 1449 }, { "epoch": 6.0375, "loss": 0.15406504273414612, "loss_ce": 0.0004853248246945441, "loss_iou": 0.255859375, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 132265116, "step": 1449 }, { "epoch": 6.041666666666667, "grad_norm": 1.7783545168507573, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 132356112, "step": 1450 }, { "epoch": 6.041666666666667, "loss": 0.07427071034908295, "loss_ce": 0.0002350689610466361, "loss_iou": 0.3515625, "loss_num": 0.0147705078125, "loss_xval": 0.07421875, "num_input_tokens_seen": 132356112, "step": 1450 }, { "epoch": 6.045833333333333, "grad_norm": 5.857286320553287, "learning_rate": 5e-05, "loss": 0.0973, "num_input_tokens_seen": 132447788, "step": 1451 }, { "epoch": 6.045833333333333, "loss": 0.09844372421503067, "loss_ce": 0.0008790281717665493, "loss_iou": 0.275390625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 132447788, "step": 1451 }, { "epoch": 6.05, "grad_norm": 3.9468331314917138, "learning_rate": 5e-05, "loss": 0.0986, "num_input_tokens_seen": 132539164, "step": 1452 }, { "epoch": 6.05, "loss": 0.0783202052116394, "loss_ce": 0.0020415245089679956, "loss_iou": 0.3515625, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 132539164, "step": 1452 }, { "epoch": 6.054166666666666, "grad_norm": 2.213922720704317, "learning_rate": 5e-05, "loss": 0.0913, "num_input_tokens_seen": 132628796, "step": 1453 }, { "epoch": 6.054166666666666, "loss": 0.1065921038389206, "loss_ce": 0.000924985040910542, "loss_iou": 0.333984375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 132628796, "step": 1453 }, { "epoch": 6.058333333333334, "grad_norm": 5.687537479030628, "learning_rate": 5e-05, "loss": 0.095, "num_input_tokens_seen": 132719588, "step": 1454 }, { "epoch": 6.058333333333334, "loss": 0.11358708888292313, "loss_ce": 0.0002753234875854105, "loss_iou": 0.0888671875, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 132719588, "step": 1454 }, { "epoch": 6.0625, "grad_norm": 2.127818009920121, "learning_rate": 5e-05, "loss": 0.1102, "num_input_tokens_seen": 132810988, "step": 1455 }, { "epoch": 6.0625, "loss": 0.08428998291492462, "loss_ce": 0.0008091489435173571, "loss_iou": 0.251953125, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 132810988, "step": 1455 }, { "epoch": 6.066666666666666, "grad_norm": 7.095893761527112, "learning_rate": 5e-05, "loss": 0.1075, "num_input_tokens_seen": 132902488, "step": 1456 }, { "epoch": 6.066666666666666, "loss": 0.09274320304393768, "loss_ce": 0.027572914958000183, "loss_iou": 0.240234375, "loss_num": 0.0130615234375, "loss_xval": 0.06494140625, "num_input_tokens_seen": 132902488, "step": 1456 }, { "epoch": 6.070833333333334, "grad_norm": 46.1430369354311, "learning_rate": 5e-05, "loss": 0.1321, "num_input_tokens_seen": 132993932, "step": 1457 }, { "epoch": 6.070833333333334, "loss": 0.11721807718276978, "loss_ce": 0.0012512854300439358, "loss_iou": 0.314453125, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 132993932, "step": 1457 }, { "epoch": 6.075, "grad_norm": 2.7616679834886724, "learning_rate": 5e-05, "loss": 0.0698, "num_input_tokens_seen": 133085476, "step": 1458 }, { "epoch": 6.075, "loss": 0.050556816160678864, "loss_ce": 0.0011488578747957945, "loss_iou": 0.17578125, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 133085476, "step": 1458 }, { "epoch": 6.079166666666667, "grad_norm": 5.207176340008163, "learning_rate": 5e-05, "loss": 0.0959, "num_input_tokens_seen": 133177676, "step": 1459 }, { "epoch": 6.079166666666667, "loss": 0.104974165558815, "loss_ce": 0.001992600504308939, "loss_iou": 0.173828125, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 133177676, "step": 1459 }, { "epoch": 6.083333333333333, "grad_norm": 3.054466049365312, "learning_rate": 5e-05, "loss": 0.0838, "num_input_tokens_seen": 133269072, "step": 1460 }, { "epoch": 6.083333333333333, "loss": 0.04987429827451706, "loss_ce": 5.4354113672161475e-05, "loss_iou": 0.333984375, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 133269072, "step": 1460 }, { "epoch": 6.0875, "grad_norm": 2.9583717627145028, "learning_rate": 5e-05, "loss": 0.0773, "num_input_tokens_seen": 133360928, "step": 1461 }, { "epoch": 6.0875, "loss": 0.06315495073795319, "loss_ce": 0.003157388884574175, "loss_iou": 0.28515625, "loss_num": 0.011962890625, "loss_xval": 0.06005859375, "num_input_tokens_seen": 133360928, "step": 1461 }, { "epoch": 6.091666666666667, "grad_norm": 3.9025604867301147, "learning_rate": 5e-05, "loss": 0.1083, "num_input_tokens_seen": 133452228, "step": 1462 }, { "epoch": 6.091666666666667, "loss": 0.13133090734481812, "loss_ce": 0.0007767053321003914, "loss_iou": 0.34375, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 133452228, "step": 1462 }, { "epoch": 6.095833333333333, "grad_norm": 2.801843021578155, "learning_rate": 5e-05, "loss": 0.1139, "num_input_tokens_seen": 133544256, "step": 1463 }, { "epoch": 6.095833333333333, "loss": 0.09132051467895508, "loss_ce": 0.0009732232429087162, "loss_iou": 0.298828125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 133544256, "step": 1463 }, { "epoch": 6.1, "grad_norm": 2.8677230220587684, "learning_rate": 5e-05, "loss": 0.1237, "num_input_tokens_seen": 133635944, "step": 1464 }, { "epoch": 6.1, "loss": 0.13180416822433472, "loss_ce": 0.000120820157462731, "loss_iou": 0.34375, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 133635944, "step": 1464 }, { "epoch": 6.104166666666667, "grad_norm": 1.7808807963535567, "learning_rate": 5e-05, "loss": 0.1234, "num_input_tokens_seen": 133726988, "step": 1465 }, { "epoch": 6.104166666666667, "loss": 0.1303086131811142, "loss_ce": 0.001814344897866249, "loss_iou": 0.2001953125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 133726988, "step": 1465 }, { "epoch": 6.108333333333333, "grad_norm": 5.937209116611709, "learning_rate": 5e-05, "loss": 0.113, "num_input_tokens_seen": 133818424, "step": 1466 }, { "epoch": 6.108333333333333, "loss": 0.116533562541008, "loss_ce": 0.0006964726489968598, "loss_iou": 0.2392578125, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 133818424, "step": 1466 }, { "epoch": 6.1125, "grad_norm": 3.39792518178655, "learning_rate": 5e-05, "loss": 0.0893, "num_input_tokens_seen": 133909628, "step": 1467 }, { "epoch": 6.1125, "loss": 0.0704282820224762, "loss_ce": 0.00011578691191971302, "loss_iou": 0.41796875, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 133909628, "step": 1467 }, { "epoch": 6.116666666666666, "grad_norm": 5.0612334717481176, "learning_rate": 5e-05, "loss": 0.0871, "num_input_tokens_seen": 134001744, "step": 1468 }, { "epoch": 6.116666666666666, "loss": 0.04524346441030502, "loss_ce": 0.0009471987141296268, "loss_iou": 0.30859375, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 134001744, "step": 1468 }, { "epoch": 6.120833333333334, "grad_norm": 3.492075358097084, "learning_rate": 5e-05, "loss": 0.1234, "num_input_tokens_seen": 134092876, "step": 1469 }, { "epoch": 6.120833333333334, "loss": 0.11776579916477203, "loss_ce": 0.00024259783094748855, "loss_iou": 0.376953125, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 134092876, "step": 1469 }, { "epoch": 6.125, "grad_norm": 3.2972687382843784, "learning_rate": 5e-05, "loss": 0.1241, "num_input_tokens_seen": 134183496, "step": 1470 }, { "epoch": 6.125, "loss": 0.15864822268486023, "loss_ce": 0.00035355405998416245, "loss_iou": 0.40625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 134183496, "step": 1470 }, { "epoch": 6.129166666666666, "grad_norm": 5.518848294310319, "learning_rate": 5e-05, "loss": 0.1456, "num_input_tokens_seen": 134274676, "step": 1471 }, { "epoch": 6.129166666666666, "loss": 0.1667410433292389, "loss_ce": 5.404305920819752e-05, "loss_iou": 0.349609375, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 134274676, "step": 1471 }, { "epoch": 6.133333333333334, "grad_norm": 2.9549253104072615, "learning_rate": 5e-05, "loss": 0.1211, "num_input_tokens_seen": 134366344, "step": 1472 }, { "epoch": 6.133333333333334, "loss": 0.15231193602085114, "loss_ce": 2.9224675017758273e-05, "loss_iou": 0.283203125, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 134366344, "step": 1472 }, { "epoch": 6.1375, "grad_norm": 8.080211795996055, "learning_rate": 5e-05, "loss": 0.125, "num_input_tokens_seen": 134457720, "step": 1473 }, { "epoch": 6.1375, "loss": 0.10112521052360535, "loss_ce": 0.001439541345462203, "loss_iou": 0.314453125, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 134457720, "step": 1473 }, { "epoch": 6.141666666666667, "grad_norm": 3.3321246765884918, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 134549376, "step": 1474 }, { "epoch": 6.141666666666667, "loss": 0.045930132269859314, "loss_ce": 0.0023357742466032505, "loss_iou": 0.322265625, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 134549376, "step": 1474 }, { "epoch": 6.145833333333333, "grad_norm": 2.323448607746615, "learning_rate": 5e-05, "loss": 0.1121, "num_input_tokens_seen": 134641320, "step": 1475 }, { "epoch": 6.145833333333333, "loss": 0.14385700225830078, "loss_ce": 0.0012483518803492188, "loss_iou": 0.2021484375, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 134641320, "step": 1475 }, { "epoch": 6.15, "grad_norm": 9.099784493689713, "learning_rate": 5e-05, "loss": 0.1039, "num_input_tokens_seen": 134732684, "step": 1476 }, { "epoch": 6.15, "loss": 0.09532591700553894, "loss_ce": 0.0016216891817748547, "loss_iou": 0.341796875, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 134732684, "step": 1476 }, { "epoch": 6.154166666666667, "grad_norm": 2.0798782768136723, "learning_rate": 5e-05, "loss": 0.0845, "num_input_tokens_seen": 134824232, "step": 1477 }, { "epoch": 6.154166666666667, "loss": 0.08923730254173279, "loss_ce": 0.00021752758766524494, "loss_iou": 0.33984375, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 134824232, "step": 1477 }, { "epoch": 6.158333333333333, "grad_norm": 1.7335927622566298, "learning_rate": 5e-05, "loss": 0.0884, "num_input_tokens_seen": 134915328, "step": 1478 }, { "epoch": 6.158333333333333, "loss": 0.08593946695327759, "loss_ce": 0.0001240387064171955, "loss_iou": 0.2734375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 134915328, "step": 1478 }, { "epoch": 6.1625, "grad_norm": 2.8402593123658186, "learning_rate": 5e-05, "loss": 0.1228, "num_input_tokens_seen": 135006756, "step": 1479 }, { "epoch": 6.1625, "loss": 0.13833099603652954, "loss_ce": 0.0007653862703591585, "loss_iou": 0.322265625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 135006756, "step": 1479 }, { "epoch": 6.166666666666667, "grad_norm": 4.202233004494281, "learning_rate": 5e-05, "loss": 0.1646, "num_input_tokens_seen": 135098072, "step": 1480 }, { "epoch": 6.166666666666667, "loss": 0.11366622895002365, "loss_ce": 0.0014530995395034552, "loss_iou": 0.2890625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 135098072, "step": 1480 }, { "epoch": 6.170833333333333, "grad_norm": 4.031083413757542, "learning_rate": 5e-05, "loss": 0.0726, "num_input_tokens_seen": 135189284, "step": 1481 }, { "epoch": 6.170833333333333, "loss": 0.07110023498535156, "loss_ce": 0.0002841942186933011, "loss_iou": 0.451171875, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 135189284, "step": 1481 }, { "epoch": 6.175, "grad_norm": 5.6421420925258365, "learning_rate": 5e-05, "loss": 0.1378, "num_input_tokens_seen": 135280224, "step": 1482 }, { "epoch": 6.175, "loss": 0.15990953147411346, "loss_ce": 0.0003636321926023811, "loss_iou": 0.35546875, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 135280224, "step": 1482 }, { "epoch": 6.179166666666666, "grad_norm": 18.276343621342225, "learning_rate": 5e-05, "loss": 0.1245, "num_input_tokens_seen": 135371488, "step": 1483 }, { "epoch": 6.179166666666666, "loss": 0.1595810353755951, "loss_ce": 9.616982424631715e-05, "loss_iou": 0.39453125, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 135371488, "step": 1483 }, { "epoch": 6.183333333333334, "grad_norm": 2.4957749905650672, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 135463448, "step": 1484 }, { "epoch": 6.183333333333334, "loss": 0.08113342523574829, "loss_ce": 3.2958269002847373e-05, "loss_iou": 0.1572265625, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 135463448, "step": 1484 }, { "epoch": 6.1875, "grad_norm": 2.7456250298887594, "learning_rate": 5e-05, "loss": 0.0983, "num_input_tokens_seen": 135554848, "step": 1485 }, { "epoch": 6.1875, "loss": 0.08282457292079926, "loss_ce": 4.5643879275303334e-05, "loss_iou": 0.396484375, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 135554848, "step": 1485 }, { "epoch": 6.191666666666666, "grad_norm": 4.149341669310884, "learning_rate": 5e-05, "loss": 0.1047, "num_input_tokens_seen": 135646256, "step": 1486 }, { "epoch": 6.191666666666666, "loss": 0.1378132700920105, "loss_ce": 1.114791666623205e-05, "loss_iou": 0.0986328125, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 135646256, "step": 1486 }, { "epoch": 6.195833333333334, "grad_norm": 1.3396171777343122, "learning_rate": 5e-05, "loss": 0.0547, "num_input_tokens_seen": 135737080, "step": 1487 }, { "epoch": 6.195833333333334, "loss": 0.06343643367290497, "loss_ce": 5.651055289490614e-06, "loss_iou": 0.11376953125, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 135737080, "step": 1487 }, { "epoch": 6.2, "grad_norm": 1.9860821156088158, "learning_rate": 5e-05, "loss": 0.1024, "num_input_tokens_seen": 135828560, "step": 1488 }, { "epoch": 6.2, "loss": 0.15105964243412018, "loss_ce": 0.0005469413590617478, "loss_iou": 0.1591796875, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 135828560, "step": 1488 }, { "epoch": 6.204166666666667, "grad_norm": 6.458528606026268, "learning_rate": 5e-05, "loss": 0.13, "num_input_tokens_seen": 135920016, "step": 1489 }, { "epoch": 6.204166666666667, "loss": 0.12129916250705719, "loss_ce": 6.808717444073409e-05, "loss_iou": 0.27734375, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 135920016, "step": 1489 }, { "epoch": 6.208333333333333, "grad_norm": 2.15464906974121, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 136011944, "step": 1490 }, { "epoch": 6.208333333333333, "loss": 0.07406759262084961, "loss_ce": 0.00021505873883143067, "loss_iou": 0.294921875, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 136011944, "step": 1490 }, { "epoch": 6.2125, "grad_norm": 3.2448957136092105, "learning_rate": 5e-05, "loss": 0.0912, "num_input_tokens_seen": 136103984, "step": 1491 }, { "epoch": 6.2125, "loss": 0.09109637886285782, "loss_ce": 0.0012373746139928699, "loss_iou": 0.220703125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 136103984, "step": 1491 }, { "epoch": 6.216666666666667, "grad_norm": 3.997611756884178, "learning_rate": 5e-05, "loss": 0.0991, "num_input_tokens_seen": 136195588, "step": 1492 }, { "epoch": 6.216666666666667, "loss": 0.06629104912281036, "loss_ce": 0.0008918737876228988, "loss_iou": 0.314453125, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 136195588, "step": 1492 }, { "epoch": 6.220833333333333, "grad_norm": 2.595286086541442, "learning_rate": 5e-05, "loss": 0.0658, "num_input_tokens_seen": 136286792, "step": 1493 }, { "epoch": 6.220833333333333, "loss": 0.08234380930662155, "loss_ce": 6.84204715071246e-05, "loss_iou": 0.30859375, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 136286792, "step": 1493 }, { "epoch": 6.225, "grad_norm": 3.270012287666817, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 136378704, "step": 1494 }, { "epoch": 6.225, "loss": 0.09001626074314117, "loss_ce": 0.0010651469929143786, "loss_iou": 0.34765625, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 136378704, "step": 1494 }, { "epoch": 6.229166666666667, "grad_norm": 3.2462930501077816, "learning_rate": 5e-05, "loss": 0.065, "num_input_tokens_seen": 136470544, "step": 1495 }, { "epoch": 6.229166666666667, "loss": 0.05531272664666176, "loss_ce": 0.001144025707617402, "loss_iou": 0.30859375, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 136470544, "step": 1495 }, { "epoch": 6.233333333333333, "grad_norm": 2.739395620419567, "learning_rate": 5e-05, "loss": 0.1097, "num_input_tokens_seen": 136562108, "step": 1496 }, { "epoch": 6.233333333333333, "loss": 0.08684214949607849, "loss_ce": 0.00047740069567225873, "loss_iou": 0.27734375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 136562108, "step": 1496 }, { "epoch": 6.2375, "grad_norm": 4.301696296347918, "learning_rate": 5e-05, "loss": 0.1126, "num_input_tokens_seen": 136653324, "step": 1497 }, { "epoch": 6.2375, "loss": 0.09686444699764252, "loss_ce": 4.7431603888981044e-05, "loss_iou": 0.30859375, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 136653324, "step": 1497 }, { "epoch": 6.241666666666666, "grad_norm": 7.418185168253085, "learning_rate": 5e-05, "loss": 0.1631, "num_input_tokens_seen": 136745004, "step": 1498 }, { "epoch": 6.241666666666666, "loss": 0.1644340306520462, "loss_ce": 0.001714312587864697, "loss_iou": 0.154296875, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 136745004, "step": 1498 }, { "epoch": 6.245833333333334, "grad_norm": 4.203904690296846, "learning_rate": 5e-05, "loss": 0.0761, "num_input_tokens_seen": 136835992, "step": 1499 }, { "epoch": 6.245833333333334, "loss": 0.09441401809453964, "loss_ce": 7.891281711636111e-06, "loss_iou": 0.32421875, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 136835992, "step": 1499 }, { "epoch": 6.25, "grad_norm": 4.399785995865877, "learning_rate": 5e-05, "loss": 0.1119, "num_input_tokens_seen": 136927000, "step": 1500 }, { "epoch": 6.25, "eval_seeclick_CIoU": 0.20020649209618568, "eval_seeclick_GIoU": 0.18264785408973694, "eval_seeclick_IoU": 0.3048545867204666, "eval_seeclick_MAE_all": 0.09458190575242043, "eval_seeclick_MAE_h": 0.07541835866868496, "eval_seeclick_MAE_w": 0.2035619094967842, "eval_seeclick_MAE_x_boxes": 0.2103462964296341, "eval_seeclick_MAE_y_boxes": 0.0807495042681694, "eval_seeclick_NUM_probability": 0.9999992549419403, "eval_seeclick_inside_bbox": 0.4332386404275894, "eval_seeclick_loss": 0.5469575524330139, "eval_seeclick_loss_ce": 0.13234156370162964, "eval_seeclick_loss_iou": 0.3863525390625, "eval_seeclick_loss_num": 0.08294677734375, "eval_seeclick_loss_xval": 0.414794921875, "eval_seeclick_runtime": 74.5888, "eval_seeclick_samples_per_second": 0.576, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 136927000, "step": 1500 }, { "epoch": 6.25, "eval_icons_CIoU": 0.3565501272678375, "eval_icons_GIoU": 0.37757113575935364, "eval_icons_IoU": 0.438765212893486, "eval_icons_MAE_all": 0.06771966256201267, "eval_icons_MAE_h": 0.13854750245809555, "eval_icons_MAE_w": 0.10688314586877823, "eval_icons_MAE_x_boxes": 0.10696740448474884, "eval_icons_MAE_y_boxes": 0.13959594815969467, "eval_icons_NUM_probability": 0.999999612569809, "eval_icons_inside_bbox": 0.6371527910232544, "eval_icons_loss": 0.3279392719268799, "eval_icons_loss_ce": 7.772192816446477e-06, "eval_icons_loss_iou": 0.2781982421875, "eval_icons_loss_num": 0.06714630126953125, "eval_icons_loss_xval": 0.335784912109375, "eval_icons_runtime": 84.0066, "eval_icons_samples_per_second": 0.595, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 136927000, "step": 1500 }, { "epoch": 6.25, "eval_screenspot_CIoU": 0.3894497851530711, "eval_screenspot_GIoU": 0.38316018382708233, "eval_screenspot_IoU": 0.4590388039747874, "eval_screenspot_MAE_all": 0.09581841280062993, "eval_screenspot_MAE_h": 0.08313464000821114, "eval_screenspot_MAE_w": 0.19596777856349945, "eval_screenspot_MAE_x_boxes": 0.18529337644577026, "eval_screenspot_MAE_y_boxes": 0.07308414205908775, "eval_screenspot_NUM_probability": 0.9999992251396179, "eval_screenspot_inside_bbox": 0.725000003973643, "eval_screenspot_loss": 0.4786304831504822, "eval_screenspot_loss_ce": 2.510744145448977e-05, "eval_screenspot_loss_iou": 0.407958984375, "eval_screenspot_loss_num": 0.0963592529296875, "eval_screenspot_loss_xval": 0.4816487630208333, "eval_screenspot_runtime": 159.7863, "eval_screenspot_samples_per_second": 0.557, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 136927000, "step": 1500 }, { "epoch": 6.25, "eval_compot_CIoU": 0.4208727031946182, "eval_compot_GIoU": 0.41121308505535126, "eval_compot_IoU": 0.49092237651348114, "eval_compot_MAE_all": 0.06092522293329239, "eval_compot_MAE_h": 0.06699452549219131, "eval_compot_MAE_w": 0.15196169167757034, "eval_compot_MAE_x_boxes": 0.14676962792873383, "eval_compot_MAE_y_boxes": 0.06717049330472946, "eval_compot_NUM_probability": 0.9999985098838806, "eval_compot_inside_bbox": 0.7204861044883728, "eval_compot_loss": 0.3193596601486206, "eval_compot_loss_ce": 0.011697435285896063, "eval_compot_loss_iou": 0.32623291015625, "eval_compot_loss_num": 0.0563812255859375, "eval_compot_loss_xval": 0.2819061279296875, "eval_compot_runtime": 88.4549, "eval_compot_samples_per_second": 0.565, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 136927000, "step": 1500 }, { "epoch": 6.25, "loss": 0.25956547260284424, "loss_ce": 0.012830821797251701, "loss_iou": 0.326171875, "loss_num": 0.04931640625, "loss_xval": 0.2470703125, "num_input_tokens_seen": 136927000, "step": 1500 }, { "epoch": 6.254166666666666, "grad_norm": 2.1614253857529997, "learning_rate": 5e-05, "loss": 0.097, "num_input_tokens_seen": 137018200, "step": 1501 }, { "epoch": 6.254166666666666, "loss": 0.11686230450868607, "loss_ce": 4.101651211385615e-05, "loss_iou": 0.392578125, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 137018200, "step": 1501 }, { "epoch": 6.258333333333334, "grad_norm": 2.7009496825179626, "learning_rate": 5e-05, "loss": 0.101, "num_input_tokens_seen": 137109356, "step": 1502 }, { "epoch": 6.258333333333334, "loss": 0.11811286211013794, "loss_ce": 0.0006506989011541009, "loss_iou": 0.384765625, "loss_num": 0.0234375, "loss_xval": 0.11767578125, "num_input_tokens_seen": 137109356, "step": 1502 }, { "epoch": 6.2625, "grad_norm": 7.285209682589673, "learning_rate": 5e-05, "loss": 0.0777, "num_input_tokens_seen": 137200692, "step": 1503 }, { "epoch": 6.2625, "loss": 0.05951286107301712, "loss_ce": 0.00018668676784727722, "loss_iou": 0.40625, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 137200692, "step": 1503 }, { "epoch": 6.266666666666667, "grad_norm": 4.393759772485003, "learning_rate": 5e-05, "loss": 0.0987, "num_input_tokens_seen": 137292620, "step": 1504 }, { "epoch": 6.266666666666667, "loss": 0.09740344434976578, "loss_ce": 0.002066532615572214, "loss_iou": 0.330078125, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 137292620, "step": 1504 }, { "epoch": 6.270833333333333, "grad_norm": 4.201436243065331, "learning_rate": 5e-05, "loss": 0.0735, "num_input_tokens_seen": 137384388, "step": 1505 }, { "epoch": 6.270833333333333, "loss": 0.0596717894077301, "loss_ce": 0.002146154874935746, "loss_iou": 0.2373046875, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 137384388, "step": 1505 }, { "epoch": 6.275, "grad_norm": 5.428190265142759, "learning_rate": 5e-05, "loss": 0.0977, "num_input_tokens_seen": 137475952, "step": 1506 }, { "epoch": 6.275, "loss": 0.11430226266384125, "loss_ce": 0.0012346402509137988, "loss_iou": 0.154296875, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 137475952, "step": 1506 }, { "epoch": 6.279166666666667, "grad_norm": 8.940473330611962, "learning_rate": 5e-05, "loss": 0.1001, "num_input_tokens_seen": 137567536, "step": 1507 }, { "epoch": 6.279166666666667, "loss": 0.08325809240341187, "loss_ce": 0.0006470083026215434, "loss_iou": 0.1708984375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 137567536, "step": 1507 }, { "epoch": 6.283333333333333, "grad_norm": 6.633974688331686, "learning_rate": 5e-05, "loss": 0.1029, "num_input_tokens_seen": 137659404, "step": 1508 }, { "epoch": 6.283333333333333, "loss": 0.08943825960159302, "loss_ce": 0.0007160389795899391, "loss_iou": 0.259765625, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 137659404, "step": 1508 }, { "epoch": 6.2875, "grad_norm": 2.7176989734291284, "learning_rate": 5e-05, "loss": 0.108, "num_input_tokens_seen": 137750828, "step": 1509 }, { "epoch": 6.2875, "loss": 0.08843083679676056, "loss_ce": 0.0017304003704339266, "loss_iou": 0.181640625, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 137750828, "step": 1509 }, { "epoch": 6.291666666666667, "grad_norm": 2.0135651794519402, "learning_rate": 5e-05, "loss": 0.086, "num_input_tokens_seen": 137842976, "step": 1510 }, { "epoch": 6.291666666666667, "loss": 0.047538742423057556, "loss_ce": 0.0022048787213861942, "loss_iou": 0.2177734375, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 137842976, "step": 1510 }, { "epoch": 6.295833333333333, "grad_norm": 5.309436376013892, "learning_rate": 5e-05, "loss": 0.0671, "num_input_tokens_seen": 137934964, "step": 1511 }, { "epoch": 6.295833333333333, "loss": 0.05960085242986679, "loss_ce": 0.0008850316517055035, "loss_iou": 0.384765625, "loss_num": 0.01177978515625, "loss_xval": 0.05859375, "num_input_tokens_seen": 137934964, "step": 1511 }, { "epoch": 6.3, "grad_norm": 4.1509108275575715, "learning_rate": 5e-05, "loss": 0.094, "num_input_tokens_seen": 138026256, "step": 1512 }, { "epoch": 6.3, "loss": 0.10616110265254974, "loss_ce": 0.0001735452242428437, "loss_iou": 0.36328125, "loss_num": 0.0211181640625, "loss_xval": 0.10595703125, "num_input_tokens_seen": 138026256, "step": 1512 }, { "epoch": 6.304166666666666, "grad_norm": 2.621120171436987, "learning_rate": 5e-05, "loss": 0.125, "num_input_tokens_seen": 138117368, "step": 1513 }, { "epoch": 6.304166666666666, "loss": 0.14343056082725525, "loss_ce": 0.0007303733727894723, "loss_iou": 0.28515625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 138117368, "step": 1513 }, { "epoch": 6.308333333333334, "grad_norm": 2.7964839161004944, "learning_rate": 5e-05, "loss": 0.1252, "num_input_tokens_seen": 138208852, "step": 1514 }, { "epoch": 6.308333333333334, "loss": 0.10081670433282852, "loss_ce": 0.0007343016914092004, "loss_iou": 0.2275390625, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 138208852, "step": 1514 }, { "epoch": 6.3125, "grad_norm": 2.282470761627585, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 138299940, "step": 1515 }, { "epoch": 6.3125, "loss": 0.05543072521686554, "loss_ce": 0.0010483998339623213, "loss_iou": 0.189453125, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 138299940, "step": 1515 }, { "epoch": 6.316666666666666, "grad_norm": 4.131042125302842, "learning_rate": 5e-05, "loss": 0.073, "num_input_tokens_seen": 138391328, "step": 1516 }, { "epoch": 6.316666666666666, "loss": 0.08035748451948166, "loss_ce": 0.00046246696729213, "loss_iou": 0.275390625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 138391328, "step": 1516 }, { "epoch": 6.320833333333334, "grad_norm": 8.27696528656622, "learning_rate": 5e-05, "loss": 0.0808, "num_input_tokens_seen": 138482532, "step": 1517 }, { "epoch": 6.320833333333334, "loss": 0.07589618861675262, "loss_ce": 0.000273626996204257, "loss_iou": 0.26953125, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 138482532, "step": 1517 }, { "epoch": 6.325, "grad_norm": 3.7231851864538137, "learning_rate": 5e-05, "loss": 0.1171, "num_input_tokens_seen": 138573620, "step": 1518 }, { "epoch": 6.325, "loss": 0.09571607410907745, "loss_ce": 0.0011115849483758211, "loss_iou": 0.35546875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 138573620, "step": 1518 }, { "epoch": 6.329166666666667, "grad_norm": 8.090219286591468, "learning_rate": 5e-05, "loss": 0.1327, "num_input_tokens_seen": 138664500, "step": 1519 }, { "epoch": 6.329166666666667, "loss": 0.12188278883695602, "loss_ce": 0.000499117944855243, "loss_iou": 0.30859375, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 138664500, "step": 1519 }, { "epoch": 6.333333333333333, "grad_norm": 4.691978282317901, "learning_rate": 5e-05, "loss": 0.1281, "num_input_tokens_seen": 138755820, "step": 1520 }, { "epoch": 6.333333333333333, "loss": 0.08768007159233093, "loss_ce": 0.0009185929084196687, "loss_iou": 0.46484375, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 138755820, "step": 1520 }, { "epoch": 6.3375, "grad_norm": 4.05389583485167, "learning_rate": 5e-05, "loss": 0.1139, "num_input_tokens_seen": 138846992, "step": 1521 }, { "epoch": 6.3375, "loss": 0.15876007080078125, "loss_ce": 0.00052641675574705, "loss_iou": 0.205078125, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 138846992, "step": 1521 }, { "epoch": 6.341666666666667, "grad_norm": 9.50735611222299, "learning_rate": 5e-05, "loss": 0.1257, "num_input_tokens_seen": 138938144, "step": 1522 }, { "epoch": 6.341666666666667, "loss": 0.14819855988025665, "loss_ce": 5.187587703403551e-06, "loss_iou": 0.455078125, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 138938144, "step": 1522 }, { "epoch": 6.345833333333333, "grad_norm": 4.860180651361698, "learning_rate": 5e-05, "loss": 0.0836, "num_input_tokens_seen": 139029296, "step": 1523 }, { "epoch": 6.345833333333333, "loss": 0.07304719090461731, "loss_ce": 0.0006594919832423329, "loss_iou": 0.1923828125, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 139029296, "step": 1523 }, { "epoch": 6.35, "grad_norm": 6.304762865561375, "learning_rate": 5e-05, "loss": 0.1134, "num_input_tokens_seen": 139120612, "step": 1524 }, { "epoch": 6.35, "loss": 0.163400799036026, "loss_ce": 0.00010123385436600074, "loss_iou": 0.318359375, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 139120612, "step": 1524 }, { "epoch": 6.354166666666667, "grad_norm": 4.101944550969832, "learning_rate": 5e-05, "loss": 0.0855, "num_input_tokens_seen": 139211944, "step": 1525 }, { "epoch": 6.354166666666667, "loss": 0.10763823986053467, "loss_ce": 2.7328803753334796e-06, "loss_iou": 0.306640625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 139211944, "step": 1525 }, { "epoch": 6.358333333333333, "grad_norm": 2.8743059740589767, "learning_rate": 5e-05, "loss": 0.1122, "num_input_tokens_seen": 139302760, "step": 1526 }, { "epoch": 6.358333333333333, "loss": 0.109195277094841, "loss_ce": 6.442190351663157e-05, "loss_iou": 0.37109375, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 139302760, "step": 1526 }, { "epoch": 6.3625, "grad_norm": 5.708628465847872, "learning_rate": 5e-05, "loss": 0.1256, "num_input_tokens_seen": 139394120, "step": 1527 }, { "epoch": 6.3625, "loss": 0.15449079871177673, "loss_ce": 0.006007526069879532, "loss_iou": 0.2890625, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 139394120, "step": 1527 }, { "epoch": 6.366666666666666, "grad_norm": 7.874856125203204, "learning_rate": 5e-05, "loss": 0.1415, "num_input_tokens_seen": 139485488, "step": 1528 }, { "epoch": 6.366666666666666, "loss": 0.19475838541984558, "loss_ce": 0.00019355639233253896, "loss_iou": 0.388671875, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 139485488, "step": 1528 }, { "epoch": 6.370833333333334, "grad_norm": 2.231030623695389, "learning_rate": 5e-05, "loss": 0.0958, "num_input_tokens_seen": 139577164, "step": 1529 }, { "epoch": 6.370833333333334, "loss": 0.0933036208152771, "loss_ce": 0.0006522503099404275, "loss_iou": 0.345703125, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 139577164, "step": 1529 }, { "epoch": 6.375, "grad_norm": 1.9998978914688346, "learning_rate": 5e-05, "loss": 0.0999, "num_input_tokens_seen": 139668980, "step": 1530 }, { "epoch": 6.375, "loss": 0.07884591817855835, "loss_ce": 0.0002936720848083496, "loss_iou": 0.2890625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 139668980, "step": 1530 }, { "epoch": 6.379166666666666, "grad_norm": 3.496869670948756, "learning_rate": 5e-05, "loss": 0.0708, "num_input_tokens_seen": 139760436, "step": 1531 }, { "epoch": 6.379166666666666, "loss": 0.11092659831047058, "loss_ce": 8.675569551996887e-05, "loss_iou": 0.298828125, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 139760436, "step": 1531 }, { "epoch": 6.383333333333334, "grad_norm": 3.3589500487803674, "learning_rate": 5e-05, "loss": 0.1107, "num_input_tokens_seen": 139851524, "step": 1532 }, { "epoch": 6.383333333333334, "loss": 0.1541489064693451, "loss_ce": 0.0009506536880508065, "loss_iou": 0.259765625, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 139851524, "step": 1532 }, { "epoch": 6.3875, "grad_norm": 2.828243613484087, "learning_rate": 5e-05, "loss": 0.1191, "num_input_tokens_seen": 139942808, "step": 1533 }, { "epoch": 6.3875, "loss": 0.10458563268184662, "loss_ce": 1.7153741282527335e-05, "loss_iou": 0.328125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 139942808, "step": 1533 }, { "epoch": 6.391666666666667, "grad_norm": 3.5751189494029765, "learning_rate": 5e-05, "loss": 0.1509, "num_input_tokens_seen": 140033636, "step": 1534 }, { "epoch": 6.391666666666667, "loss": 0.1473105400800705, "loss_ce": 0.0007346185739152133, "loss_iou": 0.353515625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 140033636, "step": 1534 }, { "epoch": 6.395833333333333, "grad_norm": 3.7131141432185877, "learning_rate": 5e-05, "loss": 0.1078, "num_input_tokens_seen": 140124936, "step": 1535 }, { "epoch": 6.395833333333333, "loss": 0.06990425288677216, "loss_ce": 0.00038521114038303494, "loss_iou": 0.3203125, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 140124936, "step": 1535 }, { "epoch": 6.4, "grad_norm": 5.4413194288201145, "learning_rate": 5e-05, "loss": 0.1216, "num_input_tokens_seen": 140216540, "step": 1536 }, { "epoch": 6.4, "loss": 0.04465536028146744, "loss_ce": 0.00011495660874061286, "loss_iou": 0.30859375, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 140216540, "step": 1536 }, { "epoch": 6.404166666666667, "grad_norm": 3.0270632390471923, "learning_rate": 5e-05, "loss": 0.1218, "num_input_tokens_seen": 140308108, "step": 1537 }, { "epoch": 6.404166666666667, "loss": 0.14716391265392303, "loss_ce": 0.001472989795729518, "loss_iou": 0.1640625, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 140308108, "step": 1537 }, { "epoch": 6.408333333333333, "grad_norm": 12.993363647613782, "learning_rate": 5e-05, "loss": 0.1193, "num_input_tokens_seen": 140399288, "step": 1538 }, { "epoch": 6.408333333333333, "loss": 0.14310534298419952, "loss_ce": 6.94481932441704e-05, "loss_iou": 0.2578125, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 140399288, "step": 1538 }, { "epoch": 6.4125, "grad_norm": 3.263224080350857, "learning_rate": 5e-05, "loss": 0.1301, "num_input_tokens_seen": 140490692, "step": 1539 }, { "epoch": 6.4125, "loss": 0.15512920916080475, "loss_ce": 8.361228537978604e-06, "loss_iou": 0.2109375, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 140490692, "step": 1539 }, { "epoch": 6.416666666666667, "grad_norm": 5.526571946364959, "learning_rate": 5e-05, "loss": 0.1131, "num_input_tokens_seen": 140580992, "step": 1540 }, { "epoch": 6.416666666666667, "loss": 0.08357784152030945, "loss_ce": 0.00021907762857154012, "loss_iou": 0.15234375, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 140580992, "step": 1540 }, { "epoch": 6.420833333333333, "grad_norm": 2.6590068483528353, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 140671864, "step": 1541 }, { "epoch": 6.420833333333333, "loss": 0.06562237441539764, "loss_ce": 0.0007114805048331618, "loss_iou": 0.228515625, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 140671864, "step": 1541 }, { "epoch": 6.425, "grad_norm": 5.086586938084599, "learning_rate": 5e-05, "loss": 0.1152, "num_input_tokens_seen": 140762204, "step": 1542 }, { "epoch": 6.425, "loss": 0.061930038034915924, "loss_ce": 2.5129629648290575e-05, "loss_iou": 0.328125, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 140762204, "step": 1542 }, { "epoch": 6.429166666666666, "grad_norm": 8.287373152003422, "learning_rate": 5e-05, "loss": 0.0898, "num_input_tokens_seen": 140853496, "step": 1543 }, { "epoch": 6.429166666666666, "loss": 0.11346882581710815, "loss_ce": 0.0021559547167271376, "loss_iou": 0.275390625, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 140853496, "step": 1543 }, { "epoch": 6.433333333333334, "grad_norm": 2.6481493953194724, "learning_rate": 5e-05, "loss": 0.1064, "num_input_tokens_seen": 140944676, "step": 1544 }, { "epoch": 6.433333333333334, "loss": 0.11181750893592834, "loss_ce": 0.0027782029937952757, "loss_iou": 0.212890625, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 140944676, "step": 1544 }, { "epoch": 6.4375, "grad_norm": 2.5476793630601224, "learning_rate": 5e-05, "loss": 0.0924, "num_input_tokens_seen": 141036008, "step": 1545 }, { "epoch": 6.4375, "loss": 0.0740530788898468, "loss_ce": 0.0023215145338326693, "loss_iou": 0.205078125, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 141036008, "step": 1545 }, { "epoch": 6.441666666666666, "grad_norm": 3.222784811184737, "learning_rate": 5e-05, "loss": 0.1079, "num_input_tokens_seen": 141127444, "step": 1546 }, { "epoch": 6.441666666666666, "loss": 0.09326840192079544, "loss_ce": 0.00015926752530504018, "loss_iou": 0.275390625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 141127444, "step": 1546 }, { "epoch": 6.445833333333334, "grad_norm": 9.30775884504721, "learning_rate": 5e-05, "loss": 0.1288, "num_input_tokens_seen": 141218368, "step": 1547 }, { "epoch": 6.445833333333334, "loss": 0.1136021539568901, "loss_ce": 0.000778665067628026, "loss_iou": 0.33203125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 141218368, "step": 1547 }, { "epoch": 6.45, "grad_norm": 2.857794250190675, "learning_rate": 5e-05, "loss": 0.087, "num_input_tokens_seen": 141309712, "step": 1548 }, { "epoch": 6.45, "loss": 0.07592833787202835, "loss_ce": 3.111540718236938e-05, "loss_iou": 0.265625, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 141309712, "step": 1548 }, { "epoch": 6.454166666666667, "grad_norm": 10.41193222880002, "learning_rate": 5e-05, "loss": 0.0904, "num_input_tokens_seen": 141401432, "step": 1549 }, { "epoch": 6.454166666666667, "loss": 0.08329534530639648, "loss_ce": 0.0031943346839398146, "loss_iou": 0.244140625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 141401432, "step": 1549 }, { "epoch": 6.458333333333333, "grad_norm": 3.074911801642584, "learning_rate": 5e-05, "loss": 0.0777, "num_input_tokens_seen": 141492608, "step": 1550 }, { "epoch": 6.458333333333333, "loss": 0.0982789471745491, "loss_ce": 0.00019544607494026423, "loss_iou": 0.41796875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 141492608, "step": 1550 }, { "epoch": 6.4625, "grad_norm": 10.409750197190485, "learning_rate": 5e-05, "loss": 0.101, "num_input_tokens_seen": 141584332, "step": 1551 }, { "epoch": 6.4625, "loss": 0.10331732034683228, "loss_ce": 0.0012970553943887353, "loss_iou": 0.337890625, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 141584332, "step": 1551 }, { "epoch": 6.466666666666667, "grad_norm": 2.2759078001399535, "learning_rate": 5e-05, "loss": 0.1277, "num_input_tokens_seen": 141676000, "step": 1552 }, { "epoch": 6.466666666666667, "loss": 0.1590229868888855, "loss_ce": 0.008357708342373371, "loss_iou": 0.314453125, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 141676000, "step": 1552 }, { "epoch": 6.470833333333333, "grad_norm": 2.7347982728298326, "learning_rate": 5e-05, "loss": 0.1051, "num_input_tokens_seen": 141767732, "step": 1553 }, { "epoch": 6.470833333333333, "loss": 0.10795509815216064, "loss_ce": 0.0003195986500941217, "loss_iou": 0.205078125, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 141767732, "step": 1553 }, { "epoch": 6.475, "grad_norm": 3.010863972284371, "learning_rate": 5e-05, "loss": 0.1039, "num_input_tokens_seen": 141859912, "step": 1554 }, { "epoch": 6.475, "loss": 0.1464884877204895, "loss_ce": 0.0006449909415096045, "loss_iou": 0.2890625, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 141859912, "step": 1554 }, { "epoch": 6.479166666666667, "grad_norm": 4.290786568194312, "learning_rate": 5e-05, "loss": 0.1159, "num_input_tokens_seen": 141951128, "step": 1555 }, { "epoch": 6.479166666666667, "loss": 0.12644046545028687, "loss_ce": 0.0007080405484884977, "loss_iou": 0.212890625, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 141951128, "step": 1555 }, { "epoch": 6.483333333333333, "grad_norm": 5.329031237918446, "learning_rate": 5e-05, "loss": 0.1194, "num_input_tokens_seen": 142042252, "step": 1556 }, { "epoch": 6.483333333333333, "loss": 0.09762811660766602, "loss_ce": 2.3853920083638513e-06, "loss_iou": 0.32421875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 142042252, "step": 1556 }, { "epoch": 6.4875, "grad_norm": 3.0036119988830863, "learning_rate": 5e-05, "loss": 0.0886, "num_input_tokens_seen": 142133724, "step": 1557 }, { "epoch": 6.4875, "loss": 0.08220556378364563, "loss_ce": 0.0009372499189339578, "loss_iou": 0.40234375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 142133724, "step": 1557 }, { "epoch": 6.491666666666666, "grad_norm": 5.490478725111228, "learning_rate": 5e-05, "loss": 0.0967, "num_input_tokens_seen": 142225104, "step": 1558 }, { "epoch": 6.491666666666666, "loss": 0.08201521635055542, "loss_ce": 0.0021812329068779945, "loss_iou": 0.4296875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 142225104, "step": 1558 }, { "epoch": 6.495833333333334, "grad_norm": 13.400139381111213, "learning_rate": 5e-05, "loss": 0.1145, "num_input_tokens_seen": 142316760, "step": 1559 }, { "epoch": 6.495833333333334, "loss": 0.06825940310955048, "loss_ce": 6.787155871279538e-05, "loss_iou": 0.193359375, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 142316760, "step": 1559 }, { "epoch": 6.5, "grad_norm": 3.718230018374104, "learning_rate": 5e-05, "loss": 0.0934, "num_input_tokens_seen": 142407900, "step": 1560 }, { "epoch": 6.5, "loss": 0.12160571664571762, "loss_ce": 0.001763185835443437, "loss_iou": 0.263671875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 142407900, "step": 1560 }, { "epoch": 6.504166666666666, "grad_norm": 3.332714913964349, "learning_rate": 5e-05, "loss": 0.1115, "num_input_tokens_seen": 142498844, "step": 1561 }, { "epoch": 6.504166666666666, "loss": 0.09362047910690308, "loss_ce": 0.0001756606507115066, "loss_iou": 0.306640625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 142498844, "step": 1561 }, { "epoch": 6.508333333333333, "grad_norm": 3.329570004581266, "learning_rate": 5e-05, "loss": 0.1104, "num_input_tokens_seen": 142590136, "step": 1562 }, { "epoch": 6.508333333333333, "loss": 0.10767680406570435, "loss_ce": 0.00013285694876685739, "loss_iou": 0.44140625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 142590136, "step": 1562 }, { "epoch": 6.5125, "grad_norm": 3.5592758832194806, "learning_rate": 5e-05, "loss": 0.1144, "num_input_tokens_seen": 142681180, "step": 1563 }, { "epoch": 6.5125, "loss": 0.06250756978988647, "loss_ce": 0.0017089198809117079, "loss_iou": 0.24609375, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 142681180, "step": 1563 }, { "epoch": 6.516666666666667, "grad_norm": 2.79429299910581, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 142772400, "step": 1564 }, { "epoch": 6.516666666666667, "loss": 0.04886992275714874, "loss_ce": 0.0014303472125902772, "loss_iou": 0.240234375, "loss_num": 0.009521484375, "loss_xval": 0.04736328125, "num_input_tokens_seen": 142772400, "step": 1564 }, { "epoch": 6.520833333333333, "grad_norm": 4.021842943865755, "learning_rate": 5e-05, "loss": 0.1259, "num_input_tokens_seen": 142863396, "step": 1565 }, { "epoch": 6.520833333333333, "loss": 0.14970502257347107, "loss_ce": 0.0012675110483542085, "loss_iou": 0.3125, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 142863396, "step": 1565 }, { "epoch": 6.525, "grad_norm": 3.9084038734956557, "learning_rate": 5e-05, "loss": 0.1083, "num_input_tokens_seen": 142954776, "step": 1566 }, { "epoch": 6.525, "loss": 0.10808855295181274, "loss_ce": 0.0004988283035345376, "loss_iou": 0.2470703125, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 142954776, "step": 1566 }, { "epoch": 6.529166666666667, "grad_norm": 1.9237307426450732, "learning_rate": 5e-05, "loss": 0.0757, "num_input_tokens_seen": 143045836, "step": 1567 }, { "epoch": 6.529166666666667, "loss": 0.06775303930044174, "loss_ce": 1.927517951116897e-05, "loss_iou": 0.29296875, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 143045836, "step": 1567 }, { "epoch": 6.533333333333333, "grad_norm": 4.421940334695053, "learning_rate": 5e-05, "loss": 0.1091, "num_input_tokens_seen": 143137316, "step": 1568 }, { "epoch": 6.533333333333333, "loss": 0.12645383179187775, "loss_ce": 0.00047726318007335067, "loss_iou": 0.279296875, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 143137316, "step": 1568 }, { "epoch": 6.5375, "grad_norm": 4.5291712901952295, "learning_rate": 5e-05, "loss": 0.0846, "num_input_tokens_seen": 143228708, "step": 1569 }, { "epoch": 6.5375, "loss": 0.08038702607154846, "loss_ce": 0.0004767438513226807, "loss_iou": 0.2421875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 143228708, "step": 1569 }, { "epoch": 6.541666666666667, "grad_norm": 7.921331568022988, "learning_rate": 5e-05, "loss": 0.0751, "num_input_tokens_seen": 143319700, "step": 1570 }, { "epoch": 6.541666666666667, "loss": 0.0716252326965332, "loss_ce": 4.6252054744400084e-05, "loss_iou": 0.3203125, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 143319700, "step": 1570 }, { "epoch": 6.545833333333333, "grad_norm": 2.6572462351600374, "learning_rate": 5e-05, "loss": 0.1061, "num_input_tokens_seen": 143410636, "step": 1571 }, { "epoch": 6.545833333333333, "loss": 0.12413694709539413, "loss_ce": 2.195342858613003e-05, "loss_iou": 0.314453125, "loss_num": 0.02490234375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 143410636, "step": 1571 }, { "epoch": 6.55, "grad_norm": 3.452842329680069, "learning_rate": 5e-05, "loss": 0.0772, "num_input_tokens_seen": 143502496, "step": 1572 }, { "epoch": 6.55, "loss": 0.09016988426446915, "loss_ce": 6.673274037893862e-05, "loss_iou": 0.330078125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 143502496, "step": 1572 }, { "epoch": 6.554166666666667, "grad_norm": 2.4604739050610465, "learning_rate": 5e-05, "loss": 0.0748, "num_input_tokens_seen": 143594092, "step": 1573 }, { "epoch": 6.554166666666667, "loss": 0.08416140079498291, "loss_ce": 7.021539204288274e-05, "loss_iou": 0.36328125, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 143594092, "step": 1573 }, { "epoch": 6.558333333333334, "grad_norm": 3.4583027620478486, "learning_rate": 5e-05, "loss": 0.1189, "num_input_tokens_seen": 143684768, "step": 1574 }, { "epoch": 6.558333333333334, "loss": 0.18618930876255035, "loss_ce": 1.5710368188592838e-06, "loss_iou": 0.22265625, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 143684768, "step": 1574 }, { "epoch": 6.5625, "grad_norm": 4.276077174262582, "learning_rate": 5e-05, "loss": 0.117, "num_input_tokens_seen": 143776528, "step": 1575 }, { "epoch": 6.5625, "loss": 0.15433287620544434, "loss_ce": 0.00028013830888085067, "loss_iou": 0.28125, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 143776528, "step": 1575 }, { "epoch": 6.566666666666666, "grad_norm": 3.6015378520829078, "learning_rate": 5e-05, "loss": 0.1529, "num_input_tokens_seen": 143867688, "step": 1576 }, { "epoch": 6.566666666666666, "loss": 0.1484794020652771, "loss_ce": 0.0014762284699827433, "loss_iou": 0.298828125, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 143867688, "step": 1576 }, { "epoch": 6.570833333333333, "grad_norm": 2.562262945602401, "learning_rate": 5e-05, "loss": 0.0847, "num_input_tokens_seen": 143959236, "step": 1577 }, { "epoch": 6.570833333333333, "loss": 0.09467719495296478, "loss_ce": 0.001583316596224904, "loss_iou": 0.328125, "loss_num": 0.0185546875, "loss_xval": 0.09326171875, "num_input_tokens_seen": 143959236, "step": 1577 }, { "epoch": 6.575, "grad_norm": 2.621665897659335, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 144050656, "step": 1578 }, { "epoch": 6.575, "loss": 0.07747948914766312, "loss_ce": 4.876002640230581e-05, "loss_iou": 0.287109375, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 144050656, "step": 1578 }, { "epoch": 6.579166666666667, "grad_norm": 10.60835756383391, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 144140052, "step": 1579 }, { "epoch": 6.579166666666667, "loss": 0.1193399503827095, "loss_ce": 7.724653551122174e-05, "loss_iou": 0.427734375, "loss_num": 0.02392578125, "loss_xval": 0.119140625, "num_input_tokens_seen": 144140052, "step": 1579 }, { "epoch": 6.583333333333333, "grad_norm": 6.254374980240291, "learning_rate": 5e-05, "loss": 0.1012, "num_input_tokens_seen": 144231644, "step": 1580 }, { "epoch": 6.583333333333333, "loss": 0.08207383751869202, "loss_ce": 0.0006224174285307527, "loss_iou": 0.423828125, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 144231644, "step": 1580 }, { "epoch": 6.5875, "grad_norm": 4.899090457878927, "learning_rate": 5e-05, "loss": 0.1001, "num_input_tokens_seen": 144322608, "step": 1581 }, { "epoch": 6.5875, "loss": 0.06415297091007233, "loss_ce": 0.0028584187384694815, "loss_iou": 0.345703125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 144322608, "step": 1581 }, { "epoch": 6.591666666666667, "grad_norm": 2.766175501257015, "learning_rate": 5e-05, "loss": 0.1122, "num_input_tokens_seen": 144413772, "step": 1582 }, { "epoch": 6.591666666666667, "loss": 0.10372322797775269, "loss_ce": 0.0006882529705762863, "loss_iou": 0.2734375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 144413772, "step": 1582 }, { "epoch": 6.595833333333333, "grad_norm": 2.7264734440635925, "learning_rate": 5e-05, "loss": 0.0975, "num_input_tokens_seen": 144504496, "step": 1583 }, { "epoch": 6.595833333333333, "loss": 0.134153351187706, "loss_ce": 0.0001811852998798713, "loss_iou": 0.0771484375, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 144504496, "step": 1583 }, { "epoch": 6.6, "grad_norm": 3.967699299929471, "learning_rate": 5e-05, "loss": 0.0948, "num_input_tokens_seen": 144596468, "step": 1584 }, { "epoch": 6.6, "loss": 0.13596948981285095, "loss_ce": 7.469956472050399e-05, "loss_iou": 0.39453125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 144596468, "step": 1584 }, { "epoch": 6.604166666666667, "grad_norm": 5.155884044774041, "learning_rate": 5e-05, "loss": 0.0707, "num_input_tokens_seen": 144687304, "step": 1585 }, { "epoch": 6.604166666666667, "loss": 0.04194006323814392, "loss_ce": 0.0004208995960652828, "loss_iou": 0.27734375, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 144687304, "step": 1585 }, { "epoch": 6.608333333333333, "grad_norm": 2.303773461049103, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 144778800, "step": 1586 }, { "epoch": 6.608333333333333, "loss": 0.05826836824417114, "loss_ce": 0.0009410970378667116, "loss_iou": 0.251953125, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 144778800, "step": 1586 }, { "epoch": 6.6125, "grad_norm": 2.0548811267657325, "learning_rate": 5e-05, "loss": 0.1494, "num_input_tokens_seen": 144870196, "step": 1587 }, { "epoch": 6.6125, "loss": 0.15138491988182068, "loss_ce": 4.8250392865156755e-05, "loss_iou": 0.2158203125, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 144870196, "step": 1587 }, { "epoch": 6.616666666666667, "grad_norm": 5.6424045492552395, "learning_rate": 5e-05, "loss": 0.111, "num_input_tokens_seen": 144959672, "step": 1588 }, { "epoch": 6.616666666666667, "loss": 0.13981932401657104, "loss_ce": 3.0346068342623767e-06, "loss_iou": 0.294921875, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 144959672, "step": 1588 }, { "epoch": 6.620833333333334, "grad_norm": 4.411052069608031, "learning_rate": 5e-05, "loss": 0.0917, "num_input_tokens_seen": 145051176, "step": 1589 }, { "epoch": 6.620833333333334, "loss": 0.10094712674617767, "loss_ce": 0.002283800160512328, "loss_iou": 0.419921875, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 145051176, "step": 1589 }, { "epoch": 6.625, "grad_norm": 4.046755342352393, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 145142076, "step": 1590 }, { "epoch": 6.625, "loss": 0.11325374245643616, "loss_ce": 0.0004302625893615186, "loss_iou": 0.287109375, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 145142076, "step": 1590 }, { "epoch": 6.629166666666666, "grad_norm": 2.562778790765417, "learning_rate": 5e-05, "loss": 0.1086, "num_input_tokens_seen": 145233604, "step": 1591 }, { "epoch": 6.629166666666666, "loss": 0.10232369601726532, "loss_ce": 8.980841812444851e-05, "loss_iou": 0.33203125, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 145233604, "step": 1591 }, { "epoch": 6.633333333333333, "grad_norm": 5.5019492787113, "learning_rate": 5e-05, "loss": 0.127, "num_input_tokens_seen": 145324992, "step": 1592 }, { "epoch": 6.633333333333333, "loss": 0.1514090746641159, "loss_ce": 1.1372105291229673e-05, "loss_iou": 0.39453125, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 145324992, "step": 1592 }, { "epoch": 6.6375, "grad_norm": 3.1473607225260904, "learning_rate": 5e-05, "loss": 0.0828, "num_input_tokens_seen": 145416480, "step": 1593 }, { "epoch": 6.6375, "loss": 0.10823452472686768, "loss_ce": 0.00035488815046846867, "loss_iou": 0.205078125, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 145416480, "step": 1593 }, { "epoch": 6.641666666666667, "grad_norm": 11.970380699147968, "learning_rate": 5e-05, "loss": 0.1205, "num_input_tokens_seen": 145508484, "step": 1594 }, { "epoch": 6.641666666666667, "loss": 0.1729552000761032, "loss_ce": 0.004803344141691923, "loss_iou": 0.2333984375, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 145508484, "step": 1594 }, { "epoch": 6.645833333333333, "grad_norm": 4.8173482154462945, "learning_rate": 5e-05, "loss": 0.0927, "num_input_tokens_seen": 145599724, "step": 1595 }, { "epoch": 6.645833333333333, "loss": 0.10813955962657928, "loss_ce": 0.0008092427160590887, "loss_iou": 0.244140625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 145599724, "step": 1595 }, { "epoch": 6.65, "grad_norm": 4.772495453558251, "learning_rate": 5e-05, "loss": 0.1477, "num_input_tokens_seen": 145690884, "step": 1596 }, { "epoch": 6.65, "loss": 0.1627688705921173, "loss_ce": 0.00021699043281842023, "loss_iou": 0.375, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 145690884, "step": 1596 }, { "epoch": 6.654166666666667, "grad_norm": 3.9667383937114513, "learning_rate": 5e-05, "loss": 0.105, "num_input_tokens_seen": 145781884, "step": 1597 }, { "epoch": 6.654166666666667, "loss": 0.1544293314218521, "loss_ce": 0.000986945815384388, "loss_iou": 0.326171875, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 145781884, "step": 1597 }, { "epoch": 6.658333333333333, "grad_norm": 4.575704460362979, "learning_rate": 5e-05, "loss": 0.1118, "num_input_tokens_seen": 145873312, "step": 1598 }, { "epoch": 6.658333333333333, "loss": 0.06805611401796341, "loss_ce": 0.0012226162943989038, "loss_iou": 0.1591796875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 145873312, "step": 1598 }, { "epoch": 6.6625, "grad_norm": 1.9421159193829607, "learning_rate": 5e-05, "loss": 0.0884, "num_input_tokens_seen": 145964720, "step": 1599 }, { "epoch": 6.6625, "loss": 0.04284782335162163, "loss_ce": 0.0012065876508131623, "loss_iou": 0.2255859375, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 145964720, "step": 1599 }, { "epoch": 6.666666666666667, "grad_norm": 4.205435522590141, "learning_rate": 5e-05, "loss": 0.0853, "num_input_tokens_seen": 146056544, "step": 1600 }, { "epoch": 6.666666666666667, "loss": 0.10162333399057388, "loss_ce": 0.0015409357147291303, "loss_iou": 0.26953125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 146056544, "step": 1600 }, { "epoch": 6.670833333333333, "grad_norm": 4.996232392456174, "learning_rate": 5e-05, "loss": 0.1017, "num_input_tokens_seen": 146147216, "step": 1601 }, { "epoch": 6.670833333333333, "loss": 0.1020415648818016, "loss_ce": 0.0002044006687356159, "loss_iou": 0.310546875, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 146147216, "step": 1601 }, { "epoch": 6.675, "grad_norm": 5.239999392150706, "learning_rate": 5e-05, "loss": 0.1187, "num_input_tokens_seen": 146239040, "step": 1602 }, { "epoch": 6.675, "loss": 0.125333771109581, "loss_ce": 0.002058014739304781, "loss_iou": 0.294921875, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 146239040, "step": 1602 }, { "epoch": 6.679166666666667, "grad_norm": 11.941534013991284, "learning_rate": 5e-05, "loss": 0.1318, "num_input_tokens_seen": 146330304, "step": 1603 }, { "epoch": 6.679166666666667, "loss": 0.08114509284496307, "loss_ce": 9.041121666086838e-05, "loss_iou": 0.2138671875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 146330304, "step": 1603 }, { "epoch": 6.683333333333334, "grad_norm": 4.644288999865973, "learning_rate": 5e-05, "loss": 0.096, "num_input_tokens_seen": 146421016, "step": 1604 }, { "epoch": 6.683333333333334, "loss": 0.1072845384478569, "loss_ce": 0.000381466350518167, "loss_iou": 0.2177734375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 146421016, "step": 1604 }, { "epoch": 6.6875, "grad_norm": 4.404307762190488, "learning_rate": 5e-05, "loss": 0.096, "num_input_tokens_seen": 146512496, "step": 1605 }, { "epoch": 6.6875, "loss": 0.0725177675485611, "loss_ce": 0.0004962862585671246, "loss_iou": 0.34765625, "loss_num": 0.014404296875, "loss_xval": 0.072265625, "num_input_tokens_seen": 146512496, "step": 1605 }, { "epoch": 6.691666666666666, "grad_norm": 2.097287608660134, "learning_rate": 5e-05, "loss": 0.0792, "num_input_tokens_seen": 146603768, "step": 1606 }, { "epoch": 6.691666666666666, "loss": 0.0623895600438118, "loss_ce": 0.0010339674772694707, "loss_iou": 0.267578125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 146603768, "step": 1606 }, { "epoch": 6.695833333333333, "grad_norm": 4.467146990709377, "learning_rate": 5e-05, "loss": 0.1235, "num_input_tokens_seen": 146695264, "step": 1607 }, { "epoch": 6.695833333333333, "loss": 0.17170248925685883, "loss_ce": 0.0025435483548790216, "loss_iou": 0.33203125, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 146695264, "step": 1607 }, { "epoch": 6.7, "grad_norm": 6.296580911285716, "learning_rate": 5e-05, "loss": 0.1097, "num_input_tokens_seen": 146786668, "step": 1608 }, { "epoch": 6.7, "loss": 0.11704735457897186, "loss_ce": 0.000805901363492012, "loss_iou": 0.1025390625, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 146786668, "step": 1608 }, { "epoch": 6.704166666666667, "grad_norm": 5.336888472091619, "learning_rate": 5e-05, "loss": 0.1373, "num_input_tokens_seen": 146877584, "step": 1609 }, { "epoch": 6.704166666666667, "loss": 0.10470438748598099, "loss_ce": 0.0004563412512652576, "loss_iou": 0.359375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 146877584, "step": 1609 }, { "epoch": 6.708333333333333, "grad_norm": 2.571261156505444, "learning_rate": 5e-05, "loss": 0.1165, "num_input_tokens_seen": 146968948, "step": 1610 }, { "epoch": 6.708333333333333, "loss": 0.12027784436941147, "loss_ce": 0.0006794579094275832, "loss_iou": 0.3359375, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 146968948, "step": 1610 }, { "epoch": 6.7125, "grad_norm": 10.122247458063885, "learning_rate": 5e-05, "loss": 0.0949, "num_input_tokens_seen": 147059772, "step": 1611 }, { "epoch": 6.7125, "loss": 0.09837611019611359, "loss_ce": 0.0013912541326135397, "loss_iou": 0.205078125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 147059772, "step": 1611 }, { "epoch": 6.716666666666667, "grad_norm": 4.187241160722909, "learning_rate": 5e-05, "loss": 0.1325, "num_input_tokens_seen": 147150372, "step": 1612 }, { "epoch": 6.716666666666667, "loss": 0.15941470861434937, "loss_ce": 0.000479156500659883, "loss_iou": 0.310546875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 147150372, "step": 1612 }, { "epoch": 6.720833333333333, "grad_norm": 3.4823413210199328, "learning_rate": 5e-05, "loss": 0.0829, "num_input_tokens_seen": 147241568, "step": 1613 }, { "epoch": 6.720833333333333, "loss": 0.10597635060548782, "loss_ce": 4.063520464114845e-06, "loss_iou": 0.1806640625, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 147241568, "step": 1613 }, { "epoch": 6.725, "grad_norm": 7.069400820243496, "learning_rate": 5e-05, "loss": 0.1113, "num_input_tokens_seen": 147332292, "step": 1614 }, { "epoch": 6.725, "loss": 0.1232280358672142, "loss_ce": 0.0009517316939309239, "loss_iou": 0.1826171875, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 147332292, "step": 1614 }, { "epoch": 6.729166666666667, "grad_norm": 1.9823059290298373, "learning_rate": 5e-05, "loss": 0.083, "num_input_tokens_seen": 147423048, "step": 1615 }, { "epoch": 6.729166666666667, "loss": 0.08469944447278976, "loss_ce": 2.0791940187336877e-05, "loss_iou": 0.408203125, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 147423048, "step": 1615 }, { "epoch": 6.733333333333333, "grad_norm": 2.4822174257526077, "learning_rate": 5e-05, "loss": 0.0585, "num_input_tokens_seen": 147513936, "step": 1616 }, { "epoch": 6.733333333333333, "loss": 0.057547569274902344, "loss_ce": 0.00044917932245880365, "loss_iou": 0.283203125, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 147513936, "step": 1616 }, { "epoch": 6.7375, "grad_norm": 2.833317054725588, "learning_rate": 5e-05, "loss": 0.1107, "num_input_tokens_seen": 147605708, "step": 1617 }, { "epoch": 6.7375, "loss": 0.07036544382572174, "loss_ce": 0.0010142435785382986, "loss_iou": 0.298828125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 147605708, "step": 1617 }, { "epoch": 6.741666666666667, "grad_norm": 4.051920430799818, "learning_rate": 5e-05, "loss": 0.1105, "num_input_tokens_seen": 147697648, "step": 1618 }, { "epoch": 6.741666666666667, "loss": 0.08087938278913498, "loss_ce": 0.0002824601251631975, "loss_iou": 0.265625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 147697648, "step": 1618 }, { "epoch": 6.745833333333334, "grad_norm": 3.1222260691833696, "learning_rate": 5e-05, "loss": 0.1416, "num_input_tokens_seen": 147788916, "step": 1619 }, { "epoch": 6.745833333333334, "loss": 0.13396668434143066, "loss_ce": 0.00019288396288175136, "loss_iou": 0.189453125, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 147788916, "step": 1619 }, { "epoch": 6.75, "grad_norm": 3.6992346856006324, "learning_rate": 5e-05, "loss": 0.1125, "num_input_tokens_seen": 147880112, "step": 1620 }, { "epoch": 6.75, "loss": 0.11802740395069122, "loss_ce": 0.000916196615435183, "loss_iou": 0.380859375, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 147880112, "step": 1620 }, { "epoch": 6.754166666666666, "grad_norm": 8.15302032172279, "learning_rate": 5e-05, "loss": 0.1344, "num_input_tokens_seen": 147971792, "step": 1621 }, { "epoch": 6.754166666666666, "loss": 0.1697504073381424, "loss_ce": 0.002224163617938757, "loss_iou": 0.27734375, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 147971792, "step": 1621 }, { "epoch": 6.758333333333333, "grad_norm": 4.230587195826469, "learning_rate": 5e-05, "loss": 0.1052, "num_input_tokens_seen": 148062112, "step": 1622 }, { "epoch": 6.758333333333333, "loss": 0.12787632644176483, "loss_ce": 1.5303545296774246e-05, "loss_iou": 0.193359375, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 148062112, "step": 1622 }, { "epoch": 6.7625, "grad_norm": 4.901904450953437, "learning_rate": 5e-05, "loss": 0.1005, "num_input_tokens_seen": 148153220, "step": 1623 }, { "epoch": 6.7625, "loss": 0.07316801697015762, "loss_ce": 0.0028860336169600487, "loss_iou": 0.28515625, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 148153220, "step": 1623 }, { "epoch": 6.766666666666667, "grad_norm": 3.9667116514826097, "learning_rate": 5e-05, "loss": 0.1551, "num_input_tokens_seen": 148244284, "step": 1624 }, { "epoch": 6.766666666666667, "loss": 0.151185542345047, "loss_ce": 1.4684163716083276e-06, "loss_iou": 0.328125, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 148244284, "step": 1624 }, { "epoch": 6.770833333333333, "grad_norm": 13.36068440365898, "learning_rate": 5e-05, "loss": 0.0834, "num_input_tokens_seen": 148335192, "step": 1625 }, { "epoch": 6.770833333333333, "loss": 0.09784172475337982, "loss_ce": 0.00039909378392621875, "loss_iou": 0.265625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 148335192, "step": 1625 }, { "epoch": 6.775, "grad_norm": 4.5555459822511395, "learning_rate": 5e-05, "loss": 0.141, "num_input_tokens_seen": 148426388, "step": 1626 }, { "epoch": 6.775, "loss": 0.14454184472560883, "loss_ce": 0.0003768008027691394, "loss_iou": 0.3046875, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 148426388, "step": 1626 }, { "epoch": 6.779166666666667, "grad_norm": 6.7472256934039025, "learning_rate": 5e-05, "loss": 0.1081, "num_input_tokens_seen": 148516512, "step": 1627 }, { "epoch": 6.779166666666667, "loss": 0.06415146589279175, "loss_ce": 3.5179459700884763e-06, "loss_iou": 0.1025390625, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 148516512, "step": 1627 }, { "epoch": 6.783333333333333, "grad_norm": 7.431925452000919, "learning_rate": 5e-05, "loss": 0.0868, "num_input_tokens_seen": 148607596, "step": 1628 }, { "epoch": 6.783333333333333, "loss": 0.10833147913217545, "loss_ce": 0.0010011536069214344, "loss_iou": 0.06494140625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 148607596, "step": 1628 }, { "epoch": 6.7875, "grad_norm": 3.834227391343124, "learning_rate": 5e-05, "loss": 0.1278, "num_input_tokens_seen": 148699612, "step": 1629 }, { "epoch": 6.7875, "loss": 0.14319217205047607, "loss_ce": 0.001041282550431788, "loss_iou": 0.296875, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 148699612, "step": 1629 }, { "epoch": 6.791666666666667, "grad_norm": 2.6814714656783742, "learning_rate": 5e-05, "loss": 0.1478, "num_input_tokens_seen": 148789564, "step": 1630 }, { "epoch": 6.791666666666667, "loss": 0.09333618730306625, "loss_ce": 0.00021942633611615747, "loss_iou": 0.26171875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 148789564, "step": 1630 }, { "epoch": 6.795833333333333, "grad_norm": 4.520185963024227, "learning_rate": 5e-05, "loss": 0.1148, "num_input_tokens_seen": 148879640, "step": 1631 }, { "epoch": 6.795833333333333, "loss": 0.14096848666667938, "loss_ce": 0.002510237041860819, "loss_iou": 0.296875, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 148879640, "step": 1631 }, { "epoch": 6.8, "grad_norm": 4.447410411705429, "learning_rate": 5e-05, "loss": 0.1073, "num_input_tokens_seen": 148970324, "step": 1632 }, { "epoch": 6.8, "loss": 0.1173885315656662, "loss_ce": 3.318277231301181e-05, "loss_iou": 0.296875, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 148970324, "step": 1632 }, { "epoch": 6.804166666666667, "grad_norm": 3.4723243249443794, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 149061416, "step": 1633 }, { "epoch": 6.804166666666667, "loss": 0.07656733691692352, "loss_ce": 0.001734415884129703, "loss_iou": 0.2333984375, "loss_num": 0.0150146484375, "loss_xval": 0.07470703125, "num_input_tokens_seen": 149061416, "step": 1633 }, { "epoch": 6.808333333333334, "grad_norm": 4.963455215852101, "learning_rate": 5e-05, "loss": 0.1045, "num_input_tokens_seen": 149152852, "step": 1634 }, { "epoch": 6.808333333333334, "loss": 0.07179413735866547, "loss_ce": 0.0003524910134728998, "loss_iou": 0.294921875, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 149152852, "step": 1634 }, { "epoch": 6.8125, "grad_norm": 6.349033898429906, "learning_rate": 5e-05, "loss": 0.0939, "num_input_tokens_seen": 149244076, "step": 1635 }, { "epoch": 6.8125, "loss": 0.07682006061077118, "loss_ce": 1.4947971067158505e-05, "loss_iou": 0.283203125, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 149244076, "step": 1635 }, { "epoch": 6.816666666666666, "grad_norm": 3.292050967196103, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 149335296, "step": 1636 }, { "epoch": 6.816666666666666, "loss": 0.09713432937860489, "loss_ce": 5.791701914859004e-05, "loss_iou": 0.1884765625, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 149335296, "step": 1636 }, { "epoch": 6.820833333333333, "grad_norm": 16.227618304181107, "learning_rate": 5e-05, "loss": 0.1328, "num_input_tokens_seen": 149426624, "step": 1637 }, { "epoch": 6.820833333333333, "loss": 0.10378938913345337, "loss_ce": 0.001647052587941289, "loss_iou": 0.154296875, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 149426624, "step": 1637 }, { "epoch": 6.825, "grad_norm": 9.950541544841345, "learning_rate": 5e-05, "loss": 0.1208, "num_input_tokens_seen": 149517172, "step": 1638 }, { "epoch": 6.825, "loss": 0.1490859091281891, "loss_ce": 0.0022963478695601225, "loss_iou": 0.2734375, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 149517172, "step": 1638 }, { "epoch": 6.829166666666667, "grad_norm": 2.888281399700848, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 149608580, "step": 1639 }, { "epoch": 6.829166666666667, "loss": 0.08360613882541656, "loss_ce": 0.002017396269366145, "loss_iou": 0.2578125, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 149608580, "step": 1639 }, { "epoch": 6.833333333333333, "grad_norm": 3.125102142349154, "learning_rate": 5e-05, "loss": 0.0874, "num_input_tokens_seen": 149699524, "step": 1640 }, { "epoch": 6.833333333333333, "loss": 0.07310190796852112, "loss_ce": 2.756038520601578e-05, "loss_iou": 0.341796875, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 149699524, "step": 1640 }, { "epoch": 6.8375, "grad_norm": 5.308252886132232, "learning_rate": 5e-05, "loss": 0.1095, "num_input_tokens_seen": 149790584, "step": 1641 }, { "epoch": 6.8375, "loss": 0.0729517862200737, "loss_ce": 0.0010676286183297634, "loss_iou": 0.11328125, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 149790584, "step": 1641 }, { "epoch": 6.841666666666667, "grad_norm": 4.232688259836703, "learning_rate": 5e-05, "loss": 0.0783, "num_input_tokens_seen": 149882032, "step": 1642 }, { "epoch": 6.841666666666667, "loss": 0.08408161997795105, "loss_ce": 0.00041005387902259827, "loss_iou": 0.283203125, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 149882032, "step": 1642 }, { "epoch": 6.845833333333333, "grad_norm": 3.5412300451929783, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 149973372, "step": 1643 }, { "epoch": 6.845833333333333, "loss": 0.04515673965215683, "loss_ce": 0.0004332278040237725, "loss_iou": 0.3359375, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 149973372, "step": 1643 }, { "epoch": 6.85, "grad_norm": 4.356357508934272, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 150064332, "step": 1644 }, { "epoch": 6.85, "loss": 0.09591878205537796, "loss_ce": 3.254601324442774e-05, "loss_iou": 0.275390625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 150064332, "step": 1644 }, { "epoch": 6.854166666666667, "grad_norm": 9.82791526650308, "learning_rate": 5e-05, "loss": 0.0799, "num_input_tokens_seen": 150155820, "step": 1645 }, { "epoch": 6.854166666666667, "loss": 0.11722063273191452, "loss_ce": 0.0007655585068278015, "loss_iou": 0.337890625, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 150155820, "step": 1645 }, { "epoch": 6.858333333333333, "grad_norm": 8.523554873394339, "learning_rate": 5e-05, "loss": 0.0961, "num_input_tokens_seen": 150246624, "step": 1646 }, { "epoch": 6.858333333333333, "loss": 0.11677127331495285, "loss_ce": 1.1021089449059218e-05, "loss_iou": 0.259765625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 150246624, "step": 1646 }, { "epoch": 6.8625, "grad_norm": 2.841981736257889, "learning_rate": 5e-05, "loss": 0.078, "num_input_tokens_seen": 150338200, "step": 1647 }, { "epoch": 6.8625, "loss": 0.07999046891927719, "loss_ce": 3.441448279772885e-05, "loss_iou": 0.33984375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 150338200, "step": 1647 }, { "epoch": 6.866666666666667, "grad_norm": 4.739673541337045, "learning_rate": 5e-05, "loss": 0.0881, "num_input_tokens_seen": 150429216, "step": 1648 }, { "epoch": 6.866666666666667, "loss": 0.10366171598434448, "loss_ce": 0.0005733439465984702, "loss_iou": 0.208984375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 150429216, "step": 1648 }, { "epoch": 6.870833333333334, "grad_norm": 3.6239360814138815, "learning_rate": 5e-05, "loss": 0.1146, "num_input_tokens_seen": 150520388, "step": 1649 }, { "epoch": 6.870833333333334, "loss": 0.10713136941194534, "loss_ce": 0.0002359184727538377, "loss_iou": 0.296875, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 150520388, "step": 1649 }, { "epoch": 6.875, "grad_norm": 10.076261514779913, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 150611424, "step": 1650 }, { "epoch": 6.875, "loss": 0.054482243955135345, "loss_ce": 0.0006187166436575353, "loss_iou": 0.416015625, "loss_num": 0.0107421875, "loss_xval": 0.053955078125, "num_input_tokens_seen": 150611424, "step": 1650 }, { "epoch": 6.879166666666666, "grad_norm": 7.037017041308367, "learning_rate": 5e-05, "loss": 0.1007, "num_input_tokens_seen": 150702592, "step": 1651 }, { "epoch": 6.879166666666666, "loss": 0.0805845558643341, "loss_ce": 6.392721115844324e-05, "loss_iou": 0.298828125, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 150702592, "step": 1651 }, { "epoch": 6.883333333333333, "grad_norm": 2.2476195907523726, "learning_rate": 5e-05, "loss": 0.1, "num_input_tokens_seen": 150792232, "step": 1652 }, { "epoch": 6.883333333333333, "loss": 0.10652382671833038, "loss_ce": 0.0002539954148232937, "loss_iou": 0.1435546875, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 150792232, "step": 1652 }, { "epoch": 6.8875, "grad_norm": 8.071696596054197, "learning_rate": 5e-05, "loss": 0.1469, "num_input_tokens_seen": 150883820, "step": 1653 }, { "epoch": 6.8875, "loss": 0.12309402227401733, "loss_ce": 0.0009016396361403167, "loss_iou": 0.3125, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 150883820, "step": 1653 }, { "epoch": 6.891666666666667, "grad_norm": 6.26788141368876, "learning_rate": 5e-05, "loss": 0.1246, "num_input_tokens_seen": 150975532, "step": 1654 }, { "epoch": 6.891666666666667, "loss": 0.07721350342035294, "loss_ce": 0.0004923146916553378, "loss_iou": 0.21484375, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 150975532, "step": 1654 }, { "epoch": 6.895833333333333, "grad_norm": 2.9578790282701592, "learning_rate": 5e-05, "loss": 0.1126, "num_input_tokens_seen": 151066492, "step": 1655 }, { "epoch": 6.895833333333333, "loss": 0.1371522843837738, "loss_ce": 3.680928057292476e-05, "loss_iou": 0.31640625, "loss_num": 0.0274658203125, "loss_xval": 0.13671875, "num_input_tokens_seen": 151066492, "step": 1655 }, { "epoch": 6.9, "grad_norm": 7.018461363606477, "learning_rate": 5e-05, "loss": 0.1057, "num_input_tokens_seen": 151156936, "step": 1656 }, { "epoch": 6.9, "loss": 0.11592155694961548, "loss_ce": 0.000977097311988473, "loss_iou": 0.267578125, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 151156936, "step": 1656 }, { "epoch": 6.904166666666667, "grad_norm": 9.570021745040235, "learning_rate": 5e-05, "loss": 0.0916, "num_input_tokens_seen": 151248348, "step": 1657 }, { "epoch": 6.904166666666667, "loss": 0.06212965026497841, "loss_ce": 0.0002247430384159088, "loss_iou": 0.291015625, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 151248348, "step": 1657 }, { "epoch": 6.908333333333333, "grad_norm": 7.408031973473457, "learning_rate": 5e-05, "loss": 0.0955, "num_input_tokens_seen": 151339644, "step": 1658 }, { "epoch": 6.908333333333333, "loss": 0.07610473036766052, "loss_ce": 8.544308366253972e-05, "loss_iou": 0.296875, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 151339644, "step": 1658 }, { "epoch": 6.9125, "grad_norm": 3.9852023699854087, "learning_rate": 5e-05, "loss": 0.0868, "num_input_tokens_seen": 151430928, "step": 1659 }, { "epoch": 6.9125, "loss": 0.10938706994056702, "loss_ce": 0.0001646591699682176, "loss_iou": 0.458984375, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 151430928, "step": 1659 }, { "epoch": 6.916666666666667, "grad_norm": 3.203128534099005, "learning_rate": 5e-05, "loss": 0.162, "num_input_tokens_seen": 151521964, "step": 1660 }, { "epoch": 6.916666666666667, "loss": 0.1142687052488327, "loss_ce": 4.141416138736531e-05, "loss_iou": 0.3984375, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 151521964, "step": 1660 }, { "epoch": 6.920833333333333, "grad_norm": 3.0602870881140274, "learning_rate": 5e-05, "loss": 0.1139, "num_input_tokens_seen": 151613612, "step": 1661 }, { "epoch": 6.920833333333333, "loss": 0.06900735199451447, "loss_ce": 0.00019021421030629426, "loss_iou": 0.322265625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 151613612, "step": 1661 }, { "epoch": 6.925, "grad_norm": 3.7765433687416072, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 151705088, "step": 1662 }, { "epoch": 6.925, "loss": 0.05029073357582092, "loss_ce": 0.0009438100969418883, "loss_iou": 0.234375, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 151705088, "step": 1662 }, { "epoch": 6.929166666666667, "grad_norm": 9.780738346093939, "learning_rate": 5e-05, "loss": 0.0924, "num_input_tokens_seen": 151796132, "step": 1663 }, { "epoch": 6.929166666666667, "loss": 0.06529055535793304, "loss_ce": 0.0007916553295217454, "loss_iou": 0.1943359375, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 151796132, "step": 1663 }, { "epoch": 6.933333333333334, "grad_norm": 15.110671025740874, "learning_rate": 5e-05, "loss": 0.1172, "num_input_tokens_seen": 151887784, "step": 1664 }, { "epoch": 6.933333333333334, "loss": 0.1223883330821991, "loss_ce": 0.001172511139884591, "loss_iou": 0.310546875, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 151887784, "step": 1664 }, { "epoch": 6.9375, "grad_norm": 3.9839866327172326, "learning_rate": 5e-05, "loss": 0.088, "num_input_tokens_seen": 151979232, "step": 1665 }, { "epoch": 6.9375, "loss": 0.12088834494352341, "loss_ce": 0.0007711583748459816, "loss_iou": 0.271484375, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 151979232, "step": 1665 }, { "epoch": 6.941666666666666, "grad_norm": 31.80681572542876, "learning_rate": 5e-05, "loss": 0.0928, "num_input_tokens_seen": 152070888, "step": 1666 }, { "epoch": 6.941666666666666, "loss": 0.11462222039699554, "loss_ce": 0.0007611305918544531, "loss_iou": 0.337890625, "loss_num": 0.0228271484375, "loss_xval": 0.11376953125, "num_input_tokens_seen": 152070888, "step": 1666 }, { "epoch": 6.945833333333333, "grad_norm": 1.9487821449189608, "learning_rate": 5e-05, "loss": 0.0967, "num_input_tokens_seen": 152162488, "step": 1667 }, { "epoch": 6.945833333333333, "loss": 0.10232115536928177, "loss_ce": 0.001414780505001545, "loss_iou": 0.328125, "loss_num": 0.0201416015625, "loss_xval": 0.10107421875, "num_input_tokens_seen": 152162488, "step": 1667 }, { "epoch": 6.95, "grad_norm": 2.296245106334686, "learning_rate": 5e-05, "loss": 0.079, "num_input_tokens_seen": 152253852, "step": 1668 }, { "epoch": 6.95, "loss": 0.08986086398363113, "loss_ce": 0.0011920389952138066, "loss_iou": 0.333984375, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 152253852, "step": 1668 }, { "epoch": 6.954166666666667, "grad_norm": 3.10920167890936, "learning_rate": 5e-05, "loss": 0.0904, "num_input_tokens_seen": 152344728, "step": 1669 }, { "epoch": 6.954166666666667, "loss": 0.11594560742378235, "loss_ce": 0.0003145021037198603, "loss_iou": 0.287109375, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 152344728, "step": 1669 }, { "epoch": 6.958333333333333, "grad_norm": 3.1787055623635814, "learning_rate": 5e-05, "loss": 0.1104, "num_input_tokens_seen": 152434540, "step": 1670 }, { "epoch": 6.958333333333333, "loss": 0.11103759706020355, "loss_ce": 0.003005367936566472, "loss_iou": 0.244140625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 152434540, "step": 1670 }, { "epoch": 6.9625, "grad_norm": 3.809738381982028, "learning_rate": 5e-05, "loss": 0.1244, "num_input_tokens_seen": 152525336, "step": 1671 }, { "epoch": 6.9625, "loss": 0.11085816472768784, "loss_ce": 0.0011627288768067956, "loss_iou": 0.04931640625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 152525336, "step": 1671 }, { "epoch": 6.966666666666667, "grad_norm": 2.7704701899550392, "learning_rate": 5e-05, "loss": 0.1406, "num_input_tokens_seen": 152617008, "step": 1672 }, { "epoch": 6.966666666666667, "loss": 0.10031631588935852, "loss_ce": 0.00012711159070022404, "loss_iou": 0.361328125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 152617008, "step": 1672 }, { "epoch": 6.970833333333333, "grad_norm": 3.4645120063749784, "learning_rate": 5e-05, "loss": 0.072, "num_input_tokens_seen": 152708576, "step": 1673 }, { "epoch": 6.970833333333333, "loss": 0.07226431369781494, "loss_ce": 0.001585603691637516, "loss_iou": 0.1591796875, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 152708576, "step": 1673 }, { "epoch": 6.975, "grad_norm": 6.7276705007467195, "learning_rate": 5e-05, "loss": 0.0878, "num_input_tokens_seen": 152800444, "step": 1674 }, { "epoch": 6.975, "loss": 0.10426914691925049, "loss_ce": 0.0008298219181597233, "loss_iou": 0.291015625, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 152800444, "step": 1674 }, { "epoch": 6.979166666666667, "grad_norm": 3.366820868004411, "learning_rate": 5e-05, "loss": 0.0757, "num_input_tokens_seen": 152890124, "step": 1675 }, { "epoch": 6.979166666666667, "loss": 0.07411395013332367, "loss_ce": 1.7270358512178063e-05, "loss_iou": 0.263671875, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 152890124, "step": 1675 }, { "epoch": 6.983333333333333, "grad_norm": 4.284830340007906, "learning_rate": 5e-05, "loss": 0.1426, "num_input_tokens_seen": 152982236, "step": 1676 }, { "epoch": 6.983333333333333, "loss": 0.15787337720394135, "loss_ce": 0.0007231036433950067, "loss_iou": 0.3828125, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 152982236, "step": 1676 }, { "epoch": 6.9875, "grad_norm": 2.7269392270511936, "learning_rate": 5e-05, "loss": 0.1975, "num_input_tokens_seen": 153072924, "step": 1677 }, { "epoch": 6.9875, "loss": 0.2391592264175415, "loss_ce": 0.000420216383645311, "loss_iou": 0.421875, "loss_num": 0.0478515625, "loss_xval": 0.23828125, "num_input_tokens_seen": 153072924, "step": 1677 }, { "epoch": 6.991666666666667, "grad_norm": 2.2636614694574386, "learning_rate": 5e-05, "loss": 0.1208, "num_input_tokens_seen": 153163848, "step": 1678 }, { "epoch": 6.991666666666667, "loss": 0.13624174892902374, "loss_ce": 0.0009573178831487894, "loss_iou": 0.08642578125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 153163848, "step": 1678 }, { "epoch": 6.995833333333334, "grad_norm": 4.28179404571259, "learning_rate": 5e-05, "loss": 0.0896, "num_input_tokens_seen": 153255320, "step": 1679 }, { "epoch": 6.995833333333334, "loss": 0.10170367360115051, "loss_ce": 0.0024910294450819492, "loss_iou": 0.375, "loss_num": 0.0198974609375, "loss_xval": 0.09912109375, "num_input_tokens_seen": 153255320, "step": 1679 }, { "epoch": 7.0, "grad_norm": 6.3688665214514755, "learning_rate": 5e-05, "loss": 0.1089, "num_input_tokens_seen": 153346276, "step": 1680 }, { "epoch": 7.0, "loss": 0.11220959573984146, "loss_ce": 1.1721253031282686e-05, "loss_iou": 0.2275390625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 153346276, "step": 1680 }, { "epoch": 7.004166666666666, "grad_norm": 3.5212944807976916, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 153437916, "step": 1681 }, { "epoch": 7.004166666666666, "loss": 0.043974943459033966, "loss_ce": 0.0007086455589160323, "loss_iou": 0.2890625, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 153437916, "step": 1681 }, { "epoch": 7.008333333333334, "grad_norm": 3.0994500949291277, "learning_rate": 5e-05, "loss": 0.0619, "num_input_tokens_seen": 153528748, "step": 1682 }, { "epoch": 7.008333333333334, "loss": 0.07113240659236908, "loss_ce": 0.00017903783009387553, "loss_iou": 0.34375, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 153528748, "step": 1682 }, { "epoch": 7.0125, "grad_norm": 2.7395665448088615, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 153619940, "step": 1683 }, { "epoch": 7.0125, "loss": 0.04401383176445961, "loss_ce": 0.0005491725169122219, "loss_iou": 0.2578125, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 153619940, "step": 1683 }, { "epoch": 7.016666666666667, "grad_norm": 4.569723659784505, "learning_rate": 5e-05, "loss": 0.1008, "num_input_tokens_seen": 153710872, "step": 1684 }, { "epoch": 7.016666666666667, "loss": 0.1020163893699646, "loss_ce": 0.002246800111606717, "loss_iou": 0.29296875, "loss_num": 0.02001953125, "loss_xval": 0.099609375, "num_input_tokens_seen": 153710872, "step": 1684 }, { "epoch": 7.020833333333333, "grad_norm": 2.6046811093814908, "learning_rate": 5e-05, "loss": 0.079, "num_input_tokens_seen": 153801900, "step": 1685 }, { "epoch": 7.020833333333333, "loss": 0.0888538509607315, "loss_ce": 1.7180602299049497e-05, "loss_iou": 0.40234375, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 153801900, "step": 1685 }, { "epoch": 7.025, "grad_norm": 5.760755754723652, "learning_rate": 5e-05, "loss": 0.1325, "num_input_tokens_seen": 153893304, "step": 1686 }, { "epoch": 7.025, "loss": 0.09790083765983582, "loss_ce": 0.00230452255345881, "loss_iou": 0.390625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 153893304, "step": 1686 }, { "epoch": 7.029166666666667, "grad_norm": 5.651359354673425, "learning_rate": 5e-05, "loss": 0.0892, "num_input_tokens_seen": 153984260, "step": 1687 }, { "epoch": 7.029166666666667, "loss": 0.12273789942264557, "loss_ce": 0.0003013756650034338, "loss_iou": 0.357421875, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 153984260, "step": 1687 }, { "epoch": 7.033333333333333, "grad_norm": 4.5602271451019565, "learning_rate": 5e-05, "loss": 0.0749, "num_input_tokens_seen": 154076040, "step": 1688 }, { "epoch": 7.033333333333333, "loss": 0.0731680616736412, "loss_ce": 0.0006277795182541013, "loss_iou": 0.3359375, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 154076040, "step": 1688 }, { "epoch": 7.0375, "grad_norm": 4.816161090981711, "learning_rate": 5e-05, "loss": 0.0918, "num_input_tokens_seen": 154166928, "step": 1689 }, { "epoch": 7.0375, "loss": 0.07955670356750488, "loss_ce": 0.000699278840329498, "loss_iou": 0.1328125, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 154166928, "step": 1689 }, { "epoch": 7.041666666666667, "grad_norm": 4.0218740355714315, "learning_rate": 5e-05, "loss": 0.0825, "num_input_tokens_seen": 154258108, "step": 1690 }, { "epoch": 7.041666666666667, "loss": 0.09026078134775162, "loss_ce": 0.00026444171089679003, "loss_iou": 0.259765625, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 154258108, "step": 1690 }, { "epoch": 7.045833333333333, "grad_norm": 10.266520343130312, "learning_rate": 5e-05, "loss": 0.1145, "num_input_tokens_seen": 154349940, "step": 1691 }, { "epoch": 7.045833333333333, "loss": 0.061724916100502014, "loss_ce": 0.002810731064528227, "loss_iou": 0.298828125, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 154349940, "step": 1691 }, { "epoch": 7.05, "grad_norm": 6.154462162547571, "learning_rate": 5e-05, "loss": 0.1351, "num_input_tokens_seen": 154441532, "step": 1692 }, { "epoch": 7.05, "loss": 0.1281442642211914, "loss_ce": 0.0013589877635240555, "loss_iou": 0.302734375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 154441532, "step": 1692 }, { "epoch": 7.054166666666666, "grad_norm": 3.2667525209962776, "learning_rate": 5e-05, "loss": 0.1133, "num_input_tokens_seen": 154533092, "step": 1693 }, { "epoch": 7.054166666666666, "loss": 0.09928688406944275, "loss_ce": 0.0016916656168177724, "loss_iou": 0.265625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 154533092, "step": 1693 }, { "epoch": 7.058333333333334, "grad_norm": 3.077707416306053, "learning_rate": 5e-05, "loss": 0.1039, "num_input_tokens_seen": 154624224, "step": 1694 }, { "epoch": 7.058333333333334, "loss": 0.09326724708080292, "loss_ce": 3.6047880712430924e-05, "loss_iou": 0.2890625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 154624224, "step": 1694 }, { "epoch": 7.0625, "grad_norm": 5.3368795339634945, "learning_rate": 5e-05, "loss": 0.0891, "num_input_tokens_seen": 154715264, "step": 1695 }, { "epoch": 7.0625, "loss": 0.0723080262541771, "loss_ce": 1.1880889360327274e-05, "loss_iou": 0.28125, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 154715264, "step": 1695 }, { "epoch": 7.066666666666666, "grad_norm": 6.211008016088809, "learning_rate": 5e-05, "loss": 0.1016, "num_input_tokens_seen": 154805936, "step": 1696 }, { "epoch": 7.066666666666666, "loss": 0.13396210968494415, "loss_ce": 8.14942322904244e-05, "loss_iou": 0.2265625, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 154805936, "step": 1696 }, { "epoch": 7.070833333333334, "grad_norm": 7.984183401976844, "learning_rate": 5e-05, "loss": 0.1112, "num_input_tokens_seen": 154897224, "step": 1697 }, { "epoch": 7.070833333333334, "loss": 0.11862719058990479, "loss_ce": 0.0006462404271587729, "loss_iou": 0.2578125, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 154897224, "step": 1697 }, { "epoch": 7.075, "grad_norm": 4.0972656746849685, "learning_rate": 5e-05, "loss": 0.1214, "num_input_tokens_seen": 154987660, "step": 1698 }, { "epoch": 7.075, "loss": 0.09393851459026337, "loss_ce": 5.118623812450096e-05, "loss_iou": 0.279296875, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 154987660, "step": 1698 }, { "epoch": 7.079166666666667, "grad_norm": 7.568165199224343, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 155078880, "step": 1699 }, { "epoch": 7.079166666666667, "loss": 0.05662469193339348, "loss_ce": 0.0005028644227422774, "loss_iou": 0.341796875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 155078880, "step": 1699 }, { "epoch": 7.083333333333333, "grad_norm": 3.1584135776625484, "learning_rate": 5e-05, "loss": 0.0835, "num_input_tokens_seen": 155170128, "step": 1700 }, { "epoch": 7.083333333333333, "loss": 0.08303529024124146, "loss_ce": 0.00010377775470260531, "loss_iou": 0.21484375, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 155170128, "step": 1700 }, { "epoch": 7.0875, "grad_norm": 5.456883289815665, "learning_rate": 5e-05, "loss": 0.1264, "num_input_tokens_seen": 155261300, "step": 1701 }, { "epoch": 7.0875, "loss": 0.08729474991559982, "loss_ce": 0.0007011258276179433, "loss_iou": 0.189453125, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 155261300, "step": 1701 }, { "epoch": 7.091666666666667, "grad_norm": 6.332498523640968, "learning_rate": 5e-05, "loss": 0.0662, "num_input_tokens_seen": 155352628, "step": 1702 }, { "epoch": 7.091666666666667, "loss": 0.0688634067773819, "loss_ce": 0.0005803273525089025, "loss_iou": 0.2890625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 155352628, "step": 1702 }, { "epoch": 7.095833333333333, "grad_norm": 3.144063696162726, "learning_rate": 5e-05, "loss": 0.1169, "num_input_tokens_seen": 155443380, "step": 1703 }, { "epoch": 7.095833333333333, "loss": 0.12068355828523636, "loss_ce": 1.7052898328984156e-05, "loss_iou": 0.2578125, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 155443380, "step": 1703 }, { "epoch": 7.1, "grad_norm": 2.2506929894056227, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 155534608, "step": 1704 }, { "epoch": 7.1, "loss": 0.06894703209400177, "loss_ce": 2.308711555087939e-05, "loss_iou": 0.392578125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 155534608, "step": 1704 }, { "epoch": 7.104166666666667, "grad_norm": 5.102828558713652, "learning_rate": 5e-05, "loss": 0.0963, "num_input_tokens_seen": 155625756, "step": 1705 }, { "epoch": 7.104166666666667, "loss": 0.08216731250286102, "loss_ce": 0.0002810218429658562, "loss_iou": 0.283203125, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 155625756, "step": 1705 }, { "epoch": 7.108333333333333, "grad_norm": 1.9318064493586913, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 155717672, "step": 1706 }, { "epoch": 7.108333333333333, "loss": 0.07421234250068665, "loss_ce": 0.000268249015789479, "loss_iou": 0.287109375, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 155717672, "step": 1706 }, { "epoch": 7.1125, "grad_norm": 4.992836138998759, "learning_rate": 5e-05, "loss": 0.1104, "num_input_tokens_seen": 155808784, "step": 1707 }, { "epoch": 7.1125, "loss": 0.10705584287643433, "loss_ce": 0.000419794290792197, "loss_iou": 0.244140625, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 155808784, "step": 1707 }, { "epoch": 7.116666666666666, "grad_norm": 4.0846848007526955, "learning_rate": 5e-05, "loss": 0.084, "num_input_tokens_seen": 155900316, "step": 1708 }, { "epoch": 7.116666666666666, "loss": 0.10803817212581635, "loss_ce": 0.0026304549537599087, "loss_iou": 0.31640625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 155900316, "step": 1708 }, { "epoch": 7.120833333333334, "grad_norm": 3.303076878654234, "learning_rate": 5e-05, "loss": 0.0758, "num_input_tokens_seen": 155991784, "step": 1709 }, { "epoch": 7.120833333333334, "loss": 0.10933535546064377, "loss_ce": 0.0003570847911760211, "loss_iou": 0.265625, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 155991784, "step": 1709 }, { "epoch": 7.125, "grad_norm": 15.969175324546415, "learning_rate": 5e-05, "loss": 0.0786, "num_input_tokens_seen": 156083244, "step": 1710 }, { "epoch": 7.125, "loss": 0.07175493240356445, "loss_ce": 0.0028767550829797983, "loss_iou": 0.271484375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 156083244, "step": 1710 }, { "epoch": 7.129166666666666, "grad_norm": 5.318811792978995, "learning_rate": 5e-05, "loss": 0.094, "num_input_tokens_seen": 156174512, "step": 1711 }, { "epoch": 7.129166666666666, "loss": 0.08147059381008148, "loss_ce": 0.0002480625989846885, "loss_iou": 0.48046875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 156174512, "step": 1711 }, { "epoch": 7.133333333333334, "grad_norm": 3.591541353502176, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 156266228, "step": 1712 }, { "epoch": 7.133333333333334, "loss": 0.04183837026357651, "loss_ce": 0.0005480895051732659, "loss_iou": 0.310546875, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 156266228, "step": 1712 }, { "epoch": 7.1375, "grad_norm": 6.928688783074016, "learning_rate": 5e-05, "loss": 0.1291, "num_input_tokens_seen": 156357936, "step": 1713 }, { "epoch": 7.1375, "loss": 0.14094755053520203, "loss_ce": 7.841399929020554e-05, "loss_iou": 0.287109375, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 156357936, "step": 1713 }, { "epoch": 7.141666666666667, "grad_norm": 6.571922978586136, "learning_rate": 5e-05, "loss": 0.0955, "num_input_tokens_seen": 156449336, "step": 1714 }, { "epoch": 7.141666666666667, "loss": 0.10532679408788681, "loss_ce": 1.0631374607328326e-05, "loss_iou": 0.37109375, "loss_num": 0.02099609375, "loss_xval": 0.10546875, "num_input_tokens_seen": 156449336, "step": 1714 }, { "epoch": 7.145833333333333, "grad_norm": 3.1341725428157314, "learning_rate": 5e-05, "loss": 0.1052, "num_input_tokens_seen": 156541464, "step": 1715 }, { "epoch": 7.145833333333333, "loss": 0.11285033822059631, "loss_ce": 0.006099840626120567, "loss_iou": 0.33203125, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 156541464, "step": 1715 }, { "epoch": 7.15, "grad_norm": 4.091806497084258, "learning_rate": 5e-05, "loss": 0.1007, "num_input_tokens_seen": 156632996, "step": 1716 }, { "epoch": 7.15, "loss": 0.08084506541490555, "loss_ce": 0.0005380603251978755, "loss_iou": 0.28125, "loss_num": 0.01611328125, "loss_xval": 0.080078125, "num_input_tokens_seen": 156632996, "step": 1716 }, { "epoch": 7.154166666666667, "grad_norm": 4.742170644569578, "learning_rate": 5e-05, "loss": 0.0893, "num_input_tokens_seen": 156724204, "step": 1717 }, { "epoch": 7.154166666666667, "loss": 0.07490211725234985, "loss_ce": 1.1978670954704285e-05, "loss_iou": 0.3828125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 156724204, "step": 1717 }, { "epoch": 7.158333333333333, "grad_norm": 5.407806077931396, "learning_rate": 5e-05, "loss": 0.1, "num_input_tokens_seen": 156815800, "step": 1718 }, { "epoch": 7.158333333333333, "loss": 0.093394935131073, "loss_ce": 0.0001637310051592067, "loss_iou": 0.283203125, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 156815800, "step": 1718 }, { "epoch": 7.1625, "grad_norm": 2.095490907997988, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 156907332, "step": 1719 }, { "epoch": 7.1625, "loss": 0.04713945463299751, "loss_ce": 0.0006001486326567829, "loss_iou": 0.205078125, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 156907332, "step": 1719 }, { "epoch": 7.166666666666667, "grad_norm": 13.781994171670535, "learning_rate": 5e-05, "loss": 0.1179, "num_input_tokens_seen": 156999412, "step": 1720 }, { "epoch": 7.166666666666667, "loss": 0.1414538472890854, "loss_ce": 0.002720939228311181, "loss_iou": 0.2080078125, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 156999412, "step": 1720 }, { "epoch": 7.170833333333333, "grad_norm": 5.382132021159549, "learning_rate": 5e-05, "loss": 0.1094, "num_input_tokens_seen": 157090580, "step": 1721 }, { "epoch": 7.170833333333333, "loss": 0.14994627237319946, "loss_ce": 0.0006390261114574969, "loss_iou": 0.40625, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 157090580, "step": 1721 }, { "epoch": 7.175, "grad_norm": 2.8820191575286382, "learning_rate": 5e-05, "loss": 0.0963, "num_input_tokens_seen": 157181548, "step": 1722 }, { "epoch": 7.175, "loss": 0.08300650119781494, "loss_ce": 0.0009599894401617348, "loss_iou": 0.255859375, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 157181548, "step": 1722 }, { "epoch": 7.179166666666666, "grad_norm": 2.4030723794000637, "learning_rate": 5e-05, "loss": 0.1025, "num_input_tokens_seen": 157272880, "step": 1723 }, { "epoch": 7.179166666666666, "loss": 0.142893984913826, "loss_ce": 0.0014907920267432928, "loss_iou": 0.2138671875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 157272880, "step": 1723 }, { "epoch": 7.183333333333334, "grad_norm": 6.555642784643792, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 157363724, "step": 1724 }, { "epoch": 7.183333333333334, "loss": 0.08335313946008682, "loss_ce": 9.633986337576061e-06, "loss_iou": 0.31640625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 157363724, "step": 1724 }, { "epoch": 7.1875, "grad_norm": 2.5435019828539365, "learning_rate": 5e-05, "loss": 0.0735, "num_input_tokens_seen": 157455424, "step": 1725 }, { "epoch": 7.1875, "loss": 0.053869716823101044, "loss_ce": 0.0006470587686635554, "loss_iou": 0.384765625, "loss_num": 0.0106201171875, "loss_xval": 0.05322265625, "num_input_tokens_seen": 157455424, "step": 1725 }, { "epoch": 7.191666666666666, "grad_norm": 2.8380242838082737, "learning_rate": 5e-05, "loss": 0.1243, "num_input_tokens_seen": 157546968, "step": 1726 }, { "epoch": 7.191666666666666, "loss": 0.12494509667158127, "loss_ce": 3.665284748421982e-05, "loss_iou": 0.30078125, "loss_num": 0.02490234375, "loss_xval": 0.125, "num_input_tokens_seen": 157546968, "step": 1726 }, { "epoch": 7.195833333333334, "grad_norm": 4.899015514922593, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 157638908, "step": 1727 }, { "epoch": 7.195833333333334, "loss": 0.048561014235019684, "loss_ce": 0.00087729626102373, "loss_iou": 0.31640625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 157638908, "step": 1727 }, { "epoch": 7.2, "grad_norm": 1.9257435909322718, "learning_rate": 5e-05, "loss": 0.0835, "num_input_tokens_seen": 157730612, "step": 1728 }, { "epoch": 7.2, "loss": 0.11189435422420502, "loss_ce": 1.6904214135138318e-05, "loss_iou": 0.216796875, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 157730612, "step": 1728 }, { "epoch": 7.204166666666667, "grad_norm": 2.4804711767686283, "learning_rate": 5e-05, "loss": 0.094, "num_input_tokens_seen": 157821592, "step": 1729 }, { "epoch": 7.204166666666667, "loss": 0.1316571682691574, "loss_ce": 4.337953214417212e-06, "loss_iou": 0.380859375, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 157821592, "step": 1729 }, { "epoch": 7.208333333333333, "grad_norm": 8.301899808282865, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 157913000, "step": 1730 }, { "epoch": 7.208333333333333, "loss": 0.12285293638706207, "loss_ce": 1.968711512745358e-05, "loss_iou": 0.283203125, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 157913000, "step": 1730 }, { "epoch": 7.2125, "grad_norm": 4.1752854960273, "learning_rate": 5e-05, "loss": 0.0798, "num_input_tokens_seen": 158004764, "step": 1731 }, { "epoch": 7.2125, "loss": 0.06936931610107422, "loss_ce": 0.0003538095043040812, "loss_iou": 0.36328125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 158004764, "step": 1731 }, { "epoch": 7.216666666666667, "grad_norm": 3.3411710342234895, "learning_rate": 5e-05, "loss": 0.0788, "num_input_tokens_seen": 158096040, "step": 1732 }, { "epoch": 7.216666666666667, "loss": 0.10201053321361542, "loss_ce": 0.0016687337774783373, "loss_iou": 0.384765625, "loss_num": 0.02001953125, "loss_xval": 0.1005859375, "num_input_tokens_seen": 158096040, "step": 1732 }, { "epoch": 7.220833333333333, "grad_norm": 3.532147965636991, "learning_rate": 5e-05, "loss": 0.0754, "num_input_tokens_seen": 158186896, "step": 1733 }, { "epoch": 7.220833333333333, "loss": 0.08085125684738159, "loss_ce": 0.0004450652631931007, "loss_iou": 0.32421875, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 158186896, "step": 1733 }, { "epoch": 7.225, "grad_norm": 4.222192536783828, "learning_rate": 5e-05, "loss": 0.1023, "num_input_tokens_seen": 158278220, "step": 1734 }, { "epoch": 7.225, "loss": 0.07080645859241486, "loss_ce": 0.0016383626498281956, "loss_iou": 0.341796875, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 158278220, "step": 1734 }, { "epoch": 7.229166666666667, "grad_norm": 5.968139727235326, "learning_rate": 5e-05, "loss": 0.0941, "num_input_tokens_seen": 158369004, "step": 1735 }, { "epoch": 7.229166666666667, "loss": 0.0862986296415329, "loss_ce": 1.018310558720259e-05, "loss_iou": 0.322265625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 158369004, "step": 1735 }, { "epoch": 7.233333333333333, "grad_norm": 6.387746133616494, "learning_rate": 5e-05, "loss": 0.1032, "num_input_tokens_seen": 158460752, "step": 1736 }, { "epoch": 7.233333333333333, "loss": 0.1081860139966011, "loss_ce": 0.00024534264230169356, "loss_iou": 0.392578125, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 158460752, "step": 1736 }, { "epoch": 7.2375, "grad_norm": 1.3763892685803873, "learning_rate": 5e-05, "loss": 0.1363, "num_input_tokens_seen": 158552252, "step": 1737 }, { "epoch": 7.2375, "loss": 0.16385185718536377, "loss_ce": 0.0014983414439484477, "loss_iou": 0.302734375, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 158552252, "step": 1737 }, { "epoch": 7.241666666666666, "grad_norm": 5.7207446363743895, "learning_rate": 5e-05, "loss": 0.1203, "num_input_tokens_seen": 158644072, "step": 1738 }, { "epoch": 7.241666666666666, "loss": 0.16266997158527374, "loss_ce": 0.0008810389554128051, "loss_iou": 0.1953125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 158644072, "step": 1738 }, { "epoch": 7.245833333333334, "grad_norm": 4.812282703252393, "learning_rate": 5e-05, "loss": 0.1012, "num_input_tokens_seen": 158734884, "step": 1739 }, { "epoch": 7.245833333333334, "loss": 0.10428060591220856, "loss_ce": 1.7295438738074154e-05, "loss_iou": 0.21875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 158734884, "step": 1739 }, { "epoch": 7.25, "grad_norm": 1.5478532072316726, "learning_rate": 5e-05, "loss": 0.0765, "num_input_tokens_seen": 158826440, "step": 1740 }, { "epoch": 7.25, "loss": 0.07442457973957062, "loss_ce": 0.0011671364773064852, "loss_iou": 0.24609375, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 158826440, "step": 1740 }, { "epoch": 7.254166666666666, "grad_norm": 6.928940226324635, "learning_rate": 5e-05, "loss": 0.0902, "num_input_tokens_seen": 158917548, "step": 1741 }, { "epoch": 7.254166666666666, "loss": 0.11471019685268402, "loss_ce": 8.617914863862097e-05, "loss_iou": 0.2578125, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 158917548, "step": 1741 }, { "epoch": 7.258333333333334, "grad_norm": 3.058820583296289, "learning_rate": 5e-05, "loss": 0.0841, "num_input_tokens_seen": 159009512, "step": 1742 }, { "epoch": 7.258333333333334, "loss": 0.053757186979055405, "loss_ce": 0.0035862871445715427, "loss_iou": 0.34375, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 159009512, "step": 1742 }, { "epoch": 7.2625, "grad_norm": 2.936191227345829, "learning_rate": 5e-05, "loss": 0.0919, "num_input_tokens_seen": 159101340, "step": 1743 }, { "epoch": 7.2625, "loss": 0.0766802728176117, "loss_ce": 0.0006152114365249872, "loss_iou": 0.38671875, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 159101340, "step": 1743 }, { "epoch": 7.266666666666667, "grad_norm": 7.309826953349478, "learning_rate": 5e-05, "loss": 0.0705, "num_input_tokens_seen": 159193408, "step": 1744 }, { "epoch": 7.266666666666667, "loss": 0.08390648663043976, "loss_ce": 0.002439806703478098, "loss_iou": 0.302734375, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 159193408, "step": 1744 }, { "epoch": 7.270833333333333, "grad_norm": 4.970001831892565, "learning_rate": 5e-05, "loss": 0.0955, "num_input_tokens_seen": 159284636, "step": 1745 }, { "epoch": 7.270833333333333, "loss": 0.12510189414024353, "loss_ce": 7.13820118107833e-05, "loss_iou": 0.28515625, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 159284636, "step": 1745 }, { "epoch": 7.275, "grad_norm": 3.2103467072844767, "learning_rate": 5e-05, "loss": 0.1118, "num_input_tokens_seen": 159376364, "step": 1746 }, { "epoch": 7.275, "loss": 0.08453178405761719, "loss_ce": 0.001142505556344986, "loss_iou": 0.35546875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 159376364, "step": 1746 }, { "epoch": 7.279166666666667, "grad_norm": 2.8748852927933912, "learning_rate": 5e-05, "loss": 0.0864, "num_input_tokens_seen": 159467452, "step": 1747 }, { "epoch": 7.279166666666667, "loss": 0.09032277762889862, "loss_ce": 0.00034169177524745464, "loss_iou": 0.0751953125, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 159467452, "step": 1747 }, { "epoch": 7.283333333333333, "grad_norm": 1.8429200812612072, "learning_rate": 5e-05, "loss": 0.0886, "num_input_tokens_seen": 159558896, "step": 1748 }, { "epoch": 7.283333333333333, "loss": 0.07524539530277252, "loss_ce": 0.002735632471740246, "loss_iou": 0.263671875, "loss_num": 0.0145263671875, "loss_xval": 0.072265625, "num_input_tokens_seen": 159558896, "step": 1748 }, { "epoch": 7.2875, "grad_norm": 2.1798804635746323, "learning_rate": 5e-05, "loss": 0.0865, "num_input_tokens_seen": 159648372, "step": 1749 }, { "epoch": 7.2875, "loss": 0.06625260412693024, "loss_ce": 1.4205359548213892e-05, "loss_iou": 0.1669921875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 159648372, "step": 1749 }, { "epoch": 7.291666666666667, "grad_norm": 2.636309635324973, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 159739544, "step": 1750 }, { "epoch": 7.291666666666667, "eval_seeclick_CIoU": 0.274772547185421, "eval_seeclick_GIoU": 0.2568713426589966, "eval_seeclick_IoU": 0.3638703525066376, "eval_seeclick_MAE_all": 0.08645062521100044, "eval_seeclick_MAE_h": 0.08346085995435715, "eval_seeclick_MAE_w": 0.157043918967247, "eval_seeclick_MAE_x_boxes": 0.17312509566545486, "eval_seeclick_MAE_y_boxes": 0.08508550748229027, "eval_seeclick_NUM_probability": 0.9999987185001373, "eval_seeclick_inside_bbox": 0.5241477340459824, "eval_seeclick_loss": 0.5137174725532532, "eval_seeclick_loss_ce": 0.13229186832904816, "eval_seeclick_loss_iou": 0.4346923828125, "eval_seeclick_loss_num": 0.07465362548828125, "eval_seeclick_loss_xval": 0.372894287109375, "eval_seeclick_runtime": 75.2043, "eval_seeclick_samples_per_second": 0.572, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 159739544, "step": 1750 }, { "epoch": 7.291666666666667, "eval_icons_CIoU": 0.3470269590616226, "eval_icons_GIoU": 0.37907470762729645, "eval_icons_IoU": 0.42870812118053436, "eval_icons_MAE_all": 0.0643687080591917, "eval_icons_MAE_h": 0.13963689282536507, "eval_icons_MAE_w": 0.09502165019512177, "eval_icons_MAE_x_boxes": 0.09512116760015488, "eval_icons_MAE_y_boxes": 0.14081553369760513, "eval_icons_NUM_probability": 0.9999993741512299, "eval_icons_inside_bbox": 0.6215277910232544, "eval_icons_loss": 0.31115666031837463, "eval_icons_loss_ce": 0.0013536059414036572, "eval_icons_loss_iou": 0.27740478515625, "eval_icons_loss_num": 0.0612945556640625, "eval_icons_loss_xval": 0.30670166015625, "eval_icons_runtime": 88.8429, "eval_icons_samples_per_second": 0.563, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 159739544, "step": 1750 }, { "epoch": 7.291666666666667, "eval_screenspot_CIoU": 0.3959275384744008, "eval_screenspot_GIoU": 0.3979260226090749, "eval_screenspot_IoU": 0.4649760623772939, "eval_screenspot_MAE_all": 0.09103357543547948, "eval_screenspot_MAE_h": 0.06914364298184712, "eval_screenspot_MAE_w": 0.20312588413556418, "eval_screenspot_MAE_x_boxes": 0.18540192147096, "eval_screenspot_MAE_y_boxes": 0.06311593949794769, "eval_screenspot_NUM_probability": 0.9999985496203104, "eval_screenspot_inside_bbox": 0.693750003973643, "eval_screenspot_loss": 0.4558645188808441, "eval_screenspot_loss_ce": 0.0005789737806480844, "eval_screenspot_loss_iou": 0.4074300130208333, "eval_screenspot_loss_num": 0.09119669596354167, "eval_screenspot_loss_xval": 0.4561360677083333, "eval_screenspot_runtime": 153.3033, "eval_screenspot_samples_per_second": 0.581, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 159739544, "step": 1750 }, { "epoch": 7.291666666666667, "eval_compot_CIoU": 0.39731016755104065, "eval_compot_GIoU": 0.3796231150627136, "eval_compot_IoU": 0.47868672013282776, "eval_compot_MAE_all": 0.06850907020270824, "eval_compot_MAE_h": 0.09156372398138046, "eval_compot_MAE_w": 0.15624917298555374, "eval_compot_MAE_x_boxes": 0.15558727830648422, "eval_compot_MAE_y_boxes": 0.08946410566568375, "eval_compot_NUM_probability": 0.9999960064888, "eval_compot_inside_bbox": 0.6371527910232544, "eval_compot_loss": 0.3551335632801056, "eval_compot_loss_ce": 0.03361728601157665, "eval_compot_loss_iou": 0.3394775390625, "eval_compot_loss_num": 0.057178497314453125, "eval_compot_loss_xval": 0.2858428955078125, "eval_compot_runtime": 91.3889, "eval_compot_samples_per_second": 0.547, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 159739544, "step": 1750 }, { "epoch": 7.291666666666667, "loss": 0.35525137186050415, "loss_ce": 0.03658682852983475, "loss_iou": 0.330078125, "loss_num": 0.06396484375, "loss_xval": 0.318359375, "num_input_tokens_seen": 159739544, "step": 1750 }, { "epoch": 7.295833333333333, "grad_norm": 6.198843157364279, "learning_rate": 5e-05, "loss": 0.0698, "num_input_tokens_seen": 159830292, "step": 1751 }, { "epoch": 7.295833333333333, "loss": 0.05363104119896889, "loss_ce": 4.2173967813141644e-05, "loss_iou": 0.2890625, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 159830292, "step": 1751 }, { "epoch": 7.3, "grad_norm": 8.462068378678884, "learning_rate": 5e-05, "loss": 0.0886, "num_input_tokens_seen": 159921348, "step": 1752 }, { "epoch": 7.3, "loss": 0.06018263101577759, "loss_ce": 0.00015455312677659094, "loss_iou": 0.2890625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 159921348, "step": 1752 }, { "epoch": 7.304166666666666, "grad_norm": 2.9406017989968753, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 160013564, "step": 1753 }, { "epoch": 7.304166666666666, "loss": 0.03546938672661781, "loss_ce": 0.003761622356250882, "loss_iou": 0.232421875, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 160013564, "step": 1753 }, { "epoch": 7.308333333333334, "grad_norm": 2.107795043077596, "learning_rate": 5e-05, "loss": 0.0827, "num_input_tokens_seen": 160104816, "step": 1754 }, { "epoch": 7.308333333333334, "loss": 0.1058989018201828, "loss_ce": 0.0004911827854812145, "loss_iou": 0.2734375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 160104816, "step": 1754 }, { "epoch": 7.3125, "grad_norm": 4.2175137993238305, "learning_rate": 5e-05, "loss": 0.1338, "num_input_tokens_seen": 160196380, "step": 1755 }, { "epoch": 7.3125, "loss": 0.16473883390426636, "loss_ce": 0.0004474582092370838, "loss_iou": 0.1103515625, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 160196380, "step": 1755 }, { "epoch": 7.316666666666666, "grad_norm": 1.958983067716341, "learning_rate": 5e-05, "loss": 0.0813, "num_input_tokens_seen": 160286304, "step": 1756 }, { "epoch": 7.316666666666666, "loss": 0.07399453222751617, "loss_ce": 0.00073708884883672, "loss_iou": 0.271484375, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 160286304, "step": 1756 }, { "epoch": 7.320833333333334, "grad_norm": 2.9633889667155144, "learning_rate": 5e-05, "loss": 0.0693, "num_input_tokens_seen": 160377004, "step": 1757 }, { "epoch": 7.320833333333334, "loss": 0.05870777368545532, "loss_ce": 0.00048023491399362683, "loss_iou": 0.37890625, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 160377004, "step": 1757 }, { "epoch": 7.325, "grad_norm": 29.96955038482011, "learning_rate": 5e-05, "loss": 0.0882, "num_input_tokens_seen": 160468532, "step": 1758 }, { "epoch": 7.325, "loss": 0.0886864885687828, "loss_ce": 0.0011010420275852084, "loss_iou": 0.2578125, "loss_num": 0.017578125, "loss_xval": 0.08740234375, "num_input_tokens_seen": 160468532, "step": 1758 }, { "epoch": 7.329166666666667, "grad_norm": 3.084052387858358, "learning_rate": 5e-05, "loss": 0.1047, "num_input_tokens_seen": 160559280, "step": 1759 }, { "epoch": 7.329166666666667, "loss": 0.0805143266916275, "loss_ce": 6.99890370015055e-05, "loss_iou": 0.1513671875, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 160559280, "step": 1759 }, { "epoch": 7.333333333333333, "grad_norm": 3.032790090810266, "learning_rate": 5e-05, "loss": 0.1004, "num_input_tokens_seen": 160650272, "step": 1760 }, { "epoch": 7.333333333333333, "loss": 0.10603293776512146, "loss_ce": 0.0004116000491194427, "loss_iou": 0.322265625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 160650272, "step": 1760 }, { "epoch": 7.3375, "grad_norm": 2.9409556739143667, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 160741936, "step": 1761 }, { "epoch": 7.3375, "loss": 0.08558277040719986, "loss_ce": 0.00027087461785413325, "loss_iou": 0.306640625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 160741936, "step": 1761 }, { "epoch": 7.341666666666667, "grad_norm": 5.113927821544099, "learning_rate": 5e-05, "loss": 0.1019, "num_input_tokens_seen": 160833300, "step": 1762 }, { "epoch": 7.341666666666667, "loss": 0.06774862110614777, "loss_ce": 3.774898505071178e-05, "loss_iou": 0.271484375, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 160833300, "step": 1762 }, { "epoch": 7.345833333333333, "grad_norm": 2.827984109355143, "learning_rate": 5e-05, "loss": 0.1087, "num_input_tokens_seen": 160924888, "step": 1763 }, { "epoch": 7.345833333333333, "loss": 0.13058581948280334, "loss_ce": 0.0015193530125543475, "loss_iou": 0.251953125, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 160924888, "step": 1763 }, { "epoch": 7.35, "grad_norm": 2.7586655479972992, "learning_rate": 5e-05, "loss": 0.106, "num_input_tokens_seen": 161015604, "step": 1764 }, { "epoch": 7.35, "loss": 0.08002396672964096, "loss_ce": 0.004813396371901035, "loss_iou": 0.216796875, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 161015604, "step": 1764 }, { "epoch": 7.354166666666667, "grad_norm": 4.62529291783808, "learning_rate": 5e-05, "loss": 0.0898, "num_input_tokens_seen": 161107176, "step": 1765 }, { "epoch": 7.354166666666667, "loss": 0.10449203848838806, "loss_ce": 0.002532810904085636, "loss_iou": 0.263671875, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 161107176, "step": 1765 }, { "epoch": 7.358333333333333, "grad_norm": 2.5774845134583004, "learning_rate": 5e-05, "loss": 0.1122, "num_input_tokens_seen": 161198640, "step": 1766 }, { "epoch": 7.358333333333333, "loss": 0.07467900216579437, "loss_ce": 1.774665724951774e-05, "loss_iou": 0.22265625, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 161198640, "step": 1766 }, { "epoch": 7.3625, "grad_norm": 1.900906724887545, "learning_rate": 5e-05, "loss": 0.1232, "num_input_tokens_seen": 161290120, "step": 1767 }, { "epoch": 7.3625, "loss": 0.09348450601100922, "loss_ce": 0.00036012171767652035, "loss_iou": 0.30078125, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 161290120, "step": 1767 }, { "epoch": 7.366666666666666, "grad_norm": 2.3698025807422476, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 161381656, "step": 1768 }, { "epoch": 7.366666666666666, "loss": 0.0635412186384201, "loss_ce": 0.00021724399994127452, "loss_iou": 0.3203125, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 161381656, "step": 1768 }, { "epoch": 7.370833333333334, "grad_norm": 5.86903240139928, "learning_rate": 5e-05, "loss": 0.0694, "num_input_tokens_seen": 161472568, "step": 1769 }, { "epoch": 7.370833333333334, "loss": 0.03189694136381149, "loss_ce": 0.0002273284044349566, "loss_iou": 0.265625, "loss_num": 0.006317138671875, "loss_xval": 0.03173828125, "num_input_tokens_seen": 161472568, "step": 1769 }, { "epoch": 7.375, "grad_norm": 3.7597933825689926, "learning_rate": 5e-05, "loss": 0.1349, "num_input_tokens_seen": 161562416, "step": 1770 }, { "epoch": 7.375, "loss": 0.10958030074834824, "loss_ce": 3.745627327589318e-05, "loss_iou": 0.2490234375, "loss_num": 0.02197265625, "loss_xval": 0.109375, "num_input_tokens_seen": 161562416, "step": 1770 }, { "epoch": 7.379166666666666, "grad_norm": 3.4972689341463834, "learning_rate": 5e-05, "loss": 0.0715, "num_input_tokens_seen": 161653772, "step": 1771 }, { "epoch": 7.379166666666666, "loss": 0.06820204108953476, "loss_ce": 0.00020887877326458693, "loss_iou": 0.23828125, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 161653772, "step": 1771 }, { "epoch": 7.383333333333334, "grad_norm": 5.528369180509633, "learning_rate": 5e-05, "loss": 0.16, "num_input_tokens_seen": 161745908, "step": 1772 }, { "epoch": 7.383333333333334, "loss": 0.17425945401191711, "loss_ce": 0.004047665745019913, "loss_iou": 0.2734375, "loss_num": 0.0341796875, "loss_xval": 0.169921875, "num_input_tokens_seen": 161745908, "step": 1772 }, { "epoch": 7.3875, "grad_norm": 14.436643120766435, "learning_rate": 5e-05, "loss": 0.1285, "num_input_tokens_seen": 161837884, "step": 1773 }, { "epoch": 7.3875, "loss": 0.14607399702072144, "loss_ce": 0.0024887986946851015, "loss_iou": 0.25, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 161837884, "step": 1773 }, { "epoch": 7.391666666666667, "grad_norm": 5.205319470785945, "learning_rate": 5e-05, "loss": 0.0941, "num_input_tokens_seen": 161929180, "step": 1774 }, { "epoch": 7.391666666666667, "loss": 0.11127667874097824, "loss_ce": 7.062430813675746e-05, "loss_iou": 0.185546875, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 161929180, "step": 1774 }, { "epoch": 7.395833333333333, "grad_norm": 3.9657384758323784, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 162020952, "step": 1775 }, { "epoch": 7.395833333333333, "loss": 0.041575320065021515, "loss_ce": 0.00013244900037534535, "loss_iou": 0.3203125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 162020952, "step": 1775 }, { "epoch": 7.4, "grad_norm": 3.3889964350870416, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 162112412, "step": 1776 }, { "epoch": 7.4, "loss": 0.05319908261299133, "loss_ce": 0.000830917851999402, "loss_iou": 0.41015625, "loss_num": 0.010498046875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 162112412, "step": 1776 }, { "epoch": 7.404166666666667, "grad_norm": 3.1937320635112303, "learning_rate": 5e-05, "loss": 0.1485, "num_input_tokens_seen": 162203620, "step": 1777 }, { "epoch": 7.404166666666667, "loss": 0.20381496846675873, "loss_ce": 0.00026272700051777065, "loss_iou": 0.2216796875, "loss_num": 0.040771484375, "loss_xval": 0.203125, "num_input_tokens_seen": 162203620, "step": 1777 }, { "epoch": 7.408333333333333, "grad_norm": 3.9853992529387043, "learning_rate": 5e-05, "loss": 0.0981, "num_input_tokens_seen": 162294724, "step": 1778 }, { "epoch": 7.408333333333333, "loss": 0.046426184475421906, "loss_ce": 0.0010465431259945035, "loss_iou": 0.287109375, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 162294724, "step": 1778 }, { "epoch": 7.4125, "grad_norm": 4.154016201076832, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 162387112, "step": 1779 }, { "epoch": 7.4125, "loss": 0.05779798701405525, "loss_ce": 0.0002952393260784447, "loss_iou": 0.25, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 162387112, "step": 1779 }, { "epoch": 7.416666666666667, "grad_norm": 2.7081336478210845, "learning_rate": 5e-05, "loss": 0.0791, "num_input_tokens_seen": 162478244, "step": 1780 }, { "epoch": 7.416666666666667, "loss": 0.12627843022346497, "loss_ce": 0.0008054060745052993, "loss_iou": 0.263671875, "loss_num": 0.025146484375, "loss_xval": 0.125, "num_input_tokens_seen": 162478244, "step": 1780 }, { "epoch": 7.420833333333333, "grad_norm": 3.848145766539644, "learning_rate": 5e-05, "loss": 0.1032, "num_input_tokens_seen": 162570056, "step": 1781 }, { "epoch": 7.420833333333333, "loss": 0.12114088982343674, "loss_ce": 0.0005659367889165878, "loss_iou": 0.2734375, "loss_num": 0.024169921875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 162570056, "step": 1781 }, { "epoch": 7.425, "grad_norm": 4.482393117561415, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 162661248, "step": 1782 }, { "epoch": 7.425, "loss": 0.07931487262248993, "loss_ce": 3.0203231290215626e-05, "loss_iou": 0.31640625, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 162661248, "step": 1782 }, { "epoch": 7.429166666666666, "grad_norm": 3.044807825876327, "learning_rate": 5e-05, "loss": 0.0735, "num_input_tokens_seen": 162752764, "step": 1783 }, { "epoch": 7.429166666666666, "loss": 0.07627473771572113, "loss_ce": 0.0013845983194187284, "loss_iou": 0.283203125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 162752764, "step": 1783 }, { "epoch": 7.433333333333334, "grad_norm": 4.313521043969587, "learning_rate": 5e-05, "loss": 0.0968, "num_input_tokens_seen": 162843720, "step": 1784 }, { "epoch": 7.433333333333334, "loss": 0.07608497887849808, "loss_ce": 4.6575505621149205e-06, "loss_iou": 0.3515625, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 162843720, "step": 1784 }, { "epoch": 7.4375, "grad_norm": 3.3882547187141774, "learning_rate": 5e-05, "loss": 0.0833, "num_input_tokens_seen": 162934908, "step": 1785 }, { "epoch": 7.4375, "loss": 0.050569839775562286, "loss_ce": 0.003069228958338499, "loss_iou": 0.306640625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 162934908, "step": 1785 }, { "epoch": 7.441666666666666, "grad_norm": 9.266034898076681, "learning_rate": 5e-05, "loss": 0.1423, "num_input_tokens_seen": 163025636, "step": 1786 }, { "epoch": 7.441666666666666, "loss": 0.13430999219417572, "loss_ce": 0.0001089509969460778, "loss_iou": 0.251953125, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 163025636, "step": 1786 }, { "epoch": 7.445833333333334, "grad_norm": 5.601334033354807, "learning_rate": 5e-05, "loss": 0.083, "num_input_tokens_seen": 163116296, "step": 1787 }, { "epoch": 7.445833333333334, "loss": 0.08957132697105408, "loss_ce": 0.00032267015194520354, "loss_iou": 0.21875, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 163116296, "step": 1787 }, { "epoch": 7.45, "grad_norm": 4.064665573397149, "learning_rate": 5e-05, "loss": 0.0832, "num_input_tokens_seen": 163207372, "step": 1788 }, { "epoch": 7.45, "loss": 0.07631734013557434, "loss_ce": 8.442741818726063e-05, "loss_iou": 0.3359375, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 163207372, "step": 1788 }, { "epoch": 7.454166666666667, "grad_norm": 4.578626723828098, "learning_rate": 5e-05, "loss": 0.0702, "num_input_tokens_seen": 163299240, "step": 1789 }, { "epoch": 7.454166666666667, "loss": 0.07299777865409851, "loss_ce": 0.0004727557534351945, "loss_iou": 0.330078125, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 163299240, "step": 1789 }, { "epoch": 7.458333333333333, "grad_norm": 3.3572150574552944, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 163390396, "step": 1790 }, { "epoch": 7.458333333333333, "loss": 0.05886243283748627, "loss_ce": 0.00036023391294293106, "loss_iou": 0.314453125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 163390396, "step": 1790 }, { "epoch": 7.4625, "grad_norm": 4.475665431055516, "learning_rate": 5e-05, "loss": 0.1383, "num_input_tokens_seen": 163481352, "step": 1791 }, { "epoch": 7.4625, "loss": 0.11866101622581482, "loss_ce": 0.00034437025897204876, "loss_iou": 0.3828125, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 163481352, "step": 1791 }, { "epoch": 7.466666666666667, "grad_norm": 6.067499830201406, "learning_rate": 5e-05, "loss": 0.0587, "num_input_tokens_seen": 163573364, "step": 1792 }, { "epoch": 7.466666666666667, "loss": 0.03994344547390938, "loss_ce": 1.1194573744433e-05, "loss_iou": 0.3125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 163573364, "step": 1792 }, { "epoch": 7.470833333333333, "grad_norm": 3.6669409658968752, "learning_rate": 5e-05, "loss": 0.1106, "num_input_tokens_seen": 163664580, "step": 1793 }, { "epoch": 7.470833333333333, "loss": 0.06494011729955673, "loss_ce": 0.00010552479943726212, "loss_iou": 0.1640625, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 163664580, "step": 1793 }, { "epoch": 7.475, "grad_norm": 1.9554889225100551, "learning_rate": 5e-05, "loss": 0.1038, "num_input_tokens_seen": 163755956, "step": 1794 }, { "epoch": 7.475, "loss": 0.03204867243766785, "loss_ce": 0.000508758588694036, "loss_iou": 0.1982421875, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 163755956, "step": 1794 }, { "epoch": 7.479166666666667, "grad_norm": 3.8375101247338526, "learning_rate": 5e-05, "loss": 0.1012, "num_input_tokens_seen": 163847680, "step": 1795 }, { "epoch": 7.479166666666667, "loss": 0.08776212483644485, "loss_ce": 0.0027172621339559555, "loss_iou": 0.1787109375, "loss_num": 0.01708984375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 163847680, "step": 1795 }, { "epoch": 7.483333333333333, "grad_norm": 7.015851833823572, "learning_rate": 5e-05, "loss": 0.1032, "num_input_tokens_seen": 163939356, "step": 1796 }, { "epoch": 7.483333333333333, "loss": 0.12932045757770538, "loss_ce": 0.00032265795744024217, "loss_iou": 0.240234375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 163939356, "step": 1796 }, { "epoch": 7.4875, "grad_norm": 6.226142800925313, "learning_rate": 5e-05, "loss": 0.109, "num_input_tokens_seen": 164030500, "step": 1797 }, { "epoch": 7.4875, "loss": 0.11356852203607559, "loss_ce": 0.00040170963620767, "loss_iou": 0.236328125, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 164030500, "step": 1797 }, { "epoch": 7.491666666666666, "grad_norm": 3.3726405865994757, "learning_rate": 5e-05, "loss": 0.086, "num_input_tokens_seen": 164121848, "step": 1798 }, { "epoch": 7.491666666666666, "loss": 0.11696110665798187, "loss_ce": 0.00012456333206500858, "loss_iou": 0.328125, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 164121848, "step": 1798 }, { "epoch": 7.495833333333334, "grad_norm": 6.853153266624445, "learning_rate": 5e-05, "loss": 0.1338, "num_input_tokens_seen": 164212348, "step": 1799 }, { "epoch": 7.495833333333334, "loss": 0.1441565304994583, "loss_ce": 0.003134795930236578, "loss_iou": 0.28515625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 164212348, "step": 1799 }, { "epoch": 7.5, "grad_norm": 3.5010048761569976, "learning_rate": 5e-05, "loss": 0.0802, "num_input_tokens_seen": 164303544, "step": 1800 }, { "epoch": 7.5, "loss": 0.07428819686174393, "loss_ce": 6.944570486666635e-05, "loss_iou": 0.439453125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 164303544, "step": 1800 }, { "epoch": 7.504166666666666, "grad_norm": 2.7467803518871814, "learning_rate": 5e-05, "loss": 0.0795, "num_input_tokens_seen": 164395328, "step": 1801 }, { "epoch": 7.504166666666666, "loss": 0.08531016856431961, "loss_ce": 0.0025922697968780994, "loss_iou": 0.373046875, "loss_num": 0.0166015625, "loss_xval": 0.08251953125, "num_input_tokens_seen": 164395328, "step": 1801 }, { "epoch": 7.508333333333333, "grad_norm": 2.641313105386422, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 164486700, "step": 1802 }, { "epoch": 7.508333333333333, "loss": 0.14157049357891083, "loss_ce": 2.996017792611383e-05, "loss_iou": 0.26171875, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 164486700, "step": 1802 }, { "epoch": 7.5125, "grad_norm": 2.5541777417865483, "learning_rate": 5e-05, "loss": 0.1008, "num_input_tokens_seen": 164578112, "step": 1803 }, { "epoch": 7.5125, "loss": 0.06394675374031067, "loss_ce": 0.0002565682225394994, "loss_iou": 0.16015625, "loss_num": 0.01275634765625, "loss_xval": 0.0634765625, "num_input_tokens_seen": 164578112, "step": 1803 }, { "epoch": 7.516666666666667, "grad_norm": 5.188109239324115, "learning_rate": 5e-05, "loss": 0.1443, "num_input_tokens_seen": 164669168, "step": 1804 }, { "epoch": 7.516666666666667, "loss": 0.1864711046218872, "loss_ce": 6.974764983169734e-05, "loss_iou": 0.2421875, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 164669168, "step": 1804 }, { "epoch": 7.520833333333333, "grad_norm": 5.032568141629647, "learning_rate": 5e-05, "loss": 0.1401, "num_input_tokens_seen": 164760132, "step": 1805 }, { "epoch": 7.520833333333333, "loss": 0.13615745306015015, "loss_ce": 0.0006136275478638709, "loss_iou": 0.2451171875, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 164760132, "step": 1805 }, { "epoch": 7.525, "grad_norm": 4.41854309371531, "learning_rate": 5e-05, "loss": 0.1193, "num_input_tokens_seen": 164851280, "step": 1806 }, { "epoch": 7.525, "loss": 0.12078698724508286, "loss_ce": 0.0022567142732441425, "loss_iou": 0.34765625, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 164851280, "step": 1806 }, { "epoch": 7.529166666666667, "grad_norm": 33.93754758442748, "learning_rate": 5e-05, "loss": 0.1264, "num_input_tokens_seen": 164942324, "step": 1807 }, { "epoch": 7.529166666666667, "loss": 0.06015244871377945, "loss_ce": 2.2963965875533177e-06, "loss_iou": 0.248046875, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 164942324, "step": 1807 }, { "epoch": 7.533333333333333, "grad_norm": 3.4167314336458787, "learning_rate": 5e-05, "loss": 0.1355, "num_input_tokens_seen": 165033428, "step": 1808 }, { "epoch": 7.533333333333333, "loss": 0.17138734459877014, "loss_ce": 4.6413282689172775e-05, "loss_iou": 0.234375, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 165033428, "step": 1808 }, { "epoch": 7.5375, "grad_norm": 2.9019079700505035, "learning_rate": 5e-05, "loss": 0.0953, "num_input_tokens_seen": 165124268, "step": 1809 }, { "epoch": 7.5375, "loss": 0.09286807477474213, "loss_ce": 0.000521889771334827, "loss_iou": 0.28125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 165124268, "step": 1809 }, { "epoch": 7.541666666666667, "grad_norm": 2.17195560578624, "learning_rate": 5e-05, "loss": 0.1329, "num_input_tokens_seen": 165215284, "step": 1810 }, { "epoch": 7.541666666666667, "loss": 0.1432662159204483, "loss_ce": 0.0005965381278656423, "loss_iou": 0.3515625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 165215284, "step": 1810 }, { "epoch": 7.545833333333333, "grad_norm": 1.8534083120133813, "learning_rate": 5e-05, "loss": 0.1059, "num_input_tokens_seen": 165306224, "step": 1811 }, { "epoch": 7.545833333333333, "loss": 0.14295382797718048, "loss_ce": 0.002542451722547412, "loss_iou": 0.28515625, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 165306224, "step": 1811 }, { "epoch": 7.55, "grad_norm": 2.4445060582381823, "learning_rate": 5e-05, "loss": 0.1071, "num_input_tokens_seen": 165397308, "step": 1812 }, { "epoch": 7.55, "loss": 0.11119158565998077, "loss_ce": 0.0020302007906138897, "loss_iou": 0.259765625, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 165397308, "step": 1812 }, { "epoch": 7.554166666666667, "grad_norm": 2.9314327836733636, "learning_rate": 5e-05, "loss": 0.1002, "num_input_tokens_seen": 165488264, "step": 1813 }, { "epoch": 7.554166666666667, "loss": 0.05487770587205887, "loss_ce": 3.762233245652169e-05, "loss_iou": 0.341796875, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 165488264, "step": 1813 }, { "epoch": 7.558333333333334, "grad_norm": 2.961509975422286, "learning_rate": 5e-05, "loss": 0.0755, "num_input_tokens_seen": 165579952, "step": 1814 }, { "epoch": 7.558333333333334, "loss": 0.06028253957629204, "loss_ce": 0.0006588200340047479, "loss_iou": 0.2294921875, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 165579952, "step": 1814 }, { "epoch": 7.5625, "grad_norm": 6.481985228004668, "learning_rate": 5e-05, "loss": 0.0843, "num_input_tokens_seen": 165671460, "step": 1815 }, { "epoch": 7.5625, "loss": 0.07226623594760895, "loss_ce": 0.0004965229891240597, "loss_iou": 0.228515625, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 165671460, "step": 1815 }, { "epoch": 7.566666666666666, "grad_norm": 3.116352673166522, "learning_rate": 5e-05, "loss": 0.0947, "num_input_tokens_seen": 165762564, "step": 1816 }, { "epoch": 7.566666666666666, "loss": 0.11289401352405548, "loss_ce": 9.493182005826384e-06, "loss_iou": 0.3125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 165762564, "step": 1816 }, { "epoch": 7.570833333333333, "grad_norm": 1.9908482594454786, "learning_rate": 5e-05, "loss": 0.0727, "num_input_tokens_seen": 165853848, "step": 1817 }, { "epoch": 7.570833333333333, "loss": 0.08714696764945984, "loss_ce": 1.9282315406599082e-05, "loss_iou": 0.166015625, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 165853848, "step": 1817 }, { "epoch": 7.575, "grad_norm": 7.419420004332939, "learning_rate": 5e-05, "loss": 0.1001, "num_input_tokens_seen": 165944908, "step": 1818 }, { "epoch": 7.575, "loss": 0.0688440203666687, "loss_ce": 0.0010339573491364717, "loss_iou": 0.33203125, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 165944908, "step": 1818 }, { "epoch": 7.579166666666667, "grad_norm": 2.3215453774434605, "learning_rate": 5e-05, "loss": 0.0911, "num_input_tokens_seen": 166035940, "step": 1819 }, { "epoch": 7.579166666666667, "loss": 0.10113656520843506, "loss_ce": 0.00033700454514473677, "loss_iou": 0.296875, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 166035940, "step": 1819 }, { "epoch": 7.583333333333333, "grad_norm": 8.378167852766184, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 166127672, "step": 1820 }, { "epoch": 7.583333333333333, "loss": 0.11757265776395798, "loss_ce": 0.002384064719080925, "loss_iou": 0.306640625, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 166127672, "step": 1820 }, { "epoch": 7.5875, "grad_norm": 3.225532831738545, "learning_rate": 5e-05, "loss": 0.1326, "num_input_tokens_seen": 166218888, "step": 1821 }, { "epoch": 7.5875, "loss": 0.13063114881515503, "loss_ce": 1.59115697897505e-05, "loss_iou": 0.2578125, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 166218888, "step": 1821 }, { "epoch": 7.591666666666667, "grad_norm": 2.9795922010986216, "learning_rate": 5e-05, "loss": 0.0916, "num_input_tokens_seen": 166310356, "step": 1822 }, { "epoch": 7.591666666666667, "loss": 0.11850694566965103, "loss_ce": 3.77087781089358e-05, "loss_iou": 0.400390625, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 166310356, "step": 1822 }, { "epoch": 7.595833333333333, "grad_norm": 3.9278588907171903, "learning_rate": 5e-05, "loss": 0.0925, "num_input_tokens_seen": 166401668, "step": 1823 }, { "epoch": 7.595833333333333, "loss": 0.10767964273691177, "loss_ce": 0.0028212470933794975, "loss_iou": 0.365234375, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 166401668, "step": 1823 }, { "epoch": 7.6, "grad_norm": 3.5113905798888885, "learning_rate": 5e-05, "loss": 0.1064, "num_input_tokens_seen": 166493016, "step": 1824 }, { "epoch": 7.6, "loss": 0.07028500735759735, "loss_ce": 6.406075408449396e-05, "loss_iou": 0.193359375, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 166493016, "step": 1824 }, { "epoch": 7.604166666666667, "grad_norm": 4.0741472080729, "learning_rate": 5e-05, "loss": 0.0861, "num_input_tokens_seen": 166584856, "step": 1825 }, { "epoch": 7.604166666666667, "loss": 0.0921793282032013, "loss_ce": 4.675585660152137e-05, "loss_iou": 0.17578125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 166584856, "step": 1825 }, { "epoch": 7.608333333333333, "grad_norm": 4.428130988861915, "learning_rate": 5e-05, "loss": 0.116, "num_input_tokens_seen": 166675996, "step": 1826 }, { "epoch": 7.608333333333333, "loss": 0.12392785400152206, "loss_ce": 0.00017907016444951296, "loss_iou": 0.181640625, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 166675996, "step": 1826 }, { "epoch": 7.6125, "grad_norm": 7.578798622859177, "learning_rate": 5e-05, "loss": 0.0992, "num_input_tokens_seen": 166767272, "step": 1827 }, { "epoch": 7.6125, "loss": 0.07048434019088745, "loss_ce": 0.001316254260018468, "loss_iou": 0.400390625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 166767272, "step": 1827 }, { "epoch": 7.616666666666667, "grad_norm": 2.482887727779412, "learning_rate": 5e-05, "loss": 0.065, "num_input_tokens_seen": 166858764, "step": 1828 }, { "epoch": 7.616666666666667, "loss": 0.06286533176898956, "loss_ce": 0.00019748101476579905, "loss_iou": 0.31640625, "loss_num": 0.0125732421875, "loss_xval": 0.0625, "num_input_tokens_seen": 166858764, "step": 1828 }, { "epoch": 7.620833333333334, "grad_norm": 3.2505263368321122, "learning_rate": 5e-05, "loss": 0.0719, "num_input_tokens_seen": 166950188, "step": 1829 }, { "epoch": 7.620833333333334, "loss": 0.07289623469114304, "loss_ce": 0.0010273351799696684, "loss_iou": 0.28125, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 166950188, "step": 1829 }, { "epoch": 7.625, "grad_norm": 6.27788075124899, "learning_rate": 5e-05, "loss": 0.1238, "num_input_tokens_seen": 167039588, "step": 1830 }, { "epoch": 7.625, "loss": 0.10974088311195374, "loss_ce": 0.0013424496864899993, "loss_iou": 0.2890625, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 167039588, "step": 1830 }, { "epoch": 7.629166666666666, "grad_norm": 2.2287300058880852, "learning_rate": 5e-05, "loss": 0.0802, "num_input_tokens_seen": 167130012, "step": 1831 }, { "epoch": 7.629166666666666, "loss": 0.07976742833852768, "loss_ce": 0.0007879381300881505, "loss_iou": 0.375, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 167130012, "step": 1831 }, { "epoch": 7.633333333333333, "grad_norm": 6.5495447919435685, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 167220916, "step": 1832 }, { "epoch": 7.633333333333333, "loss": 0.08783036470413208, "loss_ce": 0.000504314957652241, "loss_iou": 0.2890625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 167220916, "step": 1832 }, { "epoch": 7.6375, "grad_norm": 2.199207485947871, "learning_rate": 5e-05, "loss": 0.1095, "num_input_tokens_seen": 167312560, "step": 1833 }, { "epoch": 7.6375, "loss": 0.1437080353498459, "loss_ce": 0.0005805969703942537, "loss_iou": 0.19140625, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 167312560, "step": 1833 }, { "epoch": 7.641666666666667, "grad_norm": 3.217155580648952, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 167404172, "step": 1834 }, { "epoch": 7.641666666666667, "loss": 0.049773965030908585, "loss_ce": 0.0006101447506807745, "loss_iou": 0.349609375, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 167404172, "step": 1834 }, { "epoch": 7.645833333333333, "grad_norm": 4.943883406727365, "learning_rate": 5e-05, "loss": 0.0878, "num_input_tokens_seen": 167495484, "step": 1835 }, { "epoch": 7.645833333333333, "loss": 0.08797941356897354, "loss_ce": 0.0005847018328495324, "loss_iou": 0.37890625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 167495484, "step": 1835 }, { "epoch": 7.65, "grad_norm": 3.7136872933224874, "learning_rate": 5e-05, "loss": 0.1135, "num_input_tokens_seen": 167586412, "step": 1836 }, { "epoch": 7.65, "loss": 0.12610657513141632, "loss_ce": 0.0008624346228316426, "loss_iou": 0.359375, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 167586412, "step": 1836 }, { "epoch": 7.654166666666667, "grad_norm": 8.318407165207367, "learning_rate": 5e-05, "loss": 0.09, "num_input_tokens_seen": 167677852, "step": 1837 }, { "epoch": 7.654166666666667, "loss": 0.10709414631128311, "loss_ce": 0.0015491031808778644, "loss_iou": 0.1845703125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 167677852, "step": 1837 }, { "epoch": 7.658333333333333, "grad_norm": 4.611140970076494, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 167769152, "step": 1838 }, { "epoch": 7.658333333333333, "loss": 0.06653960049152374, "loss_ce": 0.00030119341681711376, "loss_iou": 0.267578125, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 167769152, "step": 1838 }, { "epoch": 7.6625, "grad_norm": 2.719468001458222, "learning_rate": 5e-05, "loss": 0.0788, "num_input_tokens_seen": 167860692, "step": 1839 }, { "epoch": 7.6625, "loss": 0.10713286697864532, "loss_ce": 3.1428429792867973e-05, "loss_iou": 0.333984375, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 167860692, "step": 1839 }, { "epoch": 7.666666666666667, "grad_norm": 3.146483263204079, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 167952176, "step": 1840 }, { "epoch": 7.666666666666667, "loss": 0.1102394163608551, "loss_ce": 0.00010147166176466271, "loss_iou": 0.267578125, "loss_num": 0.02197265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 167952176, "step": 1840 }, { "epoch": 7.670833333333333, "grad_norm": 7.767544722147642, "learning_rate": 5e-05, "loss": 0.1318, "num_input_tokens_seen": 168044080, "step": 1841 }, { "epoch": 7.670833333333333, "loss": 0.155223548412323, "loss_ce": 0.002299969084560871, "loss_iou": 0.310546875, "loss_num": 0.030517578125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 168044080, "step": 1841 }, { "epoch": 7.675, "grad_norm": 2.668628386340425, "learning_rate": 5e-05, "loss": 0.1067, "num_input_tokens_seen": 168135656, "step": 1842 }, { "epoch": 7.675, "loss": 0.11792254447937012, "loss_ce": 4.8401838284917176e-05, "loss_iou": 0.26171875, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 168135656, "step": 1842 }, { "epoch": 7.679166666666667, "grad_norm": 5.252145209510077, "learning_rate": 5e-05, "loss": 0.1599, "num_input_tokens_seen": 168227168, "step": 1843 }, { "epoch": 7.679166666666667, "loss": 0.16746670007705688, "loss_ce": 1.6735710232751444e-05, "loss_iou": 0.353515625, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 168227168, "step": 1843 }, { "epoch": 7.683333333333334, "grad_norm": 5.91726659370321, "learning_rate": 5e-05, "loss": 0.1143, "num_input_tokens_seen": 168318868, "step": 1844 }, { "epoch": 7.683333333333334, "loss": 0.11179641634225845, "loss_ce": 0.0002699237084016204, "loss_iou": 0.294921875, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 168318868, "step": 1844 }, { "epoch": 7.6875, "grad_norm": 4.096332452166496, "learning_rate": 5e-05, "loss": 0.1178, "num_input_tokens_seen": 168409736, "step": 1845 }, { "epoch": 7.6875, "loss": 0.10695420950651169, "loss_ce": 0.0005241526523604989, "loss_iou": 0.2109375, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 168409736, "step": 1845 }, { "epoch": 7.691666666666666, "grad_norm": 3.4184673683269455, "learning_rate": 5e-05, "loss": 0.1033, "num_input_tokens_seen": 168501456, "step": 1846 }, { "epoch": 7.691666666666666, "loss": 0.10996747016906738, "loss_ce": 0.0020267972722649574, "loss_iou": 0.248046875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 168501456, "step": 1846 }, { "epoch": 7.695833333333333, "grad_norm": 3.4885460354486733, "learning_rate": 5e-05, "loss": 0.0956, "num_input_tokens_seen": 168591996, "step": 1847 }, { "epoch": 7.695833333333333, "loss": 0.12594377994537354, "loss_ce": 4.350730523583479e-05, "loss_iou": 0.2490234375, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 168591996, "step": 1847 }, { "epoch": 7.7, "grad_norm": 9.28699081074281, "learning_rate": 5e-05, "loss": 0.0809, "num_input_tokens_seen": 168683664, "step": 1848 }, { "epoch": 7.7, "loss": 0.1240956038236618, "loss_ce": 0.0022083972580730915, "loss_iou": 0.376953125, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 168683664, "step": 1848 }, { "epoch": 7.704166666666667, "grad_norm": 65.82829142422523, "learning_rate": 5e-05, "loss": 0.1116, "num_input_tokens_seen": 168775256, "step": 1849 }, { "epoch": 7.704166666666667, "loss": 0.1421854943037033, "loss_ce": 0.00011090566113125533, "loss_iou": 0.2431640625, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 168775256, "step": 1849 }, { "epoch": 7.708333333333333, "grad_norm": 8.425677609000575, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 168866368, "step": 1850 }, { "epoch": 7.708333333333333, "loss": 0.06213594973087311, "loss_ce": 6.31937655271031e-05, "loss_iou": 0.36328125, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 168866368, "step": 1850 }, { "epoch": 7.7125, "grad_norm": 4.058259871683852, "learning_rate": 5e-05, "loss": 0.1456, "num_input_tokens_seen": 168957900, "step": 1851 }, { "epoch": 7.7125, "loss": 0.15907834470272064, "loss_ce": 0.0002343444648431614, "loss_iou": 0.34765625, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 168957900, "step": 1851 }, { "epoch": 7.716666666666667, "grad_norm": 5.1463068875925915, "learning_rate": 5e-05, "loss": 0.1189, "num_input_tokens_seen": 169048904, "step": 1852 }, { "epoch": 7.716666666666667, "loss": 0.14455726742744446, "loss_ce": 0.0004303784226067364, "loss_iou": 0.19921875, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 169048904, "step": 1852 }, { "epoch": 7.720833333333333, "grad_norm": 3.2107538886682767, "learning_rate": 5e-05, "loss": 0.1203, "num_input_tokens_seen": 169139440, "step": 1853 }, { "epoch": 7.720833333333333, "loss": 0.13053694367408752, "loss_ce": 0.0003337044327054173, "loss_iou": 0.31640625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 169139440, "step": 1853 }, { "epoch": 7.725, "grad_norm": 3.870283976834058, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 169230360, "step": 1854 }, { "epoch": 7.725, "loss": 0.06204288825392723, "loss_ce": 0.0007635898073203862, "loss_iou": 0.2333984375, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 169230360, "step": 1854 }, { "epoch": 7.729166666666667, "grad_norm": 3.443786270282577, "learning_rate": 5e-05, "loss": 0.1153, "num_input_tokens_seen": 169321300, "step": 1855 }, { "epoch": 7.729166666666667, "loss": 0.15847985446453094, "loss_ce": 0.0018026053439825773, "loss_iou": 0.302734375, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 169321300, "step": 1855 }, { "epoch": 7.733333333333333, "grad_norm": 2.49730143699476, "learning_rate": 5e-05, "loss": 0.1253, "num_input_tokens_seen": 169411800, "step": 1856 }, { "epoch": 7.733333333333333, "loss": 0.06755806505680084, "loss_ce": 0.00019051358685828745, "loss_iou": 0.24609375, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 169411800, "step": 1856 }, { "epoch": 7.7375, "grad_norm": 3.293136068195067, "learning_rate": 5e-05, "loss": 0.1055, "num_input_tokens_seen": 169503240, "step": 1857 }, { "epoch": 7.7375, "loss": 0.059763744473457336, "loss_ce": 0.005366160534322262, "loss_iou": 0.2412109375, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 169503240, "step": 1857 }, { "epoch": 7.741666666666667, "grad_norm": 3.009868145722623, "learning_rate": 5e-05, "loss": 0.0908, "num_input_tokens_seen": 169594456, "step": 1858 }, { "epoch": 7.741666666666667, "loss": 0.11617320775985718, "loss_ce": 0.0018543555634096265, "loss_iou": 0.21484375, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 169594456, "step": 1858 }, { "epoch": 7.745833333333334, "grad_norm": 4.291106420758004, "learning_rate": 5e-05, "loss": 0.0961, "num_input_tokens_seen": 169686484, "step": 1859 }, { "epoch": 7.745833333333334, "loss": 0.07079610228538513, "loss_ce": 0.001109211822040379, "loss_iou": 0.28125, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 169686484, "step": 1859 }, { "epoch": 7.75, "grad_norm": 3.5117610849754146, "learning_rate": 5e-05, "loss": 0.0632, "num_input_tokens_seen": 169777856, "step": 1860 }, { "epoch": 7.75, "loss": 0.08096813410520554, "loss_ce": 0.00035595300141721964, "loss_iou": 0.390625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 169777856, "step": 1860 }, { "epoch": 7.754166666666666, "grad_norm": 3.216652645214244, "learning_rate": 5e-05, "loss": 0.0757, "num_input_tokens_seen": 169869736, "step": 1861 }, { "epoch": 7.754166666666666, "loss": 0.056906167417764664, "loss_ce": 0.0049194754101336, "loss_iou": 0.35546875, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 169869736, "step": 1861 }, { "epoch": 7.758333333333333, "grad_norm": 4.3446204121281085, "learning_rate": 5e-05, "loss": 0.1213, "num_input_tokens_seen": 169961388, "step": 1862 }, { "epoch": 7.758333333333333, "loss": 0.08808216452598572, "loss_ce": 0.0010307676857337356, "loss_iou": 0.416015625, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 169961388, "step": 1862 }, { "epoch": 7.7625, "grad_norm": 3.631632204860175, "learning_rate": 5e-05, "loss": 0.0958, "num_input_tokens_seen": 170052108, "step": 1863 }, { "epoch": 7.7625, "loss": 0.05665234476327896, "loss_ce": 1.1720794645952992e-05, "loss_iou": 0.275390625, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 170052108, "step": 1863 }, { "epoch": 7.766666666666667, "grad_norm": 4.006442486133152, "learning_rate": 5e-05, "loss": 0.094, "num_input_tokens_seen": 170142784, "step": 1864 }, { "epoch": 7.766666666666667, "loss": 0.13484537601470947, "loss_ce": 4.922464722767472e-05, "loss_iou": 0.27734375, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 170142784, "step": 1864 }, { "epoch": 7.770833333333333, "grad_norm": 3.8589614399252836, "learning_rate": 5e-05, "loss": 0.1013, "num_input_tokens_seen": 170233396, "step": 1865 }, { "epoch": 7.770833333333333, "loss": 0.12279890477657318, "loss_ce": 0.0018882558215409517, "loss_iou": 0.380859375, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 170233396, "step": 1865 }, { "epoch": 7.775, "grad_norm": 4.936401157655488, "learning_rate": 5e-05, "loss": 0.1006, "num_input_tokens_seen": 170323888, "step": 1866 }, { "epoch": 7.775, "loss": 0.10373049974441528, "loss_ce": 0.0020726316142827272, "loss_iou": 0.244140625, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 170323888, "step": 1866 }, { "epoch": 7.779166666666667, "grad_norm": 3.4016737513695263, "learning_rate": 5e-05, "loss": 0.0813, "num_input_tokens_seen": 170414608, "step": 1867 }, { "epoch": 7.779166666666667, "loss": 0.08684270083904266, "loss_ce": 4.935580363962799e-06, "loss_iou": 0.36328125, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 170414608, "step": 1867 }, { "epoch": 7.783333333333333, "grad_norm": 2.8843144741959255, "learning_rate": 5e-05, "loss": 0.0822, "num_input_tokens_seen": 170505572, "step": 1868 }, { "epoch": 7.783333333333333, "loss": 0.12317100912332535, "loss_ce": 2.0571660570567474e-06, "loss_iou": 0.3125, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 170505572, "step": 1868 }, { "epoch": 7.7875, "grad_norm": 2.774487838379211, "learning_rate": 5e-05, "loss": 0.0632, "num_input_tokens_seen": 170596752, "step": 1869 }, { "epoch": 7.7875, "loss": 0.058745529502630234, "loss_ce": 0.0003348880272824317, "loss_iou": 0.298828125, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 170596752, "step": 1869 }, { "epoch": 7.791666666666667, "grad_norm": 2.6278615952842075, "learning_rate": 5e-05, "loss": 0.1284, "num_input_tokens_seen": 170687504, "step": 1870 }, { "epoch": 7.791666666666667, "loss": 0.1696607768535614, "loss_ce": 4.4082615204388276e-05, "loss_iou": 0.423828125, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 170687504, "step": 1870 }, { "epoch": 7.795833333333333, "grad_norm": 6.625014552147208, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 170778504, "step": 1871 }, { "epoch": 7.795833333333333, "loss": 0.05947096273303032, "loss_ce": 0.0007856582524254918, "loss_iou": 0.2578125, "loss_num": 0.01177978515625, "loss_xval": 0.05859375, "num_input_tokens_seen": 170778504, "step": 1871 }, { "epoch": 7.8, "grad_norm": 4.606783633451379, "learning_rate": 5e-05, "loss": 0.0717, "num_input_tokens_seen": 170869956, "step": 1872 }, { "epoch": 7.8, "loss": 0.10387454181909561, "loss_ce": 8.426039858022705e-05, "loss_iou": 0.318359375, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 170869956, "step": 1872 }, { "epoch": 7.804166666666667, "grad_norm": 5.782852205094, "learning_rate": 5e-05, "loss": 0.1185, "num_input_tokens_seen": 170961116, "step": 1873 }, { "epoch": 7.804166666666667, "loss": 0.09680216014385223, "loss_ce": 0.0006412738002836704, "loss_iou": 0.341796875, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 170961116, "step": 1873 }, { "epoch": 7.808333333333334, "grad_norm": 1.7964825342418311, "learning_rate": 5e-05, "loss": 0.0923, "num_input_tokens_seen": 171052512, "step": 1874 }, { "epoch": 7.808333333333334, "loss": 0.12361488491296768, "loss_ce": 1.8692413505050354e-05, "loss_iou": 0.25, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 171052512, "step": 1874 }, { "epoch": 7.8125, "grad_norm": 2.0243645225505853, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 171143440, "step": 1875 }, { "epoch": 7.8125, "loss": 0.04031776636838913, "loss_ce": 4.047397851536516e-06, "loss_iou": 0.189453125, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 171143440, "step": 1875 }, { "epoch": 7.816666666666666, "grad_norm": 46.0512120265654, "learning_rate": 5e-05, "loss": 0.1445, "num_input_tokens_seen": 171234224, "step": 1876 }, { "epoch": 7.816666666666666, "loss": 0.19212490320205688, "loss_ce": 1.6747279005357996e-05, "loss_iou": 0.384765625, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 171234224, "step": 1876 }, { "epoch": 7.820833333333333, "grad_norm": 3.132204944431851, "learning_rate": 5e-05, "loss": 0.0643, "num_input_tokens_seen": 171325636, "step": 1877 }, { "epoch": 7.820833333333333, "loss": 0.07965581119060516, "loss_ce": 3.5450975701678544e-05, "loss_iou": 0.2578125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 171325636, "step": 1877 }, { "epoch": 7.825, "grad_norm": 2.754800643128859, "learning_rate": 5e-05, "loss": 0.065, "num_input_tokens_seen": 171417576, "step": 1878 }, { "epoch": 7.825, "loss": 0.09073353558778763, "loss_ce": 0.0015916909324005246, "loss_iou": 0.279296875, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 171417576, "step": 1878 }, { "epoch": 7.829166666666667, "grad_norm": 4.052791143927779, "learning_rate": 5e-05, "loss": 0.1028, "num_input_tokens_seen": 171509616, "step": 1879 }, { "epoch": 7.829166666666667, "loss": 0.13964907824993134, "loss_ce": 6.167963874759153e-05, "loss_iou": 0.330078125, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 171509616, "step": 1879 }, { "epoch": 7.833333333333333, "grad_norm": 7.993966658893457, "learning_rate": 5e-05, "loss": 0.1307, "num_input_tokens_seen": 171601172, "step": 1880 }, { "epoch": 7.833333333333333, "loss": 0.1629827916622162, "loss_ce": 6.088300870032981e-05, "loss_iou": 0.1708984375, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 171601172, "step": 1880 }, { "epoch": 7.8375, "grad_norm": 6.632999575668596, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 171693368, "step": 1881 }, { "epoch": 7.8375, "loss": 0.09090165048837662, "loss_ce": 0.0015919567085802555, "loss_iou": 0.1962890625, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 171693368, "step": 1881 }, { "epoch": 7.841666666666667, "grad_norm": 3.9364271993934463, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 171784312, "step": 1882 }, { "epoch": 7.841666666666667, "loss": 0.07746930420398712, "loss_ce": 0.0007175912614911795, "loss_iou": 0.1796875, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 171784312, "step": 1882 }, { "epoch": 7.845833333333333, "grad_norm": 2.5086294138768084, "learning_rate": 5e-05, "loss": 0.0731, "num_input_tokens_seen": 171875756, "step": 1883 }, { "epoch": 7.845833333333333, "loss": 0.05923663079738617, "loss_ce": 0.0008412470342591405, "loss_iou": 0.236328125, "loss_num": 0.01171875, "loss_xval": 0.058349609375, "num_input_tokens_seen": 171875756, "step": 1883 }, { "epoch": 7.85, "grad_norm": 3.9182379212981764, "learning_rate": 5e-05, "loss": 0.0981, "num_input_tokens_seen": 171967624, "step": 1884 }, { "epoch": 7.85, "loss": 0.1100146546959877, "loss_ce": 0.0007235782104544342, "loss_iou": 0.353515625, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 171967624, "step": 1884 }, { "epoch": 7.854166666666667, "grad_norm": 3.8226533127540403, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 172058620, "step": 1885 }, { "epoch": 7.854166666666667, "loss": 0.06194145977497101, "loss_ce": 0.001837087795138359, "loss_iou": 0.21484375, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 172058620, "step": 1885 }, { "epoch": 7.858333333333333, "grad_norm": 6.492949437861943, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 172150144, "step": 1886 }, { "epoch": 7.858333333333333, "loss": 0.07762310653924942, "loss_ce": 7.794049452058971e-05, "loss_iou": 0.3046875, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 172150144, "step": 1886 }, { "epoch": 7.8625, "grad_norm": 3.818031643454962, "learning_rate": 5e-05, "loss": 0.104, "num_input_tokens_seen": 172241676, "step": 1887 }, { "epoch": 7.8625, "loss": 0.060611166059970856, "loss_ce": 3.2567709240538534e-06, "loss_iou": 0.275390625, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 172241676, "step": 1887 }, { "epoch": 7.866666666666667, "grad_norm": 3.2853044128340465, "learning_rate": 5e-05, "loss": 0.1077, "num_input_tokens_seen": 172333200, "step": 1888 }, { "epoch": 7.866666666666667, "loss": 0.1084718331694603, "loss_ce": 0.000546415220014751, "loss_iou": 0.32421875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 172333200, "step": 1888 }, { "epoch": 7.870833333333334, "grad_norm": 4.887550603839418, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 172423792, "step": 1889 }, { "epoch": 7.870833333333334, "loss": 0.06197632476687431, "loss_ce": 1.038287973642582e-05, "loss_iou": 0.3046875, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 172423792, "step": 1889 }, { "epoch": 7.875, "grad_norm": 2.795912828890586, "learning_rate": 5e-05, "loss": 0.0996, "num_input_tokens_seen": 172514996, "step": 1890 }, { "epoch": 7.875, "loss": 0.12136028707027435, "loss_ce": 0.00025128168636001647, "loss_iou": 0.205078125, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 172514996, "step": 1890 }, { "epoch": 7.879166666666666, "grad_norm": 3.52171389089644, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 172605524, "step": 1891 }, { "epoch": 7.879166666666666, "loss": 0.1516215056180954, "loss_ce": 1.0173911505262367e-05, "loss_iou": 0.353515625, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 172605524, "step": 1891 }, { "epoch": 7.883333333333333, "grad_norm": 10.1324719488612, "learning_rate": 5e-05, "loss": 0.0854, "num_input_tokens_seen": 172696440, "step": 1892 }, { "epoch": 7.883333333333333, "loss": 0.11221377551555634, "loss_ce": 3.1158208003034815e-05, "loss_iou": 0.267578125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 172696440, "step": 1892 }, { "epoch": 7.8875, "grad_norm": 10.27030357885242, "learning_rate": 5e-05, "loss": 0.1131, "num_input_tokens_seen": 172788140, "step": 1893 }, { "epoch": 7.8875, "loss": 0.17470771074295044, "loss_ce": 0.00016241407138295472, "loss_iou": 0.21875, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 172788140, "step": 1893 }, { "epoch": 7.891666666666667, "grad_norm": 3.9635728907775243, "learning_rate": 5e-05, "loss": 0.1529, "num_input_tokens_seen": 172879188, "step": 1894 }, { "epoch": 7.891666666666667, "loss": 0.20308756828308105, "loss_ce": 0.001488459762185812, "loss_iou": 0.1630859375, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 172879188, "step": 1894 }, { "epoch": 7.895833333333333, "grad_norm": 2.936241068833577, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 172970984, "step": 1895 }, { "epoch": 7.895833333333333, "loss": 0.053100377321243286, "loss_ce": 0.0014951550401747227, "loss_iou": 0.185546875, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 172970984, "step": 1895 }, { "epoch": 7.9, "grad_norm": 3.421123041364866, "learning_rate": 5e-05, "loss": 0.1266, "num_input_tokens_seen": 173062196, "step": 1896 }, { "epoch": 7.9, "loss": 0.13779032230377197, "loss_ce": 6.44890678813681e-05, "loss_iou": 0.333984375, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 173062196, "step": 1896 }, { "epoch": 7.904166666666667, "grad_norm": 7.247987310893387, "learning_rate": 5e-05, "loss": 0.1087, "num_input_tokens_seen": 173153772, "step": 1897 }, { "epoch": 7.904166666666667, "loss": 0.08903989940881729, "loss_ce": 0.002934554824605584, "loss_iou": 0.1328125, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 173153772, "step": 1897 }, { "epoch": 7.908333333333333, "grad_norm": 7.165382740064726, "learning_rate": 5e-05, "loss": 0.1056, "num_input_tokens_seen": 173244088, "step": 1898 }, { "epoch": 7.908333333333333, "loss": 0.08239862322807312, "loss_ce": 8.508615428581834e-05, "loss_iou": 0.328125, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 173244088, "step": 1898 }, { "epoch": 7.9125, "grad_norm": 5.654221775747249, "learning_rate": 5e-05, "loss": 0.1309, "num_input_tokens_seen": 173335192, "step": 1899 }, { "epoch": 7.9125, "loss": 0.13726767897605896, "loss_ce": 0.0011592707596719265, "loss_iou": 0.33203125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 173335192, "step": 1899 }, { "epoch": 7.916666666666667, "grad_norm": 5.467712394921771, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 173426656, "step": 1900 }, { "epoch": 7.916666666666667, "loss": 0.060835033655166626, "loss_ce": 0.0005170417134650052, "loss_iou": 0.40234375, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 173426656, "step": 1900 }, { "epoch": 7.920833333333333, "grad_norm": 3.4832356065050822, "learning_rate": 5e-05, "loss": 0.085, "num_input_tokens_seen": 173518204, "step": 1901 }, { "epoch": 7.920833333333333, "loss": 0.07123122364282608, "loss_ce": 0.0002168197388527915, "loss_iou": 0.259765625, "loss_num": 0.01422119140625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 173518204, "step": 1901 }, { "epoch": 7.925, "grad_norm": 8.246157039093085, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 173609712, "step": 1902 }, { "epoch": 7.925, "loss": 0.06213460490107536, "loss_ce": 0.0048988861963152885, "loss_iou": 0.234375, "loss_num": 0.011474609375, "loss_xval": 0.05712890625, "num_input_tokens_seen": 173609712, "step": 1902 }, { "epoch": 7.929166666666667, "grad_norm": 1.8546311321482563, "learning_rate": 5e-05, "loss": 0.0903, "num_input_tokens_seen": 173701596, "step": 1903 }, { "epoch": 7.929166666666667, "loss": 0.0635804757475853, "loss_ce": 0.0011720317415893078, "loss_iou": 0.263671875, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 173701596, "step": 1903 }, { "epoch": 7.933333333333334, "grad_norm": 9.380074079968875, "learning_rate": 5e-05, "loss": 0.062, "num_input_tokens_seen": 173793244, "step": 1904 }, { "epoch": 7.933333333333334, "loss": 0.069390207529068, "loss_ce": 0.0006646226975135505, "loss_iou": 0.154296875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 173793244, "step": 1904 }, { "epoch": 7.9375, "grad_norm": 2.965480902277429, "learning_rate": 5e-05, "loss": 0.0743, "num_input_tokens_seen": 173884972, "step": 1905 }, { "epoch": 7.9375, "loss": 0.08638650178909302, "loss_ce": 0.003958521876484156, "loss_iou": 0.1904296875, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 173884972, "step": 1905 }, { "epoch": 7.941666666666666, "grad_norm": 5.18458949518755, "learning_rate": 5e-05, "loss": 0.1219, "num_input_tokens_seen": 173976040, "step": 1906 }, { "epoch": 7.941666666666666, "loss": 0.07028573006391525, "loss_ce": 0.0013160043163225055, "loss_iou": 0.33203125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 173976040, "step": 1906 }, { "epoch": 7.945833333333333, "grad_norm": 2.3609421895316305, "learning_rate": 5e-05, "loss": 0.0801, "num_input_tokens_seen": 174067072, "step": 1907 }, { "epoch": 7.945833333333333, "loss": 0.0694548636674881, "loss_ce": 0.001217557000927627, "loss_iou": 0.267578125, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 174067072, "step": 1907 }, { "epoch": 7.95, "grad_norm": 2.7941265022676958, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 174158448, "step": 1908 }, { "epoch": 7.95, "loss": 0.05729863792657852, "loss_ce": 0.0006427493062801659, "loss_iou": 0.40625, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 174158448, "step": 1908 }, { "epoch": 7.954166666666667, "grad_norm": 3.680505406275804, "learning_rate": 5e-05, "loss": 0.0677, "num_input_tokens_seen": 174249904, "step": 1909 }, { "epoch": 7.954166666666667, "loss": 0.05491199344396591, "loss_ce": 0.0043977717868983746, "loss_iou": 0.2177734375, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 174249904, "step": 1909 }, { "epoch": 7.958333333333333, "grad_norm": 7.906246591304872, "learning_rate": 5e-05, "loss": 0.1651, "num_input_tokens_seen": 174341280, "step": 1910 }, { "epoch": 7.958333333333333, "loss": 0.18820932507514954, "loss_ce": 9.898372809402645e-05, "loss_iou": 0.3125, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 174341280, "step": 1910 }, { "epoch": 7.9625, "grad_norm": 1.848318747618714, "learning_rate": 5e-05, "loss": 0.0976, "num_input_tokens_seen": 174432296, "step": 1911 }, { "epoch": 7.9625, "loss": 0.07310444116592407, "loss_ce": 0.0002284618967678398, "loss_iou": 0.2265625, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 174432296, "step": 1911 }, { "epoch": 7.966666666666667, "grad_norm": 3.8571062261351057, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 174523860, "step": 1912 }, { "epoch": 7.966666666666667, "loss": 0.02909906394779682, "loss_ce": 0.0005956448148936033, "loss_iou": 0.283203125, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 174523860, "step": 1912 }, { "epoch": 7.970833333333333, "grad_norm": 5.069425793896958, "learning_rate": 5e-05, "loss": 0.0914, "num_input_tokens_seen": 174615020, "step": 1913 }, { "epoch": 7.970833333333333, "loss": 0.06316525489091873, "loss_ce": 0.00209958222694695, "loss_iou": 0.21484375, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 174615020, "step": 1913 }, { "epoch": 7.975, "grad_norm": 7.767234055317729, "learning_rate": 5e-05, "loss": 0.1184, "num_input_tokens_seen": 174706624, "step": 1914 }, { "epoch": 7.975, "loss": 0.07708540558815002, "loss_ce": 5.9040161431767046e-05, "loss_iou": 0.341796875, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 174706624, "step": 1914 }, { "epoch": 7.979166666666667, "grad_norm": 5.756764179723251, "learning_rate": 5e-05, "loss": 0.0732, "num_input_tokens_seen": 174797684, "step": 1915 }, { "epoch": 7.979166666666667, "loss": 0.0657171905040741, "loss_ce": 7.387852383544669e-05, "loss_iou": 0.10009765625, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 174797684, "step": 1915 }, { "epoch": 7.983333333333333, "grad_norm": 2.8165166655344525, "learning_rate": 5e-05, "loss": 0.0782, "num_input_tokens_seen": 174889664, "step": 1916 }, { "epoch": 7.983333333333333, "loss": 0.07849019765853882, "loss_ce": 0.0003499391896184534, "loss_iou": 0.26171875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 174889664, "step": 1916 }, { "epoch": 7.9875, "grad_norm": 4.331019918999385, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 174981060, "step": 1917 }, { "epoch": 7.9875, "loss": 0.08154531568288803, "loss_ce": 0.0007042511133477092, "loss_iou": 0.271484375, "loss_num": 0.01611328125, "loss_xval": 0.0810546875, "num_input_tokens_seen": 174981060, "step": 1917 }, { "epoch": 7.991666666666667, "grad_norm": 2.5517017116929397, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 175072248, "step": 1918 }, { "epoch": 7.991666666666667, "loss": 0.0968976840376854, "loss_ce": 0.0007673102663829923, "loss_iou": 0.1904296875, "loss_num": 0.0191650390625, "loss_xval": 0.09619140625, "num_input_tokens_seen": 175072248, "step": 1918 }, { "epoch": 7.995833333333334, "grad_norm": 10.617618874938415, "learning_rate": 5e-05, "loss": 0.0718, "num_input_tokens_seen": 175163296, "step": 1919 }, { "epoch": 7.995833333333334, "loss": 0.07148117572069168, "loss_ce": 0.00025314692175015807, "loss_iou": 0.08837890625, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 175163296, "step": 1919 }, { "epoch": 8.0, "grad_norm": 9.509254592027485, "learning_rate": 5e-05, "loss": 0.1508, "num_input_tokens_seen": 175254576, "step": 1920 }, { "epoch": 8.0, "loss": 0.11267201602458954, "loss_ce": 0.0007335399859584868, "loss_iou": 0.1826171875, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 175254576, "step": 1920 }, { "epoch": 8.004166666666666, "grad_norm": 2.7558632771081855, "learning_rate": 5e-05, "loss": 0.0556, "num_input_tokens_seen": 175346468, "step": 1921 }, { "epoch": 8.004166666666666, "loss": 0.05728989467024803, "loss_ce": 1.602834163350053e-05, "loss_iou": 0.328125, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 175346468, "step": 1921 }, { "epoch": 8.008333333333333, "grad_norm": 3.141904081215093, "learning_rate": 5e-05, "loss": 0.051, "num_input_tokens_seen": 175437396, "step": 1922 }, { "epoch": 8.008333333333333, "loss": 0.06695879250764847, "loss_ce": 0.0001252962974831462, "loss_iou": 0.2734375, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 175437396, "step": 1922 }, { "epoch": 8.0125, "grad_norm": 5.683325618422793, "learning_rate": 5e-05, "loss": 0.0587, "num_input_tokens_seen": 175529152, "step": 1923 }, { "epoch": 8.0125, "loss": 0.05996830761432648, "loss_ce": 0.00016911182319745421, "loss_iou": 0.369140625, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 175529152, "step": 1923 }, { "epoch": 8.016666666666667, "grad_norm": 4.526126081879709, "learning_rate": 5e-05, "loss": 0.0979, "num_input_tokens_seen": 175620760, "step": 1924 }, { "epoch": 8.016666666666667, "loss": 0.14104092121124268, "loss_ce": 8.023128611966968e-05, "loss_iou": 0.0, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 175620760, "step": 1924 }, { "epoch": 8.020833333333334, "grad_norm": 2.5408139371917704, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 175712500, "step": 1925 }, { "epoch": 8.020833333333334, "loss": 0.06760569661855698, "loss_ce": 0.0007111626910045743, "loss_iou": 0.296875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 175712500, "step": 1925 }, { "epoch": 8.025, "grad_norm": 2.229218566080554, "learning_rate": 5e-05, "loss": 0.0888, "num_input_tokens_seen": 175803620, "step": 1926 }, { "epoch": 8.025, "loss": 0.09539847820997238, "loss_ce": 0.00026755582075566053, "loss_iou": 0.2890625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 175803620, "step": 1926 }, { "epoch": 8.029166666666667, "grad_norm": 2.0268256812580963, "learning_rate": 5e-05, "loss": 0.0863, "num_input_tokens_seen": 175895072, "step": 1927 }, { "epoch": 8.029166666666667, "loss": 0.09223587810993195, "loss_ce": 4.9901744205271825e-05, "loss_iou": 0.203125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 175895072, "step": 1927 }, { "epoch": 8.033333333333333, "grad_norm": 2.28864677907678, "learning_rate": 5e-05, "loss": 0.1157, "num_input_tokens_seen": 175986172, "step": 1928 }, { "epoch": 8.033333333333333, "loss": 0.11336939036846161, "loss_ce": 7.288237247848883e-05, "loss_iou": 0.2890625, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 175986172, "step": 1928 }, { "epoch": 8.0375, "grad_norm": 2.2996102812429093, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 176077696, "step": 1929 }, { "epoch": 8.0375, "loss": 0.05129075050354004, "loss_ce": 9.75160874077119e-05, "loss_iou": 0.34765625, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 176077696, "step": 1929 }, { "epoch": 8.041666666666666, "grad_norm": 8.353008091933523, "learning_rate": 5e-05, "loss": 0.0816, "num_input_tokens_seen": 176168784, "step": 1930 }, { "epoch": 8.041666666666666, "loss": 0.07607695460319519, "loss_ce": 1.1887209439009894e-05, "loss_iou": 0.1875, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 176168784, "step": 1930 }, { "epoch": 8.045833333333333, "grad_norm": 2.8779811742424117, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 176260380, "step": 1931 }, { "epoch": 8.045833333333333, "loss": 0.05590657889842987, "loss_ce": 0.00010518834460526705, "loss_iou": 0.390625, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 176260380, "step": 1931 }, { "epoch": 8.05, "grad_norm": 3.310628240398785, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 176351776, "step": 1932 }, { "epoch": 8.05, "loss": 0.05620376765727997, "loss_ce": 5.650423645420233e-06, "loss_iou": 0.27734375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 176351776, "step": 1932 }, { "epoch": 8.054166666666667, "grad_norm": 2.3084546178570946, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 176442468, "step": 1933 }, { "epoch": 8.054166666666667, "loss": 0.0433502122759819, "loss_ce": 3.05097291857237e-05, "loss_iou": 0.251953125, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 176442468, "step": 1933 }, { "epoch": 8.058333333333334, "grad_norm": 1.8548286182587073, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 176533344, "step": 1934 }, { "epoch": 8.058333333333334, "loss": 0.07856310904026031, "loss_ce": 0.00011767195246648043, "loss_iou": 0.2314453125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 176533344, "step": 1934 }, { "epoch": 8.0625, "grad_norm": 1.9476347775688605, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 176624524, "step": 1935 }, { "epoch": 8.0625, "loss": 0.06961099803447723, "loss_ce": 0.0011600647121667862, "loss_iou": 0.26171875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 176624524, "step": 1935 }, { "epoch": 8.066666666666666, "grad_norm": 3.4435977127079016, "learning_rate": 5e-05, "loss": 0.083, "num_input_tokens_seen": 176716328, "step": 1936 }, { "epoch": 8.066666666666666, "loss": 0.10934816300868988, "loss_ce": 3.419243876123801e-05, "loss_iou": 0.5, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 176716328, "step": 1936 }, { "epoch": 8.070833333333333, "grad_norm": 2.4531498270055305, "learning_rate": 5e-05, "loss": 0.0836, "num_input_tokens_seen": 176806684, "step": 1937 }, { "epoch": 8.070833333333333, "loss": 0.0756228044629097, "loss_ce": 8.416684431722388e-05, "loss_iou": 0.365234375, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 176806684, "step": 1937 }, { "epoch": 8.075, "grad_norm": 3.6167625223497257, "learning_rate": 5e-05, "loss": 0.0942, "num_input_tokens_seen": 176897692, "step": 1938 }, { "epoch": 8.075, "loss": 0.09166642278432846, "loss_ce": 6.876679435663391e-06, "loss_iou": 0.40234375, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 176897692, "step": 1938 }, { "epoch": 8.079166666666667, "grad_norm": 2.436191096534518, "learning_rate": 5e-05, "loss": 0.0832, "num_input_tokens_seen": 176988544, "step": 1939 }, { "epoch": 8.079166666666667, "loss": 0.11645212769508362, "loss_ce": 0.00019542011432349682, "loss_iou": 0.267578125, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 176988544, "step": 1939 }, { "epoch": 8.083333333333334, "grad_norm": 3.7810363764121036, "learning_rate": 5e-05, "loss": 0.0975, "num_input_tokens_seen": 177080004, "step": 1940 }, { "epoch": 8.083333333333334, "loss": 0.11389046162366867, "loss_ce": 0.0005481801927089691, "loss_iou": 0.310546875, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 177080004, "step": 1940 }, { "epoch": 8.0875, "grad_norm": 5.408759817581234, "learning_rate": 5e-05, "loss": 0.0822, "num_input_tokens_seen": 177171380, "step": 1941 }, { "epoch": 8.0875, "loss": 0.09823840111494064, "loss_ce": 0.00027697303448803723, "loss_iou": 0.341796875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 177171380, "step": 1941 }, { "epoch": 8.091666666666667, "grad_norm": 6.60248086092445, "learning_rate": 5e-05, "loss": 0.1204, "num_input_tokens_seen": 177262460, "step": 1942 }, { "epoch": 8.091666666666667, "loss": 0.13897444307804108, "loss_ce": 0.0013706819154322147, "loss_iou": 0.2890625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 177262460, "step": 1942 }, { "epoch": 8.095833333333333, "grad_norm": 3.6691278808928183, "learning_rate": 5e-05, "loss": 0.0678, "num_input_tokens_seen": 177353752, "step": 1943 }, { "epoch": 8.095833333333333, "loss": 0.05663401260972023, "loss_ce": 0.0008783958619460464, "loss_iou": 0.302734375, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 177353752, "step": 1943 }, { "epoch": 8.1, "grad_norm": 2.7196245880429375, "learning_rate": 5e-05, "loss": 0.0855, "num_input_tokens_seen": 177445656, "step": 1944 }, { "epoch": 8.1, "loss": 0.1299806535243988, "loss_ce": 0.00012835516827180982, "loss_iou": 0.193359375, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 177445656, "step": 1944 }, { "epoch": 8.104166666666666, "grad_norm": 16.454721595659908, "learning_rate": 5e-05, "loss": 0.091, "num_input_tokens_seen": 177536400, "step": 1945 }, { "epoch": 8.104166666666666, "loss": 0.040842410176992416, "loss_ce": 0.00034558697370812297, "loss_iou": 0.220703125, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 177536400, "step": 1945 }, { "epoch": 8.108333333333333, "grad_norm": 7.317190624473312, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 177627352, "step": 1946 }, { "epoch": 8.108333333333333, "loss": 0.09328107535839081, "loss_ce": 4.101587364857551e-06, "loss_iou": 0.279296875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 177627352, "step": 1946 }, { "epoch": 8.1125, "grad_norm": 3.5851380464629243, "learning_rate": 5e-05, "loss": 0.1054, "num_input_tokens_seen": 177716688, "step": 1947 }, { "epoch": 8.1125, "loss": 0.15177735686302185, "loss_ce": 2.8688094971585087e-05, "loss_iou": 0.265625, "loss_num": 0.0303955078125, "loss_xval": 0.1513671875, "num_input_tokens_seen": 177716688, "step": 1947 }, { "epoch": 8.116666666666667, "grad_norm": 1.3617134730767266, "learning_rate": 5e-05, "loss": 0.0832, "num_input_tokens_seen": 177807800, "step": 1948 }, { "epoch": 8.116666666666667, "loss": 0.06027965247631073, "loss_ce": 2.269490869366564e-05, "loss_iou": 0.255859375, "loss_num": 0.01202392578125, "loss_xval": 0.060302734375, "num_input_tokens_seen": 177807800, "step": 1948 }, { "epoch": 8.120833333333334, "grad_norm": 11.342054491371455, "learning_rate": 5e-05, "loss": 0.081, "num_input_tokens_seen": 177899068, "step": 1949 }, { "epoch": 8.120833333333334, "loss": 0.05866030231118202, "loss_ce": 0.00011232466931687668, "loss_iou": 0.271484375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 177899068, "step": 1949 }, { "epoch": 8.125, "grad_norm": 12.640378581487273, "learning_rate": 5e-05, "loss": 0.101, "num_input_tokens_seen": 177990316, "step": 1950 }, { "epoch": 8.125, "loss": 0.0911889374256134, "loss_ce": 0.00013211587793193758, "loss_iou": 0.142578125, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 177990316, "step": 1950 }, { "epoch": 8.129166666666666, "grad_norm": 2.9527899337102412, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 178081300, "step": 1951 }, { "epoch": 8.129166666666666, "loss": 0.0566537082195282, "loss_ce": 0.00039455119986087084, "loss_iou": 0.271484375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 178081300, "step": 1951 }, { "epoch": 8.133333333333333, "grad_norm": 4.2287548918279025, "learning_rate": 5e-05, "loss": 0.1153, "num_input_tokens_seen": 178172116, "step": 1952 }, { "epoch": 8.133333333333333, "loss": 0.1239519938826561, "loss_ce": 9.640220378059894e-05, "loss_iou": 0.2578125, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 178172116, "step": 1952 }, { "epoch": 8.1375, "grad_norm": 2.3381843610726185, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 178263160, "step": 1953 }, { "epoch": 8.1375, "loss": 0.0618969164788723, "loss_ce": 0.00017511726764496416, "loss_iou": 0.2578125, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 178263160, "step": 1953 }, { "epoch": 8.141666666666667, "grad_norm": 2.5747334144500322, "learning_rate": 5e-05, "loss": 0.0759, "num_input_tokens_seen": 178355032, "step": 1954 }, { "epoch": 8.141666666666667, "loss": 0.061729107052087784, "loss_ce": 2.25647436309373e-05, "loss_iou": 0.2578125, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 178355032, "step": 1954 }, { "epoch": 8.145833333333334, "grad_norm": 9.611222694930518, "learning_rate": 5e-05, "loss": 0.0855, "num_input_tokens_seen": 178446324, "step": 1955 }, { "epoch": 8.145833333333334, "loss": 0.1247691959142685, "loss_ce": 0.00021170321269892156, "loss_iou": 0.1611328125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 178446324, "step": 1955 }, { "epoch": 8.15, "grad_norm": 2.7844977171891974, "learning_rate": 5e-05, "loss": 0.1012, "num_input_tokens_seen": 178538236, "step": 1956 }, { "epoch": 8.15, "loss": 0.055539753288030624, "loss_ce": 4.353750409791246e-05, "loss_iou": 0.298828125, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 178538236, "step": 1956 }, { "epoch": 8.154166666666667, "grad_norm": 3.6994877077194186, "learning_rate": 5e-05, "loss": 0.0956, "num_input_tokens_seen": 178629736, "step": 1957 }, { "epoch": 8.154166666666667, "loss": 0.059186916798353195, "loss_ce": 0.0023326671216636896, "loss_iou": 0.2021484375, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 178629736, "step": 1957 }, { "epoch": 8.158333333333333, "grad_norm": 2.9171432609320127, "learning_rate": 5e-05, "loss": 0.1063, "num_input_tokens_seen": 178720776, "step": 1958 }, { "epoch": 8.158333333333333, "loss": 0.16347545385360718, "loss_ce": 8.051843906287104e-06, "loss_iou": 0.318359375, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 178720776, "step": 1958 }, { "epoch": 8.1625, "grad_norm": 1.9009285367972448, "learning_rate": 5e-05, "loss": 0.1118, "num_input_tokens_seen": 178812356, "step": 1959 }, { "epoch": 8.1625, "loss": 0.1348172128200531, "loss_ce": 0.0013333172537386417, "loss_iou": 0.361328125, "loss_num": 0.026611328125, "loss_xval": 0.1337890625, "num_input_tokens_seen": 178812356, "step": 1959 }, { "epoch": 8.166666666666666, "grad_norm": 2.970864300802223, "learning_rate": 5e-05, "loss": 0.1303, "num_input_tokens_seen": 178903584, "step": 1960 }, { "epoch": 8.166666666666666, "loss": 0.181955486536026, "loss_ce": 9.36027427087538e-05, "loss_iou": 0.11279296875, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 178903584, "step": 1960 }, { "epoch": 8.170833333333333, "grad_norm": 4.069553929687919, "learning_rate": 5e-05, "loss": 0.074, "num_input_tokens_seen": 178994236, "step": 1961 }, { "epoch": 8.170833333333333, "loss": 0.07317335158586502, "loss_ce": 0.00029737126897089183, "loss_iou": 0.2421875, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 178994236, "step": 1961 }, { "epoch": 8.175, "grad_norm": 4.51077294510098, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 179084796, "step": 1962 }, { "epoch": 8.175, "loss": 0.0737156867980957, "loss_ce": 0.0025029226671904325, "loss_iou": 0.419921875, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 179084796, "step": 1962 }, { "epoch": 8.179166666666667, "grad_norm": 9.711530780142349, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 179175948, "step": 1963 }, { "epoch": 8.179166666666667, "loss": 0.0733020007610321, "loss_ce": 0.0001208468820550479, "loss_iou": 0.2890625, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 179175948, "step": 1963 }, { "epoch": 8.183333333333334, "grad_norm": 2.456772823324688, "learning_rate": 5e-05, "loss": 0.0547, "num_input_tokens_seen": 179267144, "step": 1964 }, { "epoch": 8.183333333333334, "loss": 0.07388445734977722, "loss_ce": 0.00010821619798662141, "loss_iou": 0.361328125, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 179267144, "step": 1964 }, { "epoch": 8.1875, "grad_norm": 2.482028249470996, "learning_rate": 5e-05, "loss": 0.0719, "num_input_tokens_seen": 179359172, "step": 1965 }, { "epoch": 8.1875, "loss": 0.08596872538328171, "loss_ce": 0.000702614663168788, "loss_iou": 0.396484375, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 179359172, "step": 1965 }, { "epoch": 8.191666666666666, "grad_norm": 3.417093743348807, "learning_rate": 5e-05, "loss": 0.0785, "num_input_tokens_seen": 179450416, "step": 1966 }, { "epoch": 8.191666666666666, "loss": 0.08479488641023636, "loss_ce": 0.0011767229298129678, "loss_iou": 0.22265625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 179450416, "step": 1966 }, { "epoch": 8.195833333333333, "grad_norm": 2.6848051093651137, "learning_rate": 5e-05, "loss": 0.0729, "num_input_tokens_seen": 179542024, "step": 1967 }, { "epoch": 8.195833333333333, "loss": 0.08799510449171066, "loss_ce": 2.818577195284888e-05, "loss_iou": 0.265625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 179542024, "step": 1967 }, { "epoch": 8.2, "grad_norm": 3.131329820317152, "learning_rate": 5e-05, "loss": 0.0942, "num_input_tokens_seen": 179633412, "step": 1968 }, { "epoch": 8.2, "loss": 0.05659928917884827, "loss_ce": 0.000569014810025692, "loss_iou": 0.35546875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 179633412, "step": 1968 }, { "epoch": 8.204166666666667, "grad_norm": 4.17209524896321, "learning_rate": 5e-05, "loss": 0.0805, "num_input_tokens_seen": 179724624, "step": 1969 }, { "epoch": 8.204166666666667, "loss": 0.0819387137889862, "loss_ce": 0.0014943802962079644, "loss_iou": 0.25, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 179724624, "step": 1969 }, { "epoch": 8.208333333333334, "grad_norm": 4.50290891660392, "learning_rate": 5e-05, "loss": 0.1009, "num_input_tokens_seen": 179816320, "step": 1970 }, { "epoch": 8.208333333333334, "loss": 0.1005803719162941, "loss_ce": 5.546794272959232e-05, "loss_iou": 0.34765625, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 179816320, "step": 1970 }, { "epoch": 8.2125, "grad_norm": 3.9169085252996116, "learning_rate": 5e-05, "loss": 0.142, "num_input_tokens_seen": 179907560, "step": 1971 }, { "epoch": 8.2125, "loss": 0.16928379237651825, "loss_ce": 0.0008267580415122211, "loss_iou": 0.330078125, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 179907560, "step": 1971 }, { "epoch": 8.216666666666667, "grad_norm": 3.4743631391853134, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 179998500, "step": 1972 }, { "epoch": 8.216666666666667, "loss": 0.08536157011985779, "loss_ce": 0.0012093504192307591, "loss_iou": 0.400390625, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 179998500, "step": 1972 }, { "epoch": 8.220833333333333, "grad_norm": 9.228609800262559, "learning_rate": 5e-05, "loss": 0.0778, "num_input_tokens_seen": 180089536, "step": 1973 }, { "epoch": 8.220833333333333, "loss": 0.11307109892368317, "loss_ce": 0.00040019513107836246, "loss_iou": 0.232421875, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 180089536, "step": 1973 }, { "epoch": 8.225, "grad_norm": 6.383981100098228, "learning_rate": 5e-05, "loss": 0.065, "num_input_tokens_seen": 180180812, "step": 1974 }, { "epoch": 8.225, "loss": 0.07836788892745972, "loss_ce": 0.00033443939173594117, "loss_iou": 0.357421875, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 180180812, "step": 1974 }, { "epoch": 8.229166666666666, "grad_norm": 2.6634427522258792, "learning_rate": 5e-05, "loss": 0.1005, "num_input_tokens_seen": 180272216, "step": 1975 }, { "epoch": 8.229166666666666, "loss": 0.07826492935419083, "loss_ce": 1.785774111340288e-05, "loss_iou": 0.322265625, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 180272216, "step": 1975 }, { "epoch": 8.233333333333333, "grad_norm": 4.054808828099222, "learning_rate": 5e-05, "loss": 0.0808, "num_input_tokens_seen": 180363436, "step": 1976 }, { "epoch": 8.233333333333333, "loss": 0.07174524664878845, "loss_ce": 0.00023492946638725698, "loss_iou": 0.19140625, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 180363436, "step": 1976 }, { "epoch": 8.2375, "grad_norm": 1.876663851471951, "learning_rate": 5e-05, "loss": 0.0774, "num_input_tokens_seen": 180455088, "step": 1977 }, { "epoch": 8.2375, "loss": 0.07476097345352173, "loss_ce": 0.000984729966148734, "loss_iou": 0.201171875, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 180455088, "step": 1977 }, { "epoch": 8.241666666666667, "grad_norm": 4.035059021505715, "learning_rate": 5e-05, "loss": 0.0737, "num_input_tokens_seen": 180545456, "step": 1978 }, { "epoch": 8.241666666666667, "loss": 0.06536682695150375, "loss_ce": 2.8695054425043054e-05, "loss_iou": 0.271484375, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 180545456, "step": 1978 }, { "epoch": 8.245833333333334, "grad_norm": 3.8579126149062017, "learning_rate": 5e-05, "loss": 0.1052, "num_input_tokens_seen": 180636760, "step": 1979 }, { "epoch": 8.245833333333334, "loss": 0.10296410322189331, "loss_ce": 0.0005165926995687187, "loss_iou": 0.2158203125, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 180636760, "step": 1979 }, { "epoch": 8.25, "grad_norm": 2.5788982024479403, "learning_rate": 5e-05, "loss": 0.0685, "num_input_tokens_seen": 180728236, "step": 1980 }, { "epoch": 8.25, "loss": 0.08393379300832748, "loss_ce": 0.0004987327847629786, "loss_iou": 0.2421875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 180728236, "step": 1980 }, { "epoch": 8.254166666666666, "grad_norm": 7.725813315284284, "learning_rate": 5e-05, "loss": 0.0829, "num_input_tokens_seen": 180819708, "step": 1981 }, { "epoch": 8.254166666666666, "loss": 0.08290599286556244, "loss_ce": 0.00013469035911839455, "loss_iou": 0.2119140625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 180819708, "step": 1981 }, { "epoch": 8.258333333333333, "grad_norm": 7.410506149901965, "learning_rate": 5e-05, "loss": 0.0782, "num_input_tokens_seen": 180911432, "step": 1982 }, { "epoch": 8.258333333333333, "loss": 0.0912504717707634, "loss_ce": 0.0002317955659236759, "loss_iou": 0.255859375, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 180911432, "step": 1982 }, { "epoch": 8.2625, "grad_norm": 9.975238003908782, "learning_rate": 5e-05, "loss": 0.0855, "num_input_tokens_seen": 181003188, "step": 1983 }, { "epoch": 8.2625, "loss": 0.07827382534742355, "loss_ce": 0.0001183067579404451, "loss_iou": 0.30078125, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 181003188, "step": 1983 }, { "epoch": 8.266666666666667, "grad_norm": 3.316524325597984, "learning_rate": 5e-05, "loss": 0.0893, "num_input_tokens_seen": 181094616, "step": 1984 }, { "epoch": 8.266666666666667, "loss": 0.09194935858249664, "loss_ce": 1.5155635082919616e-05, "loss_iou": 0.322265625, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 181094616, "step": 1984 }, { "epoch": 8.270833333333334, "grad_norm": 9.740987394451327, "learning_rate": 5e-05, "loss": 0.0874, "num_input_tokens_seen": 181185980, "step": 1985 }, { "epoch": 8.270833333333334, "loss": 0.09619566798210144, "loss_ce": 0.0030407612212002277, "loss_iou": 0.185546875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 181185980, "step": 1985 }, { "epoch": 8.275, "grad_norm": 3.0570700848047307, "learning_rate": 5e-05, "loss": 0.0614, "num_input_tokens_seen": 181276952, "step": 1986 }, { "epoch": 8.275, "loss": 0.03943505138158798, "loss_ce": 2.92310505756177e-05, "loss_iou": 0.353515625, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 181276952, "step": 1986 }, { "epoch": 8.279166666666667, "grad_norm": 2.52623572076418, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 181368384, "step": 1987 }, { "epoch": 8.279166666666667, "loss": 0.0691833421587944, "loss_ce": 0.0005950895138084888, "loss_iou": 0.2236328125, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 181368384, "step": 1987 }, { "epoch": 8.283333333333333, "grad_norm": 3.198067753348154, "learning_rate": 5e-05, "loss": 0.0939, "num_input_tokens_seen": 181459680, "step": 1988 }, { "epoch": 8.283333333333333, "loss": 0.08994542062282562, "loss_ce": 0.0004373587144073099, "loss_iou": 0.302734375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 181459680, "step": 1988 }, { "epoch": 8.2875, "grad_norm": 3.831468348340558, "learning_rate": 5e-05, "loss": 0.0742, "num_input_tokens_seen": 181550624, "step": 1989 }, { "epoch": 8.2875, "loss": 0.06827230751514435, "loss_ce": 4.485354565986199e-06, "loss_iou": 0.2314453125, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 181550624, "step": 1989 }, { "epoch": 8.291666666666666, "grad_norm": 4.826052777734482, "learning_rate": 5e-05, "loss": 0.0985, "num_input_tokens_seen": 181642232, "step": 1990 }, { "epoch": 8.291666666666666, "loss": 0.10974957793951035, "loss_ce": 3.8890477298991755e-05, "loss_iou": 0.2265625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 181642232, "step": 1990 }, { "epoch": 8.295833333333333, "grad_norm": 2.1973734075297613, "learning_rate": 5e-05, "loss": 0.0657, "num_input_tokens_seen": 181733320, "step": 1991 }, { "epoch": 8.295833333333333, "loss": 0.05872727930545807, "loss_ce": 5.723710637539625e-05, "loss_iou": 0.146484375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 181733320, "step": 1991 }, { "epoch": 8.3, "grad_norm": 8.823895302868364, "learning_rate": 5e-05, "loss": 0.1295, "num_input_tokens_seen": 181824632, "step": 1992 }, { "epoch": 8.3, "loss": 0.11481700837612152, "loss_ce": 9.878131095319986e-06, "loss_iou": 0.373046875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 181824632, "step": 1992 }, { "epoch": 8.304166666666667, "grad_norm": 3.6855112760489823, "learning_rate": 5e-05, "loss": 0.089, "num_input_tokens_seen": 181915556, "step": 1993 }, { "epoch": 8.304166666666667, "loss": 0.08049677312374115, "loss_ce": 0.0005712423007935286, "loss_iou": 0.357421875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 181915556, "step": 1993 }, { "epoch": 8.308333333333334, "grad_norm": 1.9136759246235993, "learning_rate": 5e-05, "loss": 0.082, "num_input_tokens_seen": 182006528, "step": 1994 }, { "epoch": 8.308333333333334, "loss": 0.09684373438358307, "loss_ce": 1.1463387636467814e-05, "loss_iou": 0.2734375, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 182006528, "step": 1994 }, { "epoch": 8.3125, "grad_norm": 2.4837722716856874, "learning_rate": 5e-05, "loss": 0.0843, "num_input_tokens_seen": 182098420, "step": 1995 }, { "epoch": 8.3125, "loss": 0.13037192821502686, "loss_ce": 0.0021828359458595514, "loss_iou": 0.326171875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 182098420, "step": 1995 }, { "epoch": 8.316666666666666, "grad_norm": 4.2129148888337555, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 182189936, "step": 1996 }, { "epoch": 8.316666666666666, "loss": 0.06477083265781403, "loss_ce": 0.00014985792222432792, "loss_iou": 0.3359375, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 182189936, "step": 1996 }, { "epoch": 8.320833333333333, "grad_norm": 5.524144085808494, "learning_rate": 5e-05, "loss": 0.0694, "num_input_tokens_seen": 182281536, "step": 1997 }, { "epoch": 8.320833333333333, "loss": 0.06460338830947876, "loss_ce": 5.87101822020486e-05, "loss_iou": 0.392578125, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 182281536, "step": 1997 }, { "epoch": 8.325, "grad_norm": 6.583606047512185, "learning_rate": 5e-05, "loss": 0.1289, "num_input_tokens_seen": 182373100, "step": 1998 }, { "epoch": 8.325, "loss": 0.11908264458179474, "loss_ce": 4.883207657258026e-05, "loss_iou": 0.404296875, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 182373100, "step": 1998 }, { "epoch": 8.329166666666667, "grad_norm": 2.4531039298088952, "learning_rate": 5e-05, "loss": 0.0948, "num_input_tokens_seen": 182465048, "step": 1999 }, { "epoch": 8.329166666666667, "loss": 0.12307038903236389, "loss_ce": 0.0015951667446643114, "loss_iou": 0.138671875, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 182465048, "step": 1999 }, { "epoch": 8.333333333333334, "grad_norm": 2.298450315643497, "learning_rate": 5e-05, "loss": 0.0858, "num_input_tokens_seen": 182556628, "step": 2000 }, { "epoch": 8.333333333333334, "eval_seeclick_CIoU": 0.2732051685452461, "eval_seeclick_GIoU": 0.25603655725717545, "eval_seeclick_IoU": 0.36378540098667145, "eval_seeclick_MAE_all": 0.09515979886054993, "eval_seeclick_MAE_h": 0.06633967533707619, "eval_seeclick_MAE_w": 0.1958950012922287, "eval_seeclick_MAE_x_boxes": 0.19774916768074036, "eval_seeclick_MAE_y_boxes": 0.06996404752135277, "eval_seeclick_NUM_probability": 0.9999985694885254, "eval_seeclick_inside_bbox": 0.5852272808551788, "eval_seeclick_loss": 0.5696161985397339, "eval_seeclick_loss_ce": 0.14711932837963104, "eval_seeclick_loss_iou": 0.4210205078125, "eval_seeclick_loss_num": 0.0845184326171875, "eval_seeclick_loss_xval": 0.4228515625, "eval_seeclick_runtime": 101.9273, "eval_seeclick_samples_per_second": 0.422, "eval_seeclick_steps_per_second": 0.02, "num_input_tokens_seen": 182556628, "step": 2000 }, { "epoch": 8.333333333333334, "eval_icons_CIoU": 0.40883131325244904, "eval_icons_GIoU": 0.440561980009079, "eval_icons_IoU": 0.48336389660835266, "eval_icons_MAE_all": 0.05929296091198921, "eval_icons_MAE_h": 0.10431193187832832, "eval_icons_MAE_w": 0.10969538241624832, "eval_icons_MAE_x_boxes": 0.11118783056735992, "eval_icons_MAE_y_boxes": 0.10491820424795151, "eval_icons_NUM_probability": 0.9999992847442627, "eval_icons_inside_bbox": 0.6493055522441864, "eval_icons_loss": 0.29335036873817444, "eval_icons_loss_ce": 4.6103790737106465e-05, "eval_icons_loss_iou": 0.30523681640625, "eval_icons_loss_num": 0.0626220703125, "eval_icons_loss_xval": 0.3133392333984375, "eval_icons_runtime": 88.402, "eval_icons_samples_per_second": 0.566, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 182556628, "step": 2000 }, { "epoch": 8.333333333333334, "eval_screenspot_CIoU": 0.3823320269584656, "eval_screenspot_GIoU": 0.36015834907690686, "eval_screenspot_IoU": 0.45546941955884296, "eval_screenspot_MAE_all": 0.09890640527009964, "eval_screenspot_MAE_h": 0.10396142303943634, "eval_screenspot_MAE_w": 0.1897703061501185, "eval_screenspot_MAE_x_boxes": 0.18329455455144247, "eval_screenspot_MAE_y_boxes": 0.09101255610585213, "eval_screenspot_NUM_probability": 0.9999987483024597, "eval_screenspot_inside_bbox": 0.6804166634877523, "eval_screenspot_loss": 0.4989835321903229, "eval_screenspot_loss_ce": 0.00019722061066810662, "eval_screenspot_loss_iou": 0.3795979817708333, "eval_screenspot_loss_num": 0.100006103515625, "eval_screenspot_loss_xval": 0.5005289713541666, "eval_screenspot_runtime": 152.4963, "eval_screenspot_samples_per_second": 0.584, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 182556628, "step": 2000 }, { "epoch": 8.333333333333334, "eval_compot_CIoU": 0.409628689289093, "eval_compot_GIoU": 0.40224941074848175, "eval_compot_IoU": 0.48659662902355194, "eval_compot_MAE_all": 0.0632275864481926, "eval_compot_MAE_h": 0.08800311759114265, "eval_compot_MAE_w": 0.14701341837644577, "eval_compot_MAE_x_boxes": 0.1445625126361847, "eval_compot_MAE_y_boxes": 0.08778712153434753, "eval_compot_NUM_probability": 0.9999973773956299, "eval_compot_inside_bbox": 0.6927083432674408, "eval_compot_loss": 0.34585246443748474, "eval_compot_loss_ce": 0.030167696997523308, "eval_compot_loss_iou": 0.29705810546875, "eval_compot_loss_num": 0.057285308837890625, "eval_compot_loss_xval": 0.2865447998046875, "eval_compot_runtime": 88.9042, "eval_compot_samples_per_second": 0.562, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 182556628, "step": 2000 }, { "epoch": 8.333333333333334, "loss": 0.2891286611557007, "loss_ce": 0.028233911842107773, "loss_iou": 0.33984375, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 182556628, "step": 2000 }, { "epoch": 8.3375, "grad_norm": 3.536412880551861, "learning_rate": 5e-05, "loss": 0.0975, "num_input_tokens_seen": 182648036, "step": 2001 }, { "epoch": 8.3375, "loss": 0.07516656816005707, "loss_ce": 1.70273742696736e-05, "loss_iou": 0.236328125, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 182648036, "step": 2001 }, { "epoch": 8.341666666666667, "grad_norm": 2.2089360748949334, "learning_rate": 5e-05, "loss": 0.0785, "num_input_tokens_seen": 182738916, "step": 2002 }, { "epoch": 8.341666666666667, "loss": 0.10036590695381165, "loss_ce": 0.003472598735243082, "loss_iou": 0.287109375, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 182738916, "step": 2002 }, { "epoch": 8.345833333333333, "grad_norm": 3.3586904659649424, "learning_rate": 5e-05, "loss": 0.0759, "num_input_tokens_seen": 182830120, "step": 2003 }, { "epoch": 8.345833333333333, "loss": 0.0666748583316803, "loss_ce": 7.787040522089228e-05, "loss_iou": 0.298828125, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 182830120, "step": 2003 }, { "epoch": 8.35, "grad_norm": 2.6220769322004513, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 182921292, "step": 2004 }, { "epoch": 8.35, "loss": 0.04563147574663162, "loss_ce": 2.295524245710112e-05, "loss_iou": 0.2578125, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 182921292, "step": 2004 }, { "epoch": 8.354166666666666, "grad_norm": 1.4887052911847, "learning_rate": 5e-05, "loss": 0.0778, "num_input_tokens_seen": 183012836, "step": 2005 }, { "epoch": 8.354166666666666, "loss": 0.049174100160598755, "loss_ce": 0.0004833037673961371, "loss_iou": 0.1796875, "loss_num": 0.009765625, "loss_xval": 0.048583984375, "num_input_tokens_seen": 183012836, "step": 2005 }, { "epoch": 8.358333333333333, "grad_norm": 2.326647265916884, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 183104412, "step": 2006 }, { "epoch": 8.358333333333333, "loss": 0.04949578642845154, "loss_ce": 0.0006676615448668599, "loss_iou": 0.349609375, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 183104412, "step": 2006 }, { "epoch": 8.3625, "grad_norm": 46.17555951747404, "learning_rate": 5e-05, "loss": 0.0892, "num_input_tokens_seen": 183195828, "step": 2007 }, { "epoch": 8.3625, "loss": 0.04580899327993393, "loss_ce": 7.077553891576827e-05, "loss_iou": 0.1357421875, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 183195828, "step": 2007 }, { "epoch": 8.366666666666667, "grad_norm": 21.30313606809819, "learning_rate": 5e-05, "loss": 0.1172, "num_input_tokens_seen": 183287228, "step": 2008 }, { "epoch": 8.366666666666667, "loss": 0.09672500193119049, "loss_ce": 0.00012923777103424072, "loss_iou": 0.359375, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 183287228, "step": 2008 }, { "epoch": 8.370833333333334, "grad_norm": 4.311377430234867, "learning_rate": 5e-05, "loss": 0.1136, "num_input_tokens_seen": 183378304, "step": 2009 }, { "epoch": 8.370833333333334, "loss": 0.10985423624515533, "loss_ce": 0.0010438107419759035, "loss_iou": 0.2314453125, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 183378304, "step": 2009 }, { "epoch": 8.375, "grad_norm": 3.105143321823887, "learning_rate": 5e-05, "loss": 0.1125, "num_input_tokens_seen": 183468988, "step": 2010 }, { "epoch": 8.375, "loss": 0.12895122170448303, "loss_ce": 7.547732820967212e-05, "loss_iou": 0.171875, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 183468988, "step": 2010 }, { "epoch": 8.379166666666666, "grad_norm": 3.014097309533224, "learning_rate": 5e-05, "loss": 0.1494, "num_input_tokens_seen": 183560592, "step": 2011 }, { "epoch": 8.379166666666666, "loss": 0.15173643827438354, "loss_ce": 4.8823003453435376e-05, "loss_iou": 0.1904296875, "loss_num": 0.0303955078125, "loss_xval": 0.1513671875, "num_input_tokens_seen": 183560592, "step": 2011 }, { "epoch": 8.383333333333333, "grad_norm": 9.420722778412506, "learning_rate": 5e-05, "loss": 0.1165, "num_input_tokens_seen": 183651696, "step": 2012 }, { "epoch": 8.383333333333333, "loss": 0.1760517656803131, "loss_ce": 0.001826913678087294, "loss_iou": 0.2451171875, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 183651696, "step": 2012 }, { "epoch": 8.3875, "grad_norm": 2.78315603049161, "learning_rate": 5e-05, "loss": 0.1282, "num_input_tokens_seen": 183743368, "step": 2013 }, { "epoch": 8.3875, "loss": 0.12846490740776062, "loss_ce": 1.6419715393567458e-05, "loss_iou": 0.1611328125, "loss_num": 0.025634765625, "loss_xval": 0.12890625, "num_input_tokens_seen": 183743368, "step": 2013 }, { "epoch": 8.391666666666667, "grad_norm": 3.112445349758835, "learning_rate": 5e-05, "loss": 0.0974, "num_input_tokens_seen": 183834992, "step": 2014 }, { "epoch": 8.391666666666667, "loss": 0.11074173450469971, "loss_ce": 0.0008479395764879882, "loss_iou": 0.2158203125, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 183834992, "step": 2014 }, { "epoch": 8.395833333333334, "grad_norm": 9.581477628961181, "learning_rate": 5e-05, "loss": 0.1136, "num_input_tokens_seen": 183927132, "step": 2015 }, { "epoch": 8.395833333333334, "loss": 0.13781800866127014, "loss_ce": 0.0006720098899677396, "loss_iou": 0.212890625, "loss_num": 0.0274658203125, "loss_xval": 0.13671875, "num_input_tokens_seen": 183927132, "step": 2015 }, { "epoch": 8.4, "grad_norm": 3.2485861634052227, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 184018300, "step": 2016 }, { "epoch": 8.4, "loss": 0.03941866755485535, "loss_ce": 0.0006460819276981056, "loss_iou": 0.25, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 184018300, "step": 2016 }, { "epoch": 8.404166666666667, "grad_norm": 3.567778135692095, "learning_rate": 5e-05, "loss": 0.0943, "num_input_tokens_seen": 184109572, "step": 2017 }, { "epoch": 8.404166666666667, "loss": 0.11215195059776306, "loss_ce": 0.0006407210021279752, "loss_iou": 0.248046875, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 184109572, "step": 2017 }, { "epoch": 8.408333333333333, "grad_norm": 11.915336200996682, "learning_rate": 5e-05, "loss": 0.0816, "num_input_tokens_seen": 184201736, "step": 2018 }, { "epoch": 8.408333333333333, "loss": 0.05989161133766174, "loss_ce": 0.0011071297340095043, "loss_iou": 0.2265625, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 184201736, "step": 2018 }, { "epoch": 8.4125, "grad_norm": 3.1239092174908096, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 184293072, "step": 2019 }, { "epoch": 8.4125, "loss": 0.03606855124235153, "loss_ce": 0.00043927942169830203, "loss_iou": 0.2578125, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 184293072, "step": 2019 }, { "epoch": 8.416666666666666, "grad_norm": 4.363526779562095, "learning_rate": 5e-05, "loss": 0.1132, "num_input_tokens_seen": 184384216, "step": 2020 }, { "epoch": 8.416666666666666, "loss": 0.13690415024757385, "loss_ce": 3.2802829082356766e-05, "loss_iou": 0.283203125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 184384216, "step": 2020 }, { "epoch": 8.420833333333333, "grad_norm": 3.6835107197547483, "learning_rate": 5e-05, "loss": 0.0848, "num_input_tokens_seen": 184475548, "step": 2021 }, { "epoch": 8.420833333333333, "loss": 0.070304274559021, "loss_ce": 3.755126817850396e-05, "loss_iou": 0.42578125, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 184475548, "step": 2021 }, { "epoch": 8.425, "grad_norm": 43.325644936886654, "learning_rate": 5e-05, "loss": 0.0883, "num_input_tokens_seen": 184566020, "step": 2022 }, { "epoch": 8.425, "loss": 0.1017274484038353, "loss_ce": 0.00030228166724555194, "loss_iou": 0.30859375, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 184566020, "step": 2022 }, { "epoch": 8.429166666666667, "grad_norm": 5.629879042491284, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 184657684, "step": 2023 }, { "epoch": 8.429166666666667, "loss": 0.07418912649154663, "loss_ce": 0.0017098871758207679, "loss_iou": 0.162109375, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 184657684, "step": 2023 }, { "epoch": 8.433333333333334, "grad_norm": 3.660697851414219, "learning_rate": 5e-05, "loss": 0.1359, "num_input_tokens_seen": 184748280, "step": 2024 }, { "epoch": 8.433333333333334, "loss": 0.1027696281671524, "loss_ce": 0.001321564195677638, "loss_iou": 0.09521484375, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 184748280, "step": 2024 }, { "epoch": 8.4375, "grad_norm": 4.670549770025157, "learning_rate": 5e-05, "loss": 0.0862, "num_input_tokens_seen": 184839656, "step": 2025 }, { "epoch": 8.4375, "loss": 0.061051446944475174, "loss_ce": 0.00016887954552657902, "loss_iou": 0.337890625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 184839656, "step": 2025 }, { "epoch": 8.441666666666666, "grad_norm": 4.255448876340178, "learning_rate": 5e-05, "loss": 0.128, "num_input_tokens_seen": 184930812, "step": 2026 }, { "epoch": 8.441666666666666, "loss": 0.08803963661193848, "loss_ce": 0.013515707105398178, "loss_iou": 0.26953125, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 184930812, "step": 2026 }, { "epoch": 8.445833333333333, "grad_norm": 2.9847180326094547, "learning_rate": 5e-05, "loss": 0.095, "num_input_tokens_seen": 185021820, "step": 2027 }, { "epoch": 8.445833333333333, "loss": 0.0788634866476059, "loss_ce": 6.063582986826077e-06, "loss_iou": 0.349609375, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 185021820, "step": 2027 }, { "epoch": 8.45, "grad_norm": 5.296533863173452, "learning_rate": 5e-05, "loss": 0.1021, "num_input_tokens_seen": 185113136, "step": 2028 }, { "epoch": 8.45, "loss": 0.08161696791648865, "loss_ce": 0.00010451077105244622, "loss_iou": 0.26953125, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 185113136, "step": 2028 }, { "epoch": 8.454166666666667, "grad_norm": 2.359461532383512, "learning_rate": 5e-05, "loss": 0.0787, "num_input_tokens_seen": 185204172, "step": 2029 }, { "epoch": 8.454166666666667, "loss": 0.11959385871887207, "loss_ce": 5.649681406794116e-05, "loss_iou": 0.32421875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 185204172, "step": 2029 }, { "epoch": 8.458333333333334, "grad_norm": 3.3681433875629514, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 185295588, "step": 2030 }, { "epoch": 8.458333333333334, "loss": 0.051598481833934784, "loss_ce": 0.0004052447038702667, "loss_iou": 0.431640625, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 185295588, "step": 2030 }, { "epoch": 8.4625, "grad_norm": 1.8409437670897544, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 185386868, "step": 2031 }, { "epoch": 8.4625, "loss": 0.05945602431893349, "loss_ce": 0.000984343234449625, "loss_iou": 0.26953125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 185386868, "step": 2031 }, { "epoch": 8.466666666666667, "grad_norm": 7.258809483257304, "learning_rate": 5e-05, "loss": 0.097, "num_input_tokens_seen": 185478912, "step": 2032 }, { "epoch": 8.466666666666667, "loss": 0.10054165124893188, "loss_ce": 0.002397121163085103, "loss_iou": 0.296875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 185478912, "step": 2032 }, { "epoch": 8.470833333333333, "grad_norm": 3.5315820826842215, "learning_rate": 5e-05, "loss": 0.0859, "num_input_tokens_seen": 185570160, "step": 2033 }, { "epoch": 8.470833333333333, "loss": 0.12144699692726135, "loss_ce": 0.0003608747501857579, "loss_iou": 0.259765625, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 185570160, "step": 2033 }, { "epoch": 8.475, "grad_norm": 2.7617066459888617, "learning_rate": 5e-05, "loss": 0.0928, "num_input_tokens_seen": 185661288, "step": 2034 }, { "epoch": 8.475, "loss": 0.10207094252109528, "loss_ce": 0.0007830956601537764, "loss_iou": 0.2001953125, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 185661288, "step": 2034 }, { "epoch": 8.479166666666666, "grad_norm": 4.366291895999364, "learning_rate": 5e-05, "loss": 0.0744, "num_input_tokens_seen": 185751704, "step": 2035 }, { "epoch": 8.479166666666666, "loss": 0.08858929574489594, "loss_ce": 0.0017973067006096244, "loss_iou": 0.291015625, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 185751704, "step": 2035 }, { "epoch": 8.483333333333333, "grad_norm": 4.527448075354709, "learning_rate": 5e-05, "loss": 0.1133, "num_input_tokens_seen": 185842700, "step": 2036 }, { "epoch": 8.483333333333333, "loss": 0.0761345848441124, "loss_ce": 0.0007256510434672236, "loss_iou": 0.34765625, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 185842700, "step": 2036 }, { "epoch": 8.4875, "grad_norm": 2.658451616936535, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 185933988, "step": 2037 }, { "epoch": 8.4875, "loss": 0.056536074727773666, "loss_ce": 0.003008242230862379, "loss_iou": 0.310546875, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 185933988, "step": 2037 }, { "epoch": 8.491666666666667, "grad_norm": 2.532176322012894, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 186025476, "step": 2038 }, { "epoch": 8.491666666666667, "loss": 0.07818962633609772, "loss_ce": 0.0006292054313234985, "loss_iou": 0.1435546875, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 186025476, "step": 2038 }, { "epoch": 8.495833333333334, "grad_norm": 4.14495850982906, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 186116408, "step": 2039 }, { "epoch": 8.495833333333334, "loss": 0.042085736989974976, "loss_ce": 0.00010880563786486164, "loss_iou": 0.2890625, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 186116408, "step": 2039 }, { "epoch": 8.5, "grad_norm": 6.4528287559735285, "learning_rate": 5e-05, "loss": 0.0852, "num_input_tokens_seen": 186208424, "step": 2040 }, { "epoch": 8.5, "loss": 0.10338255763053894, "loss_ce": 0.0007214199285954237, "loss_iou": 0.296875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 186208424, "step": 2040 }, { "epoch": 8.504166666666666, "grad_norm": 3.13212007479543, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 186299120, "step": 2041 }, { "epoch": 8.504166666666666, "loss": 0.07471642643213272, "loss_ce": 0.00019250292098149657, "loss_iou": 0.3515625, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 186299120, "step": 2041 }, { "epoch": 8.508333333333333, "grad_norm": 3.038527609423815, "learning_rate": 5e-05, "loss": 0.1123, "num_input_tokens_seen": 186390332, "step": 2042 }, { "epoch": 8.508333333333333, "loss": 0.13762235641479492, "loss_ce": 0.00021695908799301833, "loss_iou": 0.376953125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 186390332, "step": 2042 }, { "epoch": 8.5125, "grad_norm": 3.118662041893769, "learning_rate": 5e-05, "loss": 0.0754, "num_input_tokens_seen": 186482152, "step": 2043 }, { "epoch": 8.5125, "loss": 0.07551813125610352, "loss_ce": 0.00024652251158840954, "loss_iou": 0.408203125, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 186482152, "step": 2043 }, { "epoch": 8.516666666666667, "grad_norm": 2.1146865073908656, "learning_rate": 5e-05, "loss": 0.1233, "num_input_tokens_seen": 186573600, "step": 2044 }, { "epoch": 8.516666666666667, "loss": 0.08287165313959122, "loss_ce": 3.931445462512784e-05, "loss_iou": 0.341796875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 186573600, "step": 2044 }, { "epoch": 8.520833333333334, "grad_norm": 2.9617114380688663, "learning_rate": 5e-05, "loss": 0.1143, "num_input_tokens_seen": 186665696, "step": 2045 }, { "epoch": 8.520833333333334, "loss": 0.1069689616560936, "loss_ce": 0.0004855015140492469, "loss_iou": 0.2890625, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 186665696, "step": 2045 }, { "epoch": 8.525, "grad_norm": 8.627872563995814, "learning_rate": 5e-05, "loss": 0.1128, "num_input_tokens_seen": 186757492, "step": 2046 }, { "epoch": 8.525, "loss": 0.12421756237745285, "loss_ce": 0.0015674126334488392, "loss_iou": 0.216796875, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 186757492, "step": 2046 }, { "epoch": 8.529166666666667, "grad_norm": 1.4626980237684941, "learning_rate": 5e-05, "loss": 0.1105, "num_input_tokens_seen": 186849112, "step": 2047 }, { "epoch": 8.529166666666667, "loss": 0.15261510014533997, "loss_ce": 1.1944499419769272e-05, "loss_iou": 0.33203125, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 186849112, "step": 2047 }, { "epoch": 8.533333333333333, "grad_norm": 1.1764810754134916, "learning_rate": 5e-05, "loss": 0.0724, "num_input_tokens_seen": 186940656, "step": 2048 }, { "epoch": 8.533333333333333, "loss": 0.07178713381290436, "loss_ce": 0.00011659866140689701, "loss_iou": 0.263671875, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 186940656, "step": 2048 }, { "epoch": 8.5375, "grad_norm": 6.872566752363237, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 187032012, "step": 2049 }, { "epoch": 8.5375, "loss": 0.04218311607837677, "loss_ce": 2.3082926418283023e-05, "loss_iou": 0.2890625, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 187032012, "step": 2049 }, { "epoch": 8.541666666666666, "grad_norm": 6.567042892572599, "learning_rate": 5e-05, "loss": 0.1076, "num_input_tokens_seen": 187123528, "step": 2050 }, { "epoch": 8.541666666666666, "loss": 0.09021590650081635, "loss_ce": 3.6459336115513e-05, "loss_iou": 0.33203125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 187123528, "step": 2050 }, { "epoch": 8.545833333333333, "grad_norm": 3.9997670150130307, "learning_rate": 5e-05, "loss": 0.1097, "num_input_tokens_seen": 187214544, "step": 2051 }, { "epoch": 8.545833333333333, "loss": 0.05643386393785477, "loss_ce": 0.0014259336749091744, "loss_iou": 0.1943359375, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 187214544, "step": 2051 }, { "epoch": 8.55, "grad_norm": 2.946976187208218, "learning_rate": 5e-05, "loss": 0.0885, "num_input_tokens_seen": 187305548, "step": 2052 }, { "epoch": 8.55, "loss": 0.06314453482627869, "loss_ce": 0.0005377238849177957, "loss_iou": 0.21875, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 187305548, "step": 2052 }, { "epoch": 8.554166666666667, "grad_norm": 6.762155479494456, "learning_rate": 5e-05, "loss": 0.0901, "num_input_tokens_seen": 187397320, "step": 2053 }, { "epoch": 8.554166666666667, "loss": 0.0761374980211258, "loss_ce": 1.1393080967536662e-05, "loss_iou": 0.39453125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 187397320, "step": 2053 }, { "epoch": 8.558333333333334, "grad_norm": 1.898023791525541, "learning_rate": 5e-05, "loss": 0.0852, "num_input_tokens_seen": 187489576, "step": 2054 }, { "epoch": 8.558333333333334, "loss": 0.06530947238206863, "loss_ce": 0.0002612557145766914, "loss_iou": 0.208984375, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 187489576, "step": 2054 }, { "epoch": 8.5625, "grad_norm": 2.0714800012369534, "learning_rate": 5e-05, "loss": 0.0887, "num_input_tokens_seen": 187580848, "step": 2055 }, { "epoch": 8.5625, "loss": 0.08084389567375183, "loss_ce": 2.8269764698052313e-06, "loss_iou": 0.1640625, "loss_num": 0.01611328125, "loss_xval": 0.0810546875, "num_input_tokens_seen": 187580848, "step": 2055 }, { "epoch": 8.566666666666666, "grad_norm": 4.945427219437998, "learning_rate": 5e-05, "loss": 0.1061, "num_input_tokens_seen": 187672592, "step": 2056 }, { "epoch": 8.566666666666666, "loss": 0.06978463381528854, "loss_ce": 0.00023506842262577266, "loss_iou": 0.26171875, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 187672592, "step": 2056 }, { "epoch": 8.570833333333333, "grad_norm": 20.23750197807952, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 187764052, "step": 2057 }, { "epoch": 8.570833333333333, "loss": 0.0696493536233902, "loss_ce": 5.401993257692084e-05, "loss_iou": 0.31640625, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 187764052, "step": 2057 }, { "epoch": 8.575, "grad_norm": 1.6020255845359246, "learning_rate": 5e-05, "loss": 0.0807, "num_input_tokens_seen": 187856304, "step": 2058 }, { "epoch": 8.575, "loss": 0.10885445773601532, "loss_ce": 0.0014325795928016305, "loss_iou": 0.255859375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 187856304, "step": 2058 }, { "epoch": 8.579166666666667, "grad_norm": 4.16555631246304, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 187948216, "step": 2059 }, { "epoch": 8.579166666666667, "loss": 0.10074003040790558, "loss_ce": 0.00023802298528607935, "loss_iou": 0.3203125, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 187948216, "step": 2059 }, { "epoch": 8.583333333333334, "grad_norm": 1.9885928154411758, "learning_rate": 5e-05, "loss": 0.0535, "num_input_tokens_seen": 188040056, "step": 2060 }, { "epoch": 8.583333333333334, "loss": 0.046524547040462494, "loss_ce": 0.0013280146522447467, "loss_iou": 0.3046875, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 188040056, "step": 2060 }, { "epoch": 8.5875, "grad_norm": 4.150974742592255, "learning_rate": 5e-05, "loss": 0.0902, "num_input_tokens_seen": 188132004, "step": 2061 }, { "epoch": 8.5875, "loss": 0.10680100321769714, "loss_ce": 0.0002336244797334075, "loss_iou": 0.443359375, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 188132004, "step": 2061 }, { "epoch": 8.591666666666667, "grad_norm": 2.198395054417203, "learning_rate": 5e-05, "loss": 0.1037, "num_input_tokens_seen": 188222984, "step": 2062 }, { "epoch": 8.591666666666667, "loss": 0.14485947787761688, "loss_ce": 0.0012742701219394803, "loss_iou": 0.251953125, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 188222984, "step": 2062 }, { "epoch": 8.595833333333333, "grad_norm": 2.525503803552608, "learning_rate": 5e-05, "loss": 0.1066, "num_input_tokens_seen": 188314616, "step": 2063 }, { "epoch": 8.595833333333333, "loss": 0.04075239598751068, "loss_ce": 0.0002250537509098649, "loss_iou": 0.4375, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 188314616, "step": 2063 }, { "epoch": 8.6, "grad_norm": 3.629610035159468, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 188406216, "step": 2064 }, { "epoch": 8.6, "loss": 0.05258668214082718, "loss_ce": 0.0001269671629415825, "loss_iou": 0.26953125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 188406216, "step": 2064 }, { "epoch": 8.604166666666666, "grad_norm": 9.671827213881189, "learning_rate": 5e-05, "loss": 0.0814, "num_input_tokens_seen": 188497988, "step": 2065 }, { "epoch": 8.604166666666666, "loss": 0.051124535501003265, "loss_ce": 0.0021590786054730415, "loss_iou": 0.3203125, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 188497988, "step": 2065 }, { "epoch": 8.608333333333333, "grad_norm": 3.3838829799006787, "learning_rate": 5e-05, "loss": 0.0697, "num_input_tokens_seen": 188589916, "step": 2066 }, { "epoch": 8.608333333333333, "loss": 0.06941956281661987, "loss_ce": 2.2590240405406803e-05, "loss_iou": 0.306640625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 188589916, "step": 2066 }, { "epoch": 8.6125, "grad_norm": 7.240025883543789, "learning_rate": 5e-05, "loss": 0.1184, "num_input_tokens_seen": 188680664, "step": 2067 }, { "epoch": 8.6125, "loss": 0.13569886982440948, "loss_ce": 2.461551275700913e-06, "loss_iou": 0.291015625, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 188680664, "step": 2067 }, { "epoch": 8.616666666666667, "grad_norm": 10.289727923962106, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 188771524, "step": 2068 }, { "epoch": 8.616666666666667, "loss": 0.06156828999519348, "loss_ce": 0.0006246872362680733, "loss_iou": 0.330078125, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 188771524, "step": 2068 }, { "epoch": 8.620833333333334, "grad_norm": 3.0223526650592687, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 188862644, "step": 2069 }, { "epoch": 8.620833333333334, "loss": 0.03289524093270302, "loss_ce": 0.0002719507901929319, "loss_iou": 0.32421875, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 188862644, "step": 2069 }, { "epoch": 8.625, "grad_norm": 7.6635049861009295, "learning_rate": 5e-05, "loss": 0.136, "num_input_tokens_seen": 188953916, "step": 2070 }, { "epoch": 8.625, "loss": 0.14031267166137695, "loss_ce": 0.001472959527745843, "loss_iou": 0.2109375, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 188953916, "step": 2070 }, { "epoch": 8.629166666666666, "grad_norm": 3.774380906886847, "learning_rate": 5e-05, "loss": 0.0951, "num_input_tokens_seen": 189044936, "step": 2071 }, { "epoch": 8.629166666666666, "loss": 0.04810202494263649, "loss_ce": 3.6839413951383904e-05, "loss_iou": 0.208984375, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 189044936, "step": 2071 }, { "epoch": 8.633333333333333, "grad_norm": 3.1837139415114883, "learning_rate": 5e-05, "loss": 0.0947, "num_input_tokens_seen": 189134984, "step": 2072 }, { "epoch": 8.633333333333333, "loss": 0.07738348841667175, "loss_ce": 5.1942650316050276e-05, "loss_iou": 0.2578125, "loss_num": 0.0155029296875, "loss_xval": 0.0771484375, "num_input_tokens_seen": 189134984, "step": 2072 }, { "epoch": 8.6375, "grad_norm": 2.321453932470765, "learning_rate": 5e-05, "loss": 0.0726, "num_input_tokens_seen": 189226136, "step": 2073 }, { "epoch": 8.6375, "loss": 0.05806281045079231, "loss_ce": 3.3636093576205894e-05, "loss_iou": 0.25390625, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 189226136, "step": 2073 }, { "epoch": 8.641666666666667, "grad_norm": 7.354532468160025, "learning_rate": 5e-05, "loss": 0.1341, "num_input_tokens_seen": 189317268, "step": 2074 }, { "epoch": 8.641666666666667, "loss": 0.1462700068950653, "loss_ce": 6.0283447965048254e-05, "loss_iou": 0.265625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 189317268, "step": 2074 }, { "epoch": 8.645833333333334, "grad_norm": 3.238401783158548, "learning_rate": 5e-05, "loss": 0.0958, "num_input_tokens_seen": 189408096, "step": 2075 }, { "epoch": 8.645833333333334, "loss": 0.1143522709608078, "loss_ce": 2.9121104034857126e-06, "loss_iou": 0.11279296875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 189408096, "step": 2075 }, { "epoch": 8.65, "grad_norm": 4.565351741671237, "learning_rate": 5e-05, "loss": 0.1101, "num_input_tokens_seen": 189499192, "step": 2076 }, { "epoch": 8.65, "loss": 0.11114580929279327, "loss_ce": 0.0005043236305937171, "loss_iou": 0.29296875, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 189499192, "step": 2076 }, { "epoch": 8.654166666666667, "grad_norm": 2.7719252653668978, "learning_rate": 5e-05, "loss": 0.0707, "num_input_tokens_seen": 189589952, "step": 2077 }, { "epoch": 8.654166666666667, "loss": 0.07235507667064667, "loss_ce": 0.00012377678649500012, "loss_iou": 0.310546875, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 189589952, "step": 2077 }, { "epoch": 8.658333333333333, "grad_norm": 8.810895366492268, "learning_rate": 5e-05, "loss": 0.1348, "num_input_tokens_seen": 189681012, "step": 2078 }, { "epoch": 8.658333333333333, "loss": 0.10324892401695251, "loss_ce": 0.0006106742075644433, "loss_iou": 0.28515625, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 189681012, "step": 2078 }, { "epoch": 8.6625, "grad_norm": 2.9084218501299213, "learning_rate": 5e-05, "loss": 0.1109, "num_input_tokens_seen": 189771988, "step": 2079 }, { "epoch": 8.6625, "loss": 0.0454835519194603, "loss_ce": 0.00011916970106540248, "loss_iou": 0.32421875, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 189771988, "step": 2079 }, { "epoch": 8.666666666666666, "grad_norm": 3.59826792646645, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 189863352, "step": 2080 }, { "epoch": 8.666666666666666, "loss": 0.06523586809635162, "loss_ce": 4.540888767223805e-06, "loss_iou": 0.33984375, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 189863352, "step": 2080 }, { "epoch": 8.670833333333333, "grad_norm": 17.34496416831108, "learning_rate": 5e-05, "loss": 0.0881, "num_input_tokens_seen": 189955520, "step": 2081 }, { "epoch": 8.670833333333333, "loss": 0.13551849126815796, "loss_ce": 0.003350684652104974, "loss_iou": 0.255859375, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 189955520, "step": 2081 }, { "epoch": 8.675, "grad_norm": 20.911905020217905, "learning_rate": 5e-05, "loss": 0.1087, "num_input_tokens_seen": 190047308, "step": 2082 }, { "epoch": 8.675, "loss": 0.09232109785079956, "loss_ce": 0.00012748880544677377, "loss_iou": 0.375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 190047308, "step": 2082 }, { "epoch": 8.679166666666667, "grad_norm": 2.6915926195134414, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 190137948, "step": 2083 }, { "epoch": 8.679166666666667, "loss": 0.06865386664867401, "loss_ce": 0.005497746169567108, "loss_iou": 0.271484375, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 190137948, "step": 2083 }, { "epoch": 8.683333333333334, "grad_norm": 5.811296943866821, "learning_rate": 5e-05, "loss": 0.0792, "num_input_tokens_seen": 190229720, "step": 2084 }, { "epoch": 8.683333333333334, "loss": 0.07164183259010315, "loss_ce": 0.00100889487657696, "loss_iou": 0.203125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 190229720, "step": 2084 }, { "epoch": 8.6875, "grad_norm": 2.114910384036427, "learning_rate": 5e-05, "loss": 0.0738, "num_input_tokens_seen": 190321172, "step": 2085 }, { "epoch": 8.6875, "loss": 0.05217263475060463, "loss_ce": 0.0006894819671288133, "loss_iou": 0.271484375, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 190321172, "step": 2085 }, { "epoch": 8.691666666666666, "grad_norm": 5.1616318256216775, "learning_rate": 5e-05, "loss": 0.0683, "num_input_tokens_seen": 190412864, "step": 2086 }, { "epoch": 8.691666666666666, "loss": 0.0800904929637909, "loss_ce": 0.00022598655777983367, "loss_iou": 0.255859375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 190412864, "step": 2086 }, { "epoch": 8.695833333333333, "grad_norm": 2.7341320610815245, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 190503832, "step": 2087 }, { "epoch": 8.695833333333333, "loss": 0.12463854253292084, "loss_ce": 4.754873771162238e-06, "loss_iou": 0.408203125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 190503832, "step": 2087 }, { "epoch": 8.7, "grad_norm": 2.5917705973807563, "learning_rate": 5e-05, "loss": 0.105, "num_input_tokens_seen": 190594728, "step": 2088 }, { "epoch": 8.7, "loss": 0.048062510788440704, "loss_ce": 1.2583659554366022e-05, "loss_iou": 0.2421875, "loss_num": 0.00958251953125, "loss_xval": 0.048095703125, "num_input_tokens_seen": 190594728, "step": 2088 }, { "epoch": 8.704166666666667, "grad_norm": 13.43020791954383, "learning_rate": 5e-05, "loss": 0.0937, "num_input_tokens_seen": 190686224, "step": 2089 }, { "epoch": 8.704166666666667, "loss": 0.09442667663097382, "loss_ce": 0.00011209918739041314, "loss_iou": 0.3125, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 190686224, "step": 2089 }, { "epoch": 8.708333333333334, "grad_norm": 4.704922294384652, "learning_rate": 5e-05, "loss": 0.1076, "num_input_tokens_seen": 190777928, "step": 2090 }, { "epoch": 8.708333333333334, "loss": 0.09706898033618927, "loss_ce": 0.0027696597389876842, "loss_iou": 0.228515625, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 190777928, "step": 2090 }, { "epoch": 8.7125, "grad_norm": 2.9925223158655196, "learning_rate": 5e-05, "loss": 0.0729, "num_input_tokens_seen": 190868672, "step": 2091 }, { "epoch": 8.7125, "loss": 0.09800204634666443, "loss_ce": 4.062041625729762e-05, "loss_iou": 0.3359375, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 190868672, "step": 2091 }, { "epoch": 8.716666666666667, "grad_norm": 5.7989558548599, "learning_rate": 5e-05, "loss": 0.1182, "num_input_tokens_seen": 190959652, "step": 2092 }, { "epoch": 8.716666666666667, "loss": 0.16375920176506042, "loss_ce": 1.8912126051873202e-06, "loss_iou": 0.189453125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 190959652, "step": 2092 }, { "epoch": 8.720833333333333, "grad_norm": 2.431675951228576, "learning_rate": 5e-05, "loss": 0.111, "num_input_tokens_seen": 191051024, "step": 2093 }, { "epoch": 8.720833333333333, "loss": 0.1278170347213745, "loss_ce": 0.005975592415779829, "loss_iou": 0.345703125, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 191051024, "step": 2093 }, { "epoch": 8.725, "grad_norm": 2.937522668485948, "learning_rate": 5e-05, "loss": 0.077, "num_input_tokens_seen": 191142540, "step": 2094 }, { "epoch": 8.725, "loss": 0.07788769155740738, "loss_ce": 0.0004645938170142472, "loss_iou": 0.3359375, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 191142540, "step": 2094 }, { "epoch": 8.729166666666666, "grad_norm": 3.6110487331418923, "learning_rate": 5e-05, "loss": 0.0899, "num_input_tokens_seen": 191233892, "step": 2095 }, { "epoch": 8.729166666666666, "loss": 0.07968556880950928, "loss_ce": 0.0003703873953782022, "loss_iou": 0.2470703125, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 191233892, "step": 2095 }, { "epoch": 8.733333333333333, "grad_norm": 2.5680080860852246, "learning_rate": 5e-05, "loss": 0.0656, "num_input_tokens_seen": 191325460, "step": 2096 }, { "epoch": 8.733333333333333, "loss": 0.07615506649017334, "loss_ce": 0.00021970555826555938, "loss_iou": 0.2236328125, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 191325460, "step": 2096 }, { "epoch": 8.7375, "grad_norm": 1.4405956050430866, "learning_rate": 5e-05, "loss": 0.099, "num_input_tokens_seen": 191416692, "step": 2097 }, { "epoch": 8.7375, "loss": 0.11963039636611938, "loss_ce": 0.0008712376584298909, "loss_iou": 0.2431640625, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 191416692, "step": 2097 }, { "epoch": 8.741666666666667, "grad_norm": 4.359680089658798, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 191508012, "step": 2098 }, { "epoch": 8.741666666666667, "loss": 0.07305742800235748, "loss_ce": 2.885695721488446e-05, "loss_iou": 0.34375, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 191508012, "step": 2098 }, { "epoch": 8.745833333333334, "grad_norm": 4.433279601445979, "learning_rate": 5e-05, "loss": 0.1643, "num_input_tokens_seen": 191599136, "step": 2099 }, { "epoch": 8.745833333333334, "loss": 0.16946928203105927, "loss_ce": 0.00017300611943937838, "loss_iou": 0.41796875, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 191599136, "step": 2099 }, { "epoch": 8.75, "grad_norm": 3.7135848759531958, "learning_rate": 5e-05, "loss": 0.1112, "num_input_tokens_seen": 191690728, "step": 2100 }, { "epoch": 8.75, "loss": 0.12792375683784485, "loss_ce": 0.0010927030816674232, "loss_iou": 0.154296875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 191690728, "step": 2100 }, { "epoch": 8.754166666666666, "grad_norm": 1.7264645288509217, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 191781468, "step": 2101 }, { "epoch": 8.754166666666666, "loss": 0.12447254359722137, "loss_ce": 2.185742232541088e-05, "loss_iou": 0.322265625, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 191781468, "step": 2101 }, { "epoch": 8.758333333333333, "grad_norm": 3.3933607028179202, "learning_rate": 5e-05, "loss": 0.0889, "num_input_tokens_seen": 191872584, "step": 2102 }, { "epoch": 8.758333333333333, "loss": 0.04163660481572151, "loss_ce": 0.0016738366102799773, "loss_iou": 0.296875, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 191872584, "step": 2102 }, { "epoch": 8.7625, "grad_norm": 8.584544951393006, "learning_rate": 5e-05, "loss": 0.0658, "num_input_tokens_seen": 191964244, "step": 2103 }, { "epoch": 8.7625, "loss": 0.05103296786546707, "loss_ce": 2.2833030016045086e-05, "loss_iou": 0.2412109375, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 191964244, "step": 2103 }, { "epoch": 8.766666666666667, "grad_norm": 3.389537826363904, "learning_rate": 5e-05, "loss": 0.1095, "num_input_tokens_seen": 192055988, "step": 2104 }, { "epoch": 8.766666666666667, "loss": 0.11578889936208725, "loss_ce": 0.0028128253761678934, "loss_iou": 0.26171875, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 192055988, "step": 2104 }, { "epoch": 8.770833333333334, "grad_norm": 3.1339648510996656, "learning_rate": 5e-05, "loss": 0.0707, "num_input_tokens_seen": 192147132, "step": 2105 }, { "epoch": 8.770833333333334, "loss": 0.03980373591184616, "loss_ce": 0.0013515896862372756, "loss_iou": 0.31640625, "loss_num": 0.0076904296875, "loss_xval": 0.03857421875, "num_input_tokens_seen": 192147132, "step": 2105 }, { "epoch": 8.775, "grad_norm": 29.03098369988653, "learning_rate": 5e-05, "loss": 0.0764, "num_input_tokens_seen": 192236892, "step": 2106 }, { "epoch": 8.775, "loss": 0.11094363778829575, "loss_ce": 4.276115942047909e-05, "loss_iou": 0.216796875, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 192236892, "step": 2106 }, { "epoch": 8.779166666666667, "grad_norm": 6.477582900237416, "learning_rate": 5e-05, "loss": 0.0961, "num_input_tokens_seen": 192328540, "step": 2107 }, { "epoch": 8.779166666666667, "loss": 0.12595024704933167, "loss_ce": 0.004871758632361889, "loss_iou": 0.326171875, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 192328540, "step": 2107 }, { "epoch": 8.783333333333333, "grad_norm": 4.283770827213335, "learning_rate": 5e-05, "loss": 0.1416, "num_input_tokens_seen": 192419156, "step": 2108 }, { "epoch": 8.783333333333333, "loss": 0.14962969720363617, "loss_ce": 0.004701715894043446, "loss_iou": 0.20703125, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 192419156, "step": 2108 }, { "epoch": 8.7875, "grad_norm": 2.6686898314759477, "learning_rate": 5e-05, "loss": 0.1166, "num_input_tokens_seen": 192510612, "step": 2109 }, { "epoch": 8.7875, "loss": 0.10354699194431305, "loss_ce": 0.0019234552746638656, "loss_iou": 0.205078125, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 192510612, "step": 2109 }, { "epoch": 8.791666666666666, "grad_norm": 17.66232531707253, "learning_rate": 5e-05, "loss": 0.093, "num_input_tokens_seen": 192602204, "step": 2110 }, { "epoch": 8.791666666666666, "loss": 0.06168989837169647, "loss_ce": 2.1500989532796666e-05, "loss_iou": 0.2236328125, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 192602204, "step": 2110 }, { "epoch": 8.795833333333333, "grad_norm": 4.238738535772789, "learning_rate": 5e-05, "loss": 0.162, "num_input_tokens_seen": 192693876, "step": 2111 }, { "epoch": 8.795833333333333, "loss": 0.16318759322166443, "loss_ce": 7.11341854184866e-05, "loss_iou": 0.2314453125, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 192693876, "step": 2111 }, { "epoch": 8.8, "grad_norm": 4.050774282573471, "learning_rate": 5e-05, "loss": 0.117, "num_input_tokens_seen": 192785168, "step": 2112 }, { "epoch": 8.8, "loss": 0.06067885830998421, "loss_ce": 0.0006050096708349884, "loss_iou": 0.3359375, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 192785168, "step": 2112 }, { "epoch": 8.804166666666667, "grad_norm": 4.165906172671899, "learning_rate": 5e-05, "loss": 0.0883, "num_input_tokens_seen": 192876180, "step": 2113 }, { "epoch": 8.804166666666667, "loss": 0.09994374215602875, "loss_ce": 0.00044117873767390847, "loss_iou": 0.38671875, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 192876180, "step": 2113 }, { "epoch": 8.808333333333334, "grad_norm": 1.6761793703721553, "learning_rate": 5e-05, "loss": 0.1109, "num_input_tokens_seen": 192967564, "step": 2114 }, { "epoch": 8.808333333333334, "loss": 0.037985123693943024, "loss_ce": 0.00040272765909321606, "loss_iou": 0.3203125, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 192967564, "step": 2114 }, { "epoch": 8.8125, "grad_norm": 1.3339770433832159, "learning_rate": 5e-05, "loss": 0.0947, "num_input_tokens_seen": 193058600, "step": 2115 }, { "epoch": 8.8125, "loss": 0.13931246101856232, "loss_ce": 4.5497559767682105e-05, "loss_iou": 0.298828125, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 193058600, "step": 2115 }, { "epoch": 8.816666666666666, "grad_norm": 8.546449636295195, "learning_rate": 5e-05, "loss": 0.1091, "num_input_tokens_seen": 193149564, "step": 2116 }, { "epoch": 8.816666666666666, "loss": 0.10763582587242126, "loss_ce": 0.00012239051284268498, "loss_iou": 0.2265625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 193149564, "step": 2116 }, { "epoch": 8.820833333333333, "grad_norm": 1.7750513370782777, "learning_rate": 5e-05, "loss": 0.074, "num_input_tokens_seen": 193240292, "step": 2117 }, { "epoch": 8.820833333333333, "loss": 0.06835909187793732, "loss_ce": 0.0012814588844776154, "loss_iou": 0.123046875, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 193240292, "step": 2117 }, { "epoch": 8.825, "grad_norm": 2.7249429235580584, "learning_rate": 5e-05, "loss": 0.0783, "num_input_tokens_seen": 193331388, "step": 2118 }, { "epoch": 8.825, "loss": 0.09143895655870438, "loss_ce": 2.3549444449599832e-05, "loss_iou": 0.2734375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 193331388, "step": 2118 }, { "epoch": 8.829166666666667, "grad_norm": 1.9231219016705554, "learning_rate": 5e-05, "loss": 0.1003, "num_input_tokens_seen": 193422096, "step": 2119 }, { "epoch": 8.829166666666667, "loss": 0.07505150139331818, "loss_ce": 0.0005580966244451702, "loss_iou": 0.36328125, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 193422096, "step": 2119 }, { "epoch": 8.833333333333334, "grad_norm": 5.275844547716279, "learning_rate": 5e-05, "loss": 0.0962, "num_input_tokens_seen": 193513700, "step": 2120 }, { "epoch": 8.833333333333334, "loss": 0.06332780420780182, "loss_ce": 0.0007972927996888757, "loss_iou": 0.302734375, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 193513700, "step": 2120 }, { "epoch": 8.8375, "grad_norm": 16.596431523665018, "learning_rate": 5e-05, "loss": 0.109, "num_input_tokens_seen": 193604280, "step": 2121 }, { "epoch": 8.8375, "loss": 0.13335970044136047, "loss_ce": 0.00015810200420673937, "loss_iou": 0.3359375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 193604280, "step": 2121 }, { "epoch": 8.841666666666667, "grad_norm": 2.582045655864519, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 193695776, "step": 2122 }, { "epoch": 8.841666666666667, "loss": 0.09266501665115356, "loss_ce": 0.00013571848103310913, "loss_iou": 0.32421875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 193695776, "step": 2122 }, { "epoch": 8.845833333333333, "grad_norm": 2.0875391094102547, "learning_rate": 5e-05, "loss": 0.0894, "num_input_tokens_seen": 193787284, "step": 2123 }, { "epoch": 8.845833333333333, "loss": 0.10424083471298218, "loss_ce": 8.434802293777466e-05, "loss_iou": 0.16015625, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 193787284, "step": 2123 }, { "epoch": 8.85, "grad_norm": 2.8181687136177556, "learning_rate": 5e-05, "loss": 0.1328, "num_input_tokens_seen": 193877640, "step": 2124 }, { "epoch": 8.85, "loss": 0.10266932845115662, "loss_ce": 8.199165677069686e-06, "loss_iou": 0.22265625, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 193877640, "step": 2124 }, { "epoch": 8.854166666666666, "grad_norm": 3.8795960865784234, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 193968252, "step": 2125 }, { "epoch": 8.854166666666666, "loss": 0.046281665563583374, "loss_ce": 0.000520557165145874, "loss_iou": 0.208984375, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 193968252, "step": 2125 }, { "epoch": 8.858333333333333, "grad_norm": 3.9635752523662466, "learning_rate": 5e-05, "loss": 0.0807, "num_input_tokens_seen": 194060032, "step": 2126 }, { "epoch": 8.858333333333333, "loss": 0.10270962119102478, "loss_ce": 0.0006588352262042463, "loss_iou": 0.384765625, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 194060032, "step": 2126 }, { "epoch": 8.8625, "grad_norm": 4.323210515511157, "learning_rate": 5e-05, "loss": 0.0717, "num_input_tokens_seen": 194151192, "step": 2127 }, { "epoch": 8.8625, "loss": 0.09275850653648376, "loss_ce": 7.662278221687302e-05, "loss_iou": 0.2314453125, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 194151192, "step": 2127 }, { "epoch": 8.866666666666667, "grad_norm": 3.2666282041716883, "learning_rate": 5e-05, "loss": 0.0785, "num_input_tokens_seen": 194242152, "step": 2128 }, { "epoch": 8.866666666666667, "loss": 0.11563707888126373, "loss_ce": 3.6494038795353845e-05, "loss_iou": 0.29296875, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 194242152, "step": 2128 }, { "epoch": 8.870833333333334, "grad_norm": 4.249712852950774, "learning_rate": 5e-05, "loss": 0.0782, "num_input_tokens_seen": 194332956, "step": 2129 }, { "epoch": 8.870833333333334, "loss": 0.06841768324375153, "loss_ce": 2.7786783903138712e-05, "loss_iou": 0.21484375, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 194332956, "step": 2129 }, { "epoch": 8.875, "grad_norm": 2.698944918523565, "learning_rate": 5e-05, "loss": 0.0929, "num_input_tokens_seen": 194424260, "step": 2130 }, { "epoch": 8.875, "loss": 0.06714097410440445, "loss_ce": 0.0006660611252300441, "loss_iou": 0.150390625, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 194424260, "step": 2130 }, { "epoch": 8.879166666666666, "grad_norm": 2.3540455144523484, "learning_rate": 5e-05, "loss": 0.0696, "num_input_tokens_seen": 194515688, "step": 2131 }, { "epoch": 8.879166666666666, "loss": 0.05055814981460571, "loss_ce": 0.00023466537822969258, "loss_iou": 0.2412109375, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 194515688, "step": 2131 }, { "epoch": 8.883333333333333, "grad_norm": 4.2071846466237215, "learning_rate": 5e-05, "loss": 0.1608, "num_input_tokens_seen": 194607016, "step": 2132 }, { "epoch": 8.883333333333333, "loss": 0.09522165358066559, "loss_ce": 0.0016700156265869737, "loss_iou": 0.2255859375, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 194607016, "step": 2132 }, { "epoch": 8.8875, "grad_norm": 2.757105928706443, "learning_rate": 5e-05, "loss": 0.0725, "num_input_tokens_seen": 194698100, "step": 2133 }, { "epoch": 8.8875, "loss": 0.0668044462800026, "loss_ce": 0.001542604062706232, "loss_iou": 0.224609375, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 194698100, "step": 2133 }, { "epoch": 8.891666666666667, "grad_norm": 2.5204276745096235, "learning_rate": 5e-05, "loss": 0.0413, "num_input_tokens_seen": 194789372, "step": 2134 }, { "epoch": 8.891666666666667, "loss": 0.037501260638237, "loss_ce": 2.5677058147266507e-05, "loss_iou": 0.31640625, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 194789372, "step": 2134 }, { "epoch": 8.895833333333334, "grad_norm": 3.8010790125482563, "learning_rate": 5e-05, "loss": 0.0831, "num_input_tokens_seen": 194880476, "step": 2135 }, { "epoch": 8.895833333333334, "loss": 0.09368692338466644, "loss_ce": 2.8475475119194016e-05, "loss_iou": 0.26953125, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 194880476, "step": 2135 }, { "epoch": 8.9, "grad_norm": 7.765168397738465, "learning_rate": 5e-05, "loss": 0.0963, "num_input_tokens_seen": 194971812, "step": 2136 }, { "epoch": 8.9, "loss": 0.11584746837615967, "loss_ce": 0.0007199091487564147, "loss_iou": 0.294921875, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 194971812, "step": 2136 }, { "epoch": 8.904166666666667, "grad_norm": 2.8015176763074128, "learning_rate": 5e-05, "loss": 0.0904, "num_input_tokens_seen": 195063124, "step": 2137 }, { "epoch": 8.904166666666667, "loss": 0.12815745174884796, "loss_ce": 2.1764270059065893e-05, "loss_iou": 0.29296875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 195063124, "step": 2137 }, { "epoch": 8.908333333333333, "grad_norm": 6.391269107177382, "learning_rate": 5e-05, "loss": 0.0697, "num_input_tokens_seen": 195153872, "step": 2138 }, { "epoch": 8.908333333333333, "loss": 0.0845765620470047, "loss_ce": 0.0005158971180208027, "loss_iou": 0.302734375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 195153872, "step": 2138 }, { "epoch": 8.9125, "grad_norm": 3.908464589092032, "learning_rate": 5e-05, "loss": 0.1081, "num_input_tokens_seen": 195245420, "step": 2139 }, { "epoch": 8.9125, "loss": 0.13477161526679993, "loss_ce": 0.0002501318231225014, "loss_iou": 0.3125, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 195245420, "step": 2139 }, { "epoch": 8.916666666666666, "grad_norm": 2.900077761683489, "learning_rate": 5e-05, "loss": 0.0822, "num_input_tokens_seen": 195336768, "step": 2140 }, { "epoch": 8.916666666666666, "loss": 0.07415474206209183, "loss_ce": 0.0010041063651442528, "loss_iou": 0.1455078125, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 195336768, "step": 2140 }, { "epoch": 8.920833333333333, "grad_norm": 4.150521780138764, "learning_rate": 5e-05, "loss": 0.1183, "num_input_tokens_seen": 195428360, "step": 2141 }, { "epoch": 8.920833333333333, "loss": 0.1628822684288025, "loss_ce": 0.0022148583084344864, "loss_iou": 0.1806640625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 195428360, "step": 2141 }, { "epoch": 8.925, "grad_norm": 3.2825950683825718, "learning_rate": 5e-05, "loss": 0.053, "num_input_tokens_seen": 195519640, "step": 2142 }, { "epoch": 8.925, "loss": 0.057274091988801956, "loss_ce": 0.00041984367999248207, "loss_iou": 0.3203125, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 195519640, "step": 2142 }, { "epoch": 8.929166666666667, "grad_norm": 3.5734075717896667, "learning_rate": 5e-05, "loss": 0.1432, "num_input_tokens_seen": 195610488, "step": 2143 }, { "epoch": 8.929166666666667, "loss": 0.17504486441612244, "loss_ce": 0.0001104677066905424, "loss_iou": 0.244140625, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 195610488, "step": 2143 }, { "epoch": 8.933333333333334, "grad_norm": 2.965201678323881, "learning_rate": 5e-05, "loss": 0.0833, "num_input_tokens_seen": 195701544, "step": 2144 }, { "epoch": 8.933333333333334, "loss": 0.10431183129549026, "loss_ce": 0.0018948402721434832, "loss_iou": 0.2138671875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 195701544, "step": 2144 }, { "epoch": 8.9375, "grad_norm": 2.9355869635988614, "learning_rate": 5e-05, "loss": 0.0889, "num_input_tokens_seen": 195792244, "step": 2145 }, { "epoch": 8.9375, "loss": 0.0809515118598938, "loss_ce": 0.00011044730490539223, "loss_iou": 0.359375, "loss_num": 0.01611328125, "loss_xval": 0.0810546875, "num_input_tokens_seen": 195792244, "step": 2145 }, { "epoch": 8.941666666666666, "grad_norm": 13.929199195866536, "learning_rate": 5e-05, "loss": 0.1184, "num_input_tokens_seen": 195882756, "step": 2146 }, { "epoch": 8.941666666666666, "loss": 0.09679819643497467, "loss_ce": 0.00036264234222471714, "loss_iou": 0.31640625, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 195882756, "step": 2146 }, { "epoch": 8.945833333333333, "grad_norm": 2.251894875754947, "learning_rate": 5e-05, "loss": 0.0614, "num_input_tokens_seen": 195973952, "step": 2147 }, { "epoch": 8.945833333333333, "loss": 0.06221667304635048, "loss_ce": 6.762475823052227e-05, "loss_iou": 0.2890625, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 195973952, "step": 2147 }, { "epoch": 8.95, "grad_norm": 5.5139699775706905, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 196065400, "step": 2148 }, { "epoch": 8.95, "loss": 0.07538923621177673, "loss_ce": 0.00017866550479084253, "loss_iou": 0.37890625, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 196065400, "step": 2148 }, { "epoch": 8.954166666666667, "grad_norm": 6.863944184162462, "learning_rate": 5e-05, "loss": 0.1038, "num_input_tokens_seen": 196156408, "step": 2149 }, { "epoch": 8.954166666666667, "loss": 0.08523640036582947, "loss_ce": 7.71006743889302e-05, "loss_iou": 0.359375, "loss_num": 0.01708984375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 196156408, "step": 2149 }, { "epoch": 8.958333333333334, "grad_norm": 2.6859021474928895, "learning_rate": 5e-05, "loss": 0.1013, "num_input_tokens_seen": 196247756, "step": 2150 }, { "epoch": 8.958333333333334, "loss": 0.07768000662326813, "loss_ce": 0.0005315648741088808, "loss_iou": 0.27734375, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 196247756, "step": 2150 }, { "epoch": 8.9625, "grad_norm": 4.759145776420309, "learning_rate": 5e-05, "loss": 0.0937, "num_input_tokens_seen": 196337072, "step": 2151 }, { "epoch": 8.9625, "loss": 0.0869455337524414, "loss_ce": 7.724837632849813e-05, "loss_iou": 0.275390625, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 196337072, "step": 2151 }, { "epoch": 8.966666666666667, "grad_norm": 2.7736352381734872, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 196428672, "step": 2152 }, { "epoch": 8.966666666666667, "loss": 0.06051965802907944, "loss_ce": 0.0017122854478657246, "loss_iou": 0.2021484375, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 196428672, "step": 2152 }, { "epoch": 8.970833333333333, "grad_norm": 2.455637897021221, "learning_rate": 5e-05, "loss": 0.119, "num_input_tokens_seen": 196519872, "step": 2153 }, { "epoch": 8.970833333333333, "loss": 0.1420656442642212, "loss_ce": 2.1564021153608337e-05, "loss_iou": 0.37109375, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 196519872, "step": 2153 }, { "epoch": 8.975, "grad_norm": 2.8515738371990134, "learning_rate": 5e-05, "loss": 0.0657, "num_input_tokens_seen": 196611176, "step": 2154 }, { "epoch": 8.975, "loss": 0.06914485991001129, "loss_ce": 0.0012585069052875042, "loss_iou": 0.1884765625, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 196611176, "step": 2154 }, { "epoch": 8.979166666666666, "grad_norm": 20.28384126551533, "learning_rate": 5e-05, "loss": 0.1043, "num_input_tokens_seen": 196702356, "step": 2155 }, { "epoch": 8.979166666666666, "loss": 0.1420624554157257, "loss_ce": 1.8393449863651767e-05, "loss_iou": 0.38671875, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 196702356, "step": 2155 }, { "epoch": 8.983333333333333, "grad_norm": 1.48734902741768, "learning_rate": 5e-05, "loss": 0.0885, "num_input_tokens_seen": 196792268, "step": 2156 }, { "epoch": 8.983333333333333, "loss": 0.06732428073883057, "loss_ce": 2.5076310521399137e-06, "loss_iou": 0.1630859375, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 196792268, "step": 2156 }, { "epoch": 8.9875, "grad_norm": 5.590013731005448, "learning_rate": 5e-05, "loss": 0.1031, "num_input_tokens_seen": 196883568, "step": 2157 }, { "epoch": 8.9875, "loss": 0.10056240856647491, "loss_ce": 0.0037148739211261272, "loss_iou": 0.2734375, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 196883568, "step": 2157 }, { "epoch": 8.991666666666667, "grad_norm": 5.6620871011814256, "learning_rate": 5e-05, "loss": 0.1169, "num_input_tokens_seen": 196974492, "step": 2158 }, { "epoch": 8.991666666666667, "loss": 0.1554386019706726, "loss_ce": 0.00027197724557481706, "loss_iou": 0.1748046875, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 196974492, "step": 2158 }, { "epoch": 8.995833333333334, "grad_norm": 7.269429389298034, "learning_rate": 5e-05, "loss": 0.0774, "num_input_tokens_seen": 197066204, "step": 2159 }, { "epoch": 8.995833333333334, "loss": 0.047470733523368835, "loss_ce": 0.0006796590751037002, "loss_iou": 0.2470703125, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 197066204, "step": 2159 }, { "epoch": 9.0, "grad_norm": 2.956852458352327, "learning_rate": 5e-05, "loss": 0.1225, "num_input_tokens_seen": 197157384, "step": 2160 }, { "epoch": 9.0, "loss": 0.039815984666347504, "loss_ce": 0.0024777266662567854, "loss_iou": 0.298828125, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 197157384, "step": 2160 }, { "epoch": 9.004166666666666, "grad_norm": 4.667322136523386, "learning_rate": 5e-05, "loss": 0.0947, "num_input_tokens_seen": 197249284, "step": 2161 }, { "epoch": 9.004166666666666, "loss": 0.07299579679965973, "loss_ce": 0.0029732147231698036, "loss_iou": 0.29296875, "loss_num": 0.0140380859375, "loss_xval": 0.06982421875, "num_input_tokens_seen": 197249284, "step": 2161 }, { "epoch": 9.008333333333333, "grad_norm": 2.7944442905740963, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 197340308, "step": 2162 }, { "epoch": 9.008333333333333, "loss": 0.04382137954235077, "loss_ce": 0.002492950763553381, "loss_iou": 0.1162109375, "loss_num": 0.00830078125, "loss_xval": 0.041259765625, "num_input_tokens_seen": 197340308, "step": 2162 }, { "epoch": 9.0125, "grad_norm": 3.390843847331497, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 197431232, "step": 2163 }, { "epoch": 9.0125, "loss": 0.04338308051228523, "loss_ce": 2.3423449420079123e-06, "loss_iou": 0.2001953125, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 197431232, "step": 2163 }, { "epoch": 9.016666666666667, "grad_norm": 3.6245159569976284, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 197522612, "step": 2164 }, { "epoch": 9.016666666666667, "loss": 0.0630885437130928, "loss_ce": 0.00045121321454644203, "loss_iou": 0.23046875, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 197522612, "step": 2164 }, { "epoch": 9.020833333333334, "grad_norm": 2.1136200405152223, "learning_rate": 5e-05, "loss": 0.102, "num_input_tokens_seen": 197613884, "step": 2165 }, { "epoch": 9.020833333333334, "loss": 0.09729062765836716, "loss_ce": 0.00030576009885407984, "loss_iou": 0.310546875, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 197613884, "step": 2165 }, { "epoch": 9.025, "grad_norm": 6.339762525414115, "learning_rate": 5e-05, "loss": 0.1056, "num_input_tokens_seen": 197705376, "step": 2166 }, { "epoch": 9.025, "loss": 0.07680866122245789, "loss_ce": 0.00013324561587069184, "loss_iou": 0.39453125, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 197705376, "step": 2166 }, { "epoch": 9.029166666666667, "grad_norm": 11.265367650244409, "learning_rate": 5e-05, "loss": 0.1051, "num_input_tokens_seen": 197796280, "step": 2167 }, { "epoch": 9.029166666666667, "loss": 0.04952111840248108, "loss_ce": 0.00012841640273109078, "loss_iou": 0.197265625, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 197796280, "step": 2167 }, { "epoch": 9.033333333333333, "grad_norm": 5.4515887725256045, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 197887884, "step": 2168 }, { "epoch": 9.033333333333333, "loss": 0.03975671902298927, "loss_ce": 7.574854407721432e-06, "loss_iou": 0.17578125, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 197887884, "step": 2168 }, { "epoch": 9.0375, "grad_norm": 7.046759699386093, "learning_rate": 5e-05, "loss": 0.0788, "num_input_tokens_seen": 197979352, "step": 2169 }, { "epoch": 9.0375, "loss": 0.12461234629154205, "loss_ce": 0.0014586546458303928, "loss_iou": 0.11083984375, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 197979352, "step": 2169 }, { "epoch": 9.041666666666666, "grad_norm": 2.3299737528421494, "learning_rate": 5e-05, "loss": 0.1036, "num_input_tokens_seen": 198071212, "step": 2170 }, { "epoch": 9.041666666666666, "loss": 0.07513460516929626, "loss_ce": 0.000549639982637018, "loss_iou": 0.359375, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 198071212, "step": 2170 }, { "epoch": 9.045833333333333, "grad_norm": 2.700252359047943, "learning_rate": 5e-05, "loss": 0.1007, "num_input_tokens_seen": 198162104, "step": 2171 }, { "epoch": 9.045833333333333, "loss": 0.07018221169710159, "loss_ce": 9.859356214292347e-05, "loss_iou": 0.3125, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 198162104, "step": 2171 }, { "epoch": 9.05, "grad_norm": 3.98467720280014, "learning_rate": 5e-05, "loss": 0.1, "num_input_tokens_seen": 198253448, "step": 2172 }, { "epoch": 9.05, "loss": 0.12369519472122192, "loss_ce": 0.0005033643683418632, "loss_iou": 0.2490234375, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 198253448, "step": 2172 }, { "epoch": 9.054166666666667, "grad_norm": 4.505368366392643, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 198344568, "step": 2173 }, { "epoch": 9.054166666666667, "loss": 0.0655064806342125, "loss_ce": 1.575993883307092e-05, "loss_iou": 0.328125, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 198344568, "step": 2173 }, { "epoch": 9.058333333333334, "grad_norm": 4.808297095727311, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 198435648, "step": 2174 }, { "epoch": 9.058333333333334, "loss": 0.05769220367074013, "loss_ce": 1.3981023585074581e-05, "loss_iou": 0.39453125, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 198435648, "step": 2174 }, { "epoch": 9.0625, "grad_norm": 1.808092681487952, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 198527572, "step": 2175 }, { "epoch": 9.0625, "loss": 0.05472852289676666, "loss_ce": 7.154025661293417e-05, "loss_iou": 0.330078125, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 198527572, "step": 2175 }, { "epoch": 9.066666666666666, "grad_norm": 4.707635649040197, "learning_rate": 5e-05, "loss": 0.0728, "num_input_tokens_seen": 198618784, "step": 2176 }, { "epoch": 9.066666666666666, "loss": 0.045978933572769165, "loss_ce": 0.0002636008430272341, "loss_iou": 0.359375, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 198618784, "step": 2176 }, { "epoch": 9.070833333333333, "grad_norm": 7.944324392423701, "learning_rate": 5e-05, "loss": 0.108, "num_input_tokens_seen": 198710052, "step": 2177 }, { "epoch": 9.070833333333333, "loss": 0.13008880615234375, "loss_ce": 0.0007705655298195779, "loss_iou": 0.3359375, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 198710052, "step": 2177 }, { "epoch": 9.075, "grad_norm": 4.735021600903335, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 198800676, "step": 2178 }, { "epoch": 9.075, "loss": 0.06908006966114044, "loss_ce": 6.456708069890738e-05, "loss_iou": 0.189453125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 198800676, "step": 2178 }, { "epoch": 9.079166666666667, "grad_norm": 3.1598471577438465, "learning_rate": 5e-05, "loss": 0.0826, "num_input_tokens_seen": 198892164, "step": 2179 }, { "epoch": 9.079166666666667, "loss": 0.06824992597103119, "loss_ce": 2.7882007998414338e-05, "loss_iou": 0.263671875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 198892164, "step": 2179 }, { "epoch": 9.083333333333334, "grad_norm": 7.784147543964003, "learning_rate": 5e-05, "loss": 0.0941, "num_input_tokens_seen": 198983280, "step": 2180 }, { "epoch": 9.083333333333334, "loss": 0.08407273143529892, "loss_ce": 0.00025620521046221256, "loss_iou": 0.228515625, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 198983280, "step": 2180 }, { "epoch": 9.0875, "grad_norm": 4.691636733756171, "learning_rate": 5e-05, "loss": 0.0871, "num_input_tokens_seen": 199074576, "step": 2181 }, { "epoch": 9.0875, "loss": 0.09998394548892975, "loss_ce": 8.361989785043988e-06, "loss_iou": 0.314453125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 199074576, "step": 2181 }, { "epoch": 9.091666666666667, "grad_norm": 21.62193954692702, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 199165880, "step": 2182 }, { "epoch": 9.091666666666667, "loss": 0.03593273088335991, "loss_ce": 0.00021190733241382986, "loss_iou": 0.240234375, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 199165880, "step": 2182 }, { "epoch": 9.095833333333333, "grad_norm": 4.401089957899346, "learning_rate": 5e-05, "loss": 0.069, "num_input_tokens_seen": 199257320, "step": 2183 }, { "epoch": 9.095833333333333, "loss": 0.06900110840797424, "loss_ce": 1.6123740351758897e-05, "loss_iou": 0.271484375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 199257320, "step": 2183 }, { "epoch": 9.1, "grad_norm": 6.244533248984985, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 199348444, "step": 2184 }, { "epoch": 9.1, "loss": 0.05675504356622696, "loss_ce": 0.0001067883349605836, "loss_iou": 0.1689453125, "loss_num": 0.01129150390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 199348444, "step": 2184 }, { "epoch": 9.104166666666666, "grad_norm": 2.6506570582332705, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 199440092, "step": 2185 }, { "epoch": 9.104166666666666, "loss": 0.05698202922940254, "loss_ce": 2.096708158205729e-05, "loss_iou": 0.30859375, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 199440092, "step": 2185 }, { "epoch": 9.108333333333333, "grad_norm": 1.819478487887473, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 199531432, "step": 2186 }, { "epoch": 9.108333333333333, "loss": 0.054636985063552856, "loss_ce": 0.0002394043403910473, "loss_iou": 0.216796875, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 199531432, "step": 2186 }, { "epoch": 9.1125, "grad_norm": 3.826257550497812, "learning_rate": 5e-05, "loss": 0.1092, "num_input_tokens_seen": 199623184, "step": 2187 }, { "epoch": 9.1125, "loss": 0.0903702974319458, "loss_ce": 0.0007859497563913465, "loss_iou": 0.2216796875, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 199623184, "step": 2187 }, { "epoch": 9.116666666666667, "grad_norm": 3.7921324553123066, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 199715248, "step": 2188 }, { "epoch": 9.116666666666667, "loss": 0.06321577727794647, "loss_ce": 7.490571442758664e-05, "loss_iou": 0.4453125, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 199715248, "step": 2188 }, { "epoch": 9.120833333333334, "grad_norm": 6.619413944901631, "learning_rate": 5e-05, "loss": 0.1029, "num_input_tokens_seen": 199806600, "step": 2189 }, { "epoch": 9.120833333333334, "loss": 0.1414223164319992, "loss_ce": 0.0012398207327350974, "loss_iou": 0.2490234375, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 199806600, "step": 2189 }, { "epoch": 9.125, "grad_norm": 0.6229946226192945, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 199898248, "step": 2190 }, { "epoch": 9.125, "loss": 0.051244426518678665, "loss_ce": 0.0004860666231252253, "loss_iou": 0.2119140625, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 199898248, "step": 2190 }, { "epoch": 9.129166666666666, "grad_norm": 2.7563084923358896, "learning_rate": 5e-05, "loss": 0.0615, "num_input_tokens_seen": 199989508, "step": 2191 }, { "epoch": 9.129166666666666, "loss": 0.09039859473705292, "loss_ce": 0.00017337693134322762, "loss_iou": 0.259765625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 199989508, "step": 2191 }, { "epoch": 9.133333333333333, "grad_norm": 3.372297274259936, "learning_rate": 5e-05, "loss": 0.0919, "num_input_tokens_seen": 200080684, "step": 2192 }, { "epoch": 9.133333333333333, "loss": 0.08768831193447113, "loss_ce": 1.1310762602079194e-05, "loss_iou": 0.421875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 200080684, "step": 2192 }, { "epoch": 9.1375, "grad_norm": 2.30061741959669, "learning_rate": 5e-05, "loss": 0.1092, "num_input_tokens_seen": 200170408, "step": 2193 }, { "epoch": 9.1375, "loss": 0.10583975911140442, "loss_ce": 0.0013780894223600626, "loss_iou": 0.1962890625, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 200170408, "step": 2193 }, { "epoch": 9.141666666666667, "grad_norm": 3.7881185986113937, "learning_rate": 5e-05, "loss": 0.049, "num_input_tokens_seen": 200262016, "step": 2194 }, { "epoch": 9.141666666666667, "loss": 0.046697668731212616, "loss_ce": 2.1029973140684888e-05, "loss_iou": 0.31640625, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 200262016, "step": 2194 }, { "epoch": 9.145833333333334, "grad_norm": 10.356708455363178, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 200352756, "step": 2195 }, { "epoch": 9.145833333333334, "loss": 0.08656169474124908, "loss_ce": 1.384494225931121e-05, "loss_iou": 0.302734375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 200352756, "step": 2195 }, { "epoch": 9.15, "grad_norm": 1.4011253078496293, "learning_rate": 5e-05, "loss": 0.0872, "num_input_tokens_seen": 200444596, "step": 2196 }, { "epoch": 9.15, "loss": 0.10002411901950836, "loss_ce": 0.0002926679444499314, "loss_iou": 0.33984375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 200444596, "step": 2196 }, { "epoch": 9.154166666666667, "grad_norm": 4.355472283675736, "learning_rate": 5e-05, "loss": 0.138, "num_input_tokens_seen": 200534124, "step": 2197 }, { "epoch": 9.154166666666667, "loss": 0.17626036703586578, "loss_ce": 3.661260416265577e-05, "loss_iou": 0.1611328125, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 200534124, "step": 2197 }, { "epoch": 9.158333333333333, "grad_norm": 3.2374131152319343, "learning_rate": 5e-05, "loss": 0.0938, "num_input_tokens_seen": 200625164, "step": 2198 }, { "epoch": 9.158333333333333, "loss": 0.05326056480407715, "loss_ce": 0.001289131585508585, "loss_iou": 0.146484375, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 200625164, "step": 2198 }, { "epoch": 9.1625, "grad_norm": 5.774485586525706, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 200715712, "step": 2199 }, { "epoch": 9.1625, "loss": 0.061350684612989426, "loss_ce": 0.0002239774912595749, "loss_iou": 0.24609375, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 200715712, "step": 2199 }, { "epoch": 9.166666666666666, "grad_norm": 6.673424562477215, "learning_rate": 5e-05, "loss": 0.0838, "num_input_tokens_seen": 200806972, "step": 2200 }, { "epoch": 9.166666666666666, "loss": 0.07667578756809235, "loss_ce": 0.0026554071810096502, "loss_iou": 0.1806640625, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 200806972, "step": 2200 }, { "epoch": 9.170833333333333, "grad_norm": 4.918866754467832, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 200898196, "step": 2201 }, { "epoch": 9.170833333333333, "loss": 0.08252261579036713, "loss_ce": 0.010867348872125149, "loss_iou": 0.232421875, "loss_num": 0.0142822265625, "loss_xval": 0.07177734375, "num_input_tokens_seen": 200898196, "step": 2201 }, { "epoch": 9.175, "grad_norm": 3.3958933070224075, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 200989876, "step": 2202 }, { "epoch": 9.175, "loss": 0.06549730896949768, "loss_ce": 0.00015917142445687205, "loss_iou": 0.1435546875, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 200989876, "step": 2202 }, { "epoch": 9.179166666666667, "grad_norm": 2.2801243360680954, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 201080428, "step": 2203 }, { "epoch": 9.179166666666667, "loss": 0.08679927885532379, "loss_ce": 7.290714165719692e-06, "loss_iou": 0.46484375, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 201080428, "step": 2203 }, { "epoch": 9.183333333333334, "grad_norm": 2.455656751871585, "learning_rate": 5e-05, "loss": 0.0908, "num_input_tokens_seen": 201171728, "step": 2204 }, { "epoch": 9.183333333333334, "loss": 0.1050679013133049, "loss_ce": 0.0014454597840085626, "loss_iou": 0.267578125, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 201171728, "step": 2204 }, { "epoch": 9.1875, "grad_norm": 6.308043081686157, "learning_rate": 5e-05, "loss": 0.1348, "num_input_tokens_seen": 201262216, "step": 2205 }, { "epoch": 9.1875, "loss": 0.1272621750831604, "loss_ce": 0.0002937901590485126, "loss_iou": 0.263671875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 201262216, "step": 2205 }, { "epoch": 9.191666666666666, "grad_norm": 24.558690680981883, "learning_rate": 5e-05, "loss": 0.079, "num_input_tokens_seen": 201353840, "step": 2206 }, { "epoch": 9.191666666666666, "loss": 0.067722387611866, "loss_ce": 0.004337374120950699, "loss_iou": 0.2373046875, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 201353840, "step": 2206 }, { "epoch": 9.195833333333333, "grad_norm": 2.437776954669518, "learning_rate": 5e-05, "loss": 0.0748, "num_input_tokens_seen": 201444832, "step": 2207 }, { "epoch": 9.195833333333333, "loss": 0.11072726547718048, "loss_ce": 2.475509791111108e-05, "loss_iou": 0.140625, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 201444832, "step": 2207 }, { "epoch": 9.2, "grad_norm": 1.3908832751169862, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 201534856, "step": 2208 }, { "epoch": 9.2, "loss": 0.03558644652366638, "loss_ce": 0.000109762855572626, "loss_iou": 0.09912109375, "loss_num": 0.007080078125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 201534856, "step": 2208 }, { "epoch": 9.204166666666667, "grad_norm": 3.6286645530410246, "learning_rate": 5e-05, "loss": 0.0759, "num_input_tokens_seen": 201626312, "step": 2209 }, { "epoch": 9.204166666666667, "loss": 0.07812509685754776, "loss_ce": 7.639089017175138e-05, "loss_iou": 0.1787109375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 201626312, "step": 2209 }, { "epoch": 9.208333333333334, "grad_norm": 2.7511377380974533, "learning_rate": 5e-05, "loss": 0.116, "num_input_tokens_seen": 201716604, "step": 2210 }, { "epoch": 9.208333333333334, "loss": 0.17026406526565552, "loss_ce": 0.000128566927742213, "loss_iou": 0.37890625, "loss_num": 0.0341796875, "loss_xval": 0.169921875, "num_input_tokens_seen": 201716604, "step": 2210 }, { "epoch": 9.2125, "grad_norm": 9.112774236165663, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 201808304, "step": 2211 }, { "epoch": 9.2125, "loss": 0.051056019961833954, "loss_ce": 0.00045787671115249395, "loss_iou": 0.1923828125, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 201808304, "step": 2211 }, { "epoch": 9.216666666666667, "grad_norm": 4.895428761300017, "learning_rate": 5e-05, "loss": 0.0662, "num_input_tokens_seen": 201899716, "step": 2212 }, { "epoch": 9.216666666666667, "loss": 0.08767993748188019, "loss_ce": 7.922661461634561e-05, "loss_iou": 0.3828125, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 201899716, "step": 2212 }, { "epoch": 9.220833333333333, "grad_norm": 4.647479505670012, "learning_rate": 5e-05, "loss": 0.1367, "num_input_tokens_seen": 201990580, "step": 2213 }, { "epoch": 9.220833333333333, "loss": 0.21614643931388855, "loss_ce": 0.00089642294915393, "loss_iou": 0.171875, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 201990580, "step": 2213 }, { "epoch": 9.225, "grad_norm": 3.508364549794977, "learning_rate": 5e-05, "loss": 0.078, "num_input_tokens_seen": 202082188, "step": 2214 }, { "epoch": 9.225, "loss": 0.07295480370521545, "loss_ce": 0.0007044363301247358, "loss_iou": 0.283203125, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 202082188, "step": 2214 }, { "epoch": 9.229166666666666, "grad_norm": 2.0454420409331497, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 202174192, "step": 2215 }, { "epoch": 9.229166666666666, "loss": 0.03788067027926445, "loss_ce": 0.00013042415957897902, "loss_iou": 0.2451171875, "loss_num": 0.007537841796875, "loss_xval": 0.037841796875, "num_input_tokens_seen": 202174192, "step": 2215 }, { "epoch": 9.233333333333333, "grad_norm": 2.0495107961366528, "learning_rate": 5e-05, "loss": 0.0787, "num_input_tokens_seen": 202265876, "step": 2216 }, { "epoch": 9.233333333333333, "loss": 0.04935688525438309, "loss_ce": 0.0004219515831209719, "loss_iou": 0.32421875, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 202265876, "step": 2216 }, { "epoch": 9.2375, "grad_norm": 2.517464451287442, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 202357740, "step": 2217 }, { "epoch": 9.2375, "loss": 0.04978005215525627, "loss_ce": 0.0004407581582199782, "loss_iou": 0.201171875, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 202357740, "step": 2217 }, { "epoch": 9.241666666666667, "grad_norm": 3.3097343702421282, "learning_rate": 5e-05, "loss": 0.0788, "num_input_tokens_seen": 202447096, "step": 2218 }, { "epoch": 9.241666666666667, "loss": 0.05562786012887955, "loss_ce": 0.00014690443640574813, "loss_iou": 0.1982421875, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 202447096, "step": 2218 }, { "epoch": 9.245833333333334, "grad_norm": 2.6334267837027703, "learning_rate": 5e-05, "loss": 0.0835, "num_input_tokens_seen": 202538308, "step": 2219 }, { "epoch": 9.245833333333334, "loss": 0.054739732295274734, "loss_ce": 5.2235387556720525e-05, "loss_iou": 0.33984375, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 202538308, "step": 2219 }, { "epoch": 9.25, "grad_norm": 9.03035066197087, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 202629564, "step": 2220 }, { "epoch": 9.25, "loss": 0.06619110703468323, "loss_ce": 4.425767110660672e-05, "loss_iou": 0.33203125, "loss_num": 0.01324462890625, "loss_xval": 0.06591796875, "num_input_tokens_seen": 202629564, "step": 2220 }, { "epoch": 9.254166666666666, "grad_norm": 3.3762716990766264, "learning_rate": 5e-05, "loss": 0.1171, "num_input_tokens_seen": 202720532, "step": 2221 }, { "epoch": 9.254166666666666, "loss": 0.12677684426307678, "loss_ce": 6.820567705290159e-06, "loss_iou": 0.259765625, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 202720532, "step": 2221 }, { "epoch": 9.258333333333333, "grad_norm": 10.875326064101861, "learning_rate": 5e-05, "loss": 0.0732, "num_input_tokens_seen": 202811636, "step": 2222 }, { "epoch": 9.258333333333333, "loss": 0.07245151698589325, "loss_ce": 2.7908758966077585e-06, "loss_iou": 0.265625, "loss_num": 0.0145263671875, "loss_xval": 0.072265625, "num_input_tokens_seen": 202811636, "step": 2222 }, { "epoch": 9.2625, "grad_norm": 3.7498775049362583, "learning_rate": 5e-05, "loss": 0.0976, "num_input_tokens_seen": 202903444, "step": 2223 }, { "epoch": 9.2625, "loss": 0.07416309416294098, "loss_ce": 9.693222818896174e-05, "loss_iou": 0.267578125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 202903444, "step": 2223 }, { "epoch": 9.266666666666667, "grad_norm": 4.332373597067934, "learning_rate": 5e-05, "loss": 0.0844, "num_input_tokens_seen": 202994716, "step": 2224 }, { "epoch": 9.266666666666667, "loss": 0.07626857608556747, "loss_ce": 3.566586383385584e-05, "loss_iou": 0.23046875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 202994716, "step": 2224 }, { "epoch": 9.270833333333334, "grad_norm": 2.2371257085799505, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 203086340, "step": 2225 }, { "epoch": 9.270833333333334, "loss": 0.08375569432973862, "loss_ce": 3.0716892069904134e-05, "loss_iou": 0.314453125, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 203086340, "step": 2225 }, { "epoch": 9.275, "grad_norm": 1.7012267752678156, "learning_rate": 5e-05, "loss": 0.0869, "num_input_tokens_seen": 203177744, "step": 2226 }, { "epoch": 9.275, "loss": 0.14392778277397156, "loss_ce": 0.00018999181338585913, "loss_iou": 0.26953125, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 203177744, "step": 2226 }, { "epoch": 9.279166666666667, "grad_norm": 1.865567681651061, "learning_rate": 5e-05, "loss": 0.0983, "num_input_tokens_seen": 203270012, "step": 2227 }, { "epoch": 9.279166666666667, "loss": 0.11878585815429688, "loss_ce": 0.0067253089509904385, "loss_iou": 0.2275390625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 203270012, "step": 2227 }, { "epoch": 9.283333333333333, "grad_norm": 2.2906264696681062, "learning_rate": 5e-05, "loss": 0.0827, "num_input_tokens_seen": 203361336, "step": 2228 }, { "epoch": 9.283333333333333, "loss": 0.11910998076200485, "loss_ce": 0.004455443471670151, "loss_iou": 0.19921875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 203361336, "step": 2228 }, { "epoch": 9.2875, "grad_norm": 3.3780007096420315, "learning_rate": 5e-05, "loss": 0.0721, "num_input_tokens_seen": 203451800, "step": 2229 }, { "epoch": 9.2875, "loss": 0.08809817582368851, "loss_ce": 0.00048221100587397814, "loss_iou": 0.216796875, "loss_num": 0.017578125, "loss_xval": 0.08740234375, "num_input_tokens_seen": 203451800, "step": 2229 }, { "epoch": 9.291666666666666, "grad_norm": 3.513922891850055, "learning_rate": 5e-05, "loss": 0.0808, "num_input_tokens_seen": 203543576, "step": 2230 }, { "epoch": 9.291666666666666, "loss": 0.07334207743406296, "loss_ce": 0.002266637748107314, "loss_iou": 0.2490234375, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 203543576, "step": 2230 }, { "epoch": 9.295833333333333, "grad_norm": 46.55263213397429, "learning_rate": 5e-05, "loss": 0.0861, "num_input_tokens_seen": 203634712, "step": 2231 }, { "epoch": 9.295833333333333, "loss": 0.10764908045530319, "loss_ce": 0.0007154846098273993, "loss_iou": 0.2001953125, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 203634712, "step": 2231 }, { "epoch": 9.3, "grad_norm": 4.245117723090011, "learning_rate": 5e-05, "loss": 0.1047, "num_input_tokens_seen": 203725908, "step": 2232 }, { "epoch": 9.3, "loss": 0.10209492594003677, "loss_ce": 4.4143747800262645e-05, "loss_iou": 0.1640625, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 203725908, "step": 2232 }, { "epoch": 9.304166666666667, "grad_norm": 4.580751775042531, "learning_rate": 5e-05, "loss": 0.0996, "num_input_tokens_seen": 203817044, "step": 2233 }, { "epoch": 9.304166666666667, "loss": 0.07633492350578308, "loss_ce": 2.5714654839248396e-05, "loss_iou": 0.267578125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 203817044, "step": 2233 }, { "epoch": 9.308333333333334, "grad_norm": 5.19491098728478, "learning_rate": 5e-05, "loss": 0.1068, "num_input_tokens_seen": 203908056, "step": 2234 }, { "epoch": 9.308333333333334, "loss": 0.10972404479980469, "loss_ce": 4.387239459902048e-05, "loss_iou": 0.296875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 203908056, "step": 2234 }, { "epoch": 9.3125, "grad_norm": 3.127703276953714, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 203999944, "step": 2235 }, { "epoch": 9.3125, "loss": 0.058495134115219116, "loss_ce": 0.0009847574401646852, "loss_iou": 0.208984375, "loss_num": 0.011474609375, "loss_xval": 0.0576171875, "num_input_tokens_seen": 203999944, "step": 2235 }, { "epoch": 9.316666666666666, "grad_norm": 5.97333492039574, "learning_rate": 5e-05, "loss": 0.087, "num_input_tokens_seen": 204091188, "step": 2236 }, { "epoch": 9.316666666666666, "loss": 0.06130867451429367, "loss_ce": 2.9378070394159295e-05, "loss_iou": 0.263671875, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 204091188, "step": 2236 }, { "epoch": 9.320833333333333, "grad_norm": 3.4424382346355586, "learning_rate": 5e-05, "loss": 0.0723, "num_input_tokens_seen": 204182404, "step": 2237 }, { "epoch": 9.320833333333333, "loss": 0.07460271567106247, "loss_ce": 2.5382661988260224e-05, "loss_iou": 0.2138671875, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 204182404, "step": 2237 }, { "epoch": 9.325, "grad_norm": 2.138306168086027, "learning_rate": 5e-05, "loss": 0.1213, "num_input_tokens_seen": 204273752, "step": 2238 }, { "epoch": 9.325, "loss": 0.11179852485656738, "loss_ce": 0.0001957409840542823, "loss_iou": 0.32421875, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 204273752, "step": 2238 }, { "epoch": 9.329166666666667, "grad_norm": 1.6961706423169747, "learning_rate": 5e-05, "loss": 0.085, "num_input_tokens_seen": 204365528, "step": 2239 }, { "epoch": 9.329166666666667, "loss": 0.06735285371541977, "loss_ce": 0.0017400594661012292, "loss_iou": 0.240234375, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 204365528, "step": 2239 }, { "epoch": 9.333333333333334, "grad_norm": 6.645834701948286, "learning_rate": 5e-05, "loss": 0.0882, "num_input_tokens_seen": 204457388, "step": 2240 }, { "epoch": 9.333333333333334, "loss": 0.11958567798137665, "loss_ce": 0.00017039466183632612, "loss_iou": 0.26171875, "loss_num": 0.0238037109375, "loss_xval": 0.11962890625, "num_input_tokens_seen": 204457388, "step": 2240 }, { "epoch": 9.3375, "grad_norm": 3.59784310282802, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 204548732, "step": 2241 }, { "epoch": 9.3375, "loss": 0.0499962717294693, "loss_ce": 0.0001458074984839186, "loss_iou": 0.154296875, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 204548732, "step": 2241 }, { "epoch": 9.341666666666667, "grad_norm": 2.2014844455334837, "learning_rate": 5e-05, "loss": 0.1281, "num_input_tokens_seen": 204639824, "step": 2242 }, { "epoch": 9.341666666666667, "loss": 0.07682133466005325, "loss_ce": 0.00016117449558805674, "loss_iou": 0.291015625, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 204639824, "step": 2242 }, { "epoch": 9.345833333333333, "grad_norm": 3.077157982106598, "learning_rate": 5e-05, "loss": 0.0614, "num_input_tokens_seen": 204730496, "step": 2243 }, { "epoch": 9.345833333333333, "loss": 0.07074464857578278, "loss_ce": 2.015555219259113e-05, "loss_iou": 0.31640625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 204730496, "step": 2243 }, { "epoch": 9.35, "grad_norm": 2.4578862173343317, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 204821520, "step": 2244 }, { "epoch": 9.35, "loss": 0.05760511755943298, "loss_ce": 3.1891840990283526e-06, "loss_iou": 0.291015625, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 204821520, "step": 2244 }, { "epoch": 9.354166666666666, "grad_norm": 7.810310706978085, "learning_rate": 5e-05, "loss": 0.0761, "num_input_tokens_seen": 204913148, "step": 2245 }, { "epoch": 9.354166666666666, "loss": 0.09471295028924942, "loss_ce": 1.6910182239371352e-05, "loss_iou": 0.185546875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 204913148, "step": 2245 }, { "epoch": 9.358333333333333, "grad_norm": 3.393904127948915, "learning_rate": 5e-05, "loss": 0.0769, "num_input_tokens_seen": 205004668, "step": 2246 }, { "epoch": 9.358333333333333, "loss": 0.07558546960353851, "loss_ce": 0.005860439967364073, "loss_iou": 0.1533203125, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 205004668, "step": 2246 }, { "epoch": 9.3625, "grad_norm": 5.296393204114102, "learning_rate": 5e-05, "loss": 0.095, "num_input_tokens_seen": 205096320, "step": 2247 }, { "epoch": 9.3625, "loss": 0.10556471347808838, "loss_ce": 0.003697034204378724, "loss_iou": 0.244140625, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 205096320, "step": 2247 }, { "epoch": 9.366666666666667, "grad_norm": 3.98805561043107, "learning_rate": 5e-05, "loss": 0.0975, "num_input_tokens_seen": 205187728, "step": 2248 }, { "epoch": 9.366666666666667, "loss": 0.07330742478370667, "loss_ce": 4.2080778257513884e-06, "loss_iou": 0.25390625, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 205187728, "step": 2248 }, { "epoch": 9.370833333333334, "grad_norm": 3.422412576639185, "learning_rate": 5e-05, "loss": 0.0622, "num_input_tokens_seen": 205279156, "step": 2249 }, { "epoch": 9.370833333333334, "loss": 0.0616975836455822, "loss_ce": 6.733382178936154e-05, "loss_iou": 0.333984375, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 205279156, "step": 2249 }, { "epoch": 9.375, "grad_norm": 3.952350471522899, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 205370052, "step": 2250 }, { "epoch": 9.375, "eval_seeclick_CIoU": 0.2784217670559883, "eval_seeclick_GIoU": 0.27900155633687973, "eval_seeclick_IoU": 0.36821986734867096, "eval_seeclick_MAE_all": 0.08717832714319229, "eval_seeclick_MAE_h": 0.0708407387137413, "eval_seeclick_MAE_w": 0.18288671225309372, "eval_seeclick_MAE_x_boxes": 0.1732628047466278, "eval_seeclick_MAE_y_boxes": 0.07997602596879005, "eval_seeclick_NUM_probability": 0.9999985694885254, "eval_seeclick_inside_bbox": 0.5397727340459824, "eval_seeclick_loss": 0.5365479588508606, "eval_seeclick_loss_ce": 0.136946901679039, "eval_seeclick_loss_iou": 0.4412841796875, "eval_seeclick_loss_num": 0.080352783203125, "eval_seeclick_loss_xval": 0.40203857421875, "eval_seeclick_runtime": 80.7149, "eval_seeclick_samples_per_second": 0.533, "eval_seeclick_steps_per_second": 0.025, "num_input_tokens_seen": 205370052, "step": 2250 }, { "epoch": 9.375, "eval_icons_CIoU": 0.3527114614844322, "eval_icons_GIoU": 0.3811039477586746, "eval_icons_IoU": 0.43005380034446716, "eval_icons_MAE_all": 0.06621737964451313, "eval_icons_MAE_h": 0.13642537221312523, "eval_icons_MAE_w": 0.09623197466135025, "eval_icons_MAE_x_boxes": 0.09396588802337646, "eval_icons_MAE_y_boxes": 0.13910193741321564, "eval_icons_NUM_probability": 0.9999993145465851, "eval_icons_inside_bbox": 0.5503472238779068, "eval_icons_loss": 0.31992390751838684, "eval_icons_loss_ce": 1.8518157958169468e-06, "eval_icons_loss_iou": 0.3302001953125, "eval_icons_loss_num": 0.0670318603515625, "eval_icons_loss_xval": 0.335205078125, "eval_icons_runtime": 97.4337, "eval_icons_samples_per_second": 0.513, "eval_icons_steps_per_second": 0.021, "num_input_tokens_seen": 205370052, "step": 2250 }, { "epoch": 9.375, "eval_screenspot_CIoU": 0.39432772994041443, "eval_screenspot_GIoU": 0.3775731921195984, "eval_screenspot_IoU": 0.4619280795256297, "eval_screenspot_MAE_all": 0.0979969451824824, "eval_screenspot_MAE_h": 0.09322212388118108, "eval_screenspot_MAE_w": 0.19270053009192148, "eval_screenspot_MAE_x_boxes": 0.19473616778850555, "eval_screenspot_MAE_y_boxes": 0.08440528810024261, "eval_screenspot_NUM_probability": 0.9999984304110209, "eval_screenspot_inside_bbox": 0.7058333357175192, "eval_screenspot_loss": 0.4944130480289459, "eval_screenspot_loss_ce": 9.930451142281527e-06, "eval_screenspot_loss_iou": 0.4032389322916667, "eval_screenspot_loss_num": 0.099884033203125, "eval_screenspot_loss_xval": 0.4994303385416667, "eval_screenspot_runtime": 150.0641, "eval_screenspot_samples_per_second": 0.593, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 205370052, "step": 2250 }, { "epoch": 9.375, "eval_compot_CIoU": 0.40679484605789185, "eval_compot_GIoU": 0.3917129784822464, "eval_compot_IoU": 0.48430830240249634, "eval_compot_MAE_all": 0.06649945117533207, "eval_compot_MAE_h": 0.09491590782999992, "eval_compot_MAE_w": 0.1506771482527256, "eval_compot_MAE_x_boxes": 0.14771829172968864, "eval_compot_MAE_y_boxes": 0.09101338312029839, "eval_compot_NUM_probability": 0.9999987185001373, "eval_compot_inside_bbox": 0.6614583432674408, "eval_compot_loss": 0.3391985595226288, "eval_compot_loss_ce": 0.025836432352662086, "eval_compot_loss_iou": 0.31207275390625, "eval_compot_loss_num": 0.0590057373046875, "eval_compot_loss_xval": 0.2950439453125, "eval_compot_runtime": 86.1939, "eval_compot_samples_per_second": 0.58, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 205370052, "step": 2250 }, { "epoch": 9.375, "loss": 0.3358990252017975, "loss_ce": 0.02400936558842659, "loss_iou": 0.30859375, "loss_num": 0.0625, "loss_xval": 0.3125, "num_input_tokens_seen": 205370052, "step": 2250 }, { "epoch": 9.379166666666666, "grad_norm": 6.726949224713207, "learning_rate": 5e-05, "loss": 0.1254, "num_input_tokens_seen": 205462060, "step": 2251 }, { "epoch": 9.379166666666666, "loss": 0.10401031374931335, "loss_ce": 0.0011660760501399636, "loss_iou": 0.37890625, "loss_num": 0.0205078125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 205462060, "step": 2251 }, { "epoch": 9.383333333333333, "grad_norm": 3.5720176536381114, "learning_rate": 5e-05, "loss": 0.0887, "num_input_tokens_seen": 205553276, "step": 2252 }, { "epoch": 9.383333333333333, "loss": 0.10866258293390274, "loss_ce": 0.0006456149276345968, "loss_iou": 0.228515625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 205553276, "step": 2252 }, { "epoch": 9.3875, "grad_norm": 4.580756312669723, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 205644856, "step": 2253 }, { "epoch": 9.3875, "loss": 0.07860147953033447, "loss_ce": 0.00017130102787632495, "loss_iou": 0.28515625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 205644856, "step": 2253 }, { "epoch": 9.391666666666667, "grad_norm": 2.528667604033413, "learning_rate": 5e-05, "loss": 0.1104, "num_input_tokens_seen": 205735956, "step": 2254 }, { "epoch": 9.391666666666667, "loss": 0.12907829880714417, "loss_ce": 4.210660335957073e-06, "loss_iou": 0.3125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 205735956, "step": 2254 }, { "epoch": 9.395833333333334, "grad_norm": 3.322085127164889, "learning_rate": 5e-05, "loss": 0.0855, "num_input_tokens_seen": 205827392, "step": 2255 }, { "epoch": 9.395833333333334, "loss": 0.06169138848781586, "loss_ce": 0.0031891947146505117, "loss_iou": 0.2890625, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 205827392, "step": 2255 }, { "epoch": 9.4, "grad_norm": 3.450005752931726, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 205919032, "step": 2256 }, { "epoch": 9.4, "loss": 0.03724807873368263, "loss_ce": 0.0003904743352904916, "loss_iou": 0.1484375, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 205919032, "step": 2256 }, { "epoch": 9.404166666666667, "grad_norm": 2.9762252585224616, "learning_rate": 5e-05, "loss": 0.1238, "num_input_tokens_seen": 206010268, "step": 2257 }, { "epoch": 9.404166666666667, "loss": 0.0545886866748333, "loss_ce": 5.3776235290570185e-05, "loss_iou": 0.171875, "loss_num": 0.01092529296875, "loss_xval": 0.054443359375, "num_input_tokens_seen": 206010268, "step": 2257 }, { "epoch": 9.408333333333333, "grad_norm": 2.8275104606861894, "learning_rate": 5e-05, "loss": 0.1069, "num_input_tokens_seen": 206102272, "step": 2258 }, { "epoch": 9.408333333333333, "loss": 0.11684298515319824, "loss_ce": 0.0014712885022163391, "loss_iou": 0.251953125, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 206102272, "step": 2258 }, { "epoch": 9.4125, "grad_norm": 3.639553308870559, "learning_rate": 5e-05, "loss": 0.0822, "num_input_tokens_seen": 206193836, "step": 2259 }, { "epoch": 9.4125, "loss": 0.0892866849899292, "loss_ce": 0.00020587486505974084, "loss_iou": 0.427734375, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 206193836, "step": 2259 }, { "epoch": 9.416666666666666, "grad_norm": 2.973670802416314, "learning_rate": 5e-05, "loss": 0.1252, "num_input_tokens_seen": 206284796, "step": 2260 }, { "epoch": 9.416666666666666, "loss": 0.13764168322086334, "loss_ce": 0.0003278392250649631, "loss_iou": 0.3046875, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 206284796, "step": 2260 }, { "epoch": 9.420833333333333, "grad_norm": 5.92163105063818, "learning_rate": 5e-05, "loss": 0.1038, "num_input_tokens_seen": 206376552, "step": 2261 }, { "epoch": 9.420833333333333, "loss": 0.13112279772758484, "loss_ce": 0.0009348143939860165, "loss_iou": 0.265625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 206376552, "step": 2261 }, { "epoch": 9.425, "grad_norm": 6.48320500481271, "learning_rate": 5e-05, "loss": 0.0882, "num_input_tokens_seen": 206467724, "step": 2262 }, { "epoch": 9.425, "loss": 0.0606074333190918, "loss_ce": 1.4783408914809115e-05, "loss_iou": 0.298828125, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 206467724, "step": 2262 }, { "epoch": 9.429166666666667, "grad_norm": 2.385781764686817, "learning_rate": 5e-05, "loss": 0.0765, "num_input_tokens_seen": 206558988, "step": 2263 }, { "epoch": 9.429166666666667, "loss": 0.08593662828207016, "loss_ce": 0.00013645211583934724, "loss_iou": 0.1796875, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 206558988, "step": 2263 }, { "epoch": 9.433333333333334, "grad_norm": 4.987948990102972, "learning_rate": 5e-05, "loss": 0.0658, "num_input_tokens_seen": 206649740, "step": 2264 }, { "epoch": 9.433333333333334, "loss": 0.03622180223464966, "loss_ce": 0.002896607154980302, "loss_iou": 0.21875, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 206649740, "step": 2264 }, { "epoch": 9.4375, "grad_norm": 4.131020051739761, "learning_rate": 5e-05, "loss": 0.1127, "num_input_tokens_seen": 206740656, "step": 2265 }, { "epoch": 9.4375, "loss": 0.15009549260139465, "loss_ce": 1.0045349881693255e-05, "loss_iou": 0.35546875, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 206740656, "step": 2265 }, { "epoch": 9.441666666666666, "grad_norm": 3.589618825751963, "learning_rate": 5e-05, "loss": 0.0876, "num_input_tokens_seen": 206832280, "step": 2266 }, { "epoch": 9.441666666666666, "loss": 0.09583691507577896, "loss_ce": 0.001598631264641881, "loss_iou": 0.31640625, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 206832280, "step": 2266 }, { "epoch": 9.445833333333333, "grad_norm": 2.959962675425685, "learning_rate": 5e-05, "loss": 0.077, "num_input_tokens_seen": 206923680, "step": 2267 }, { "epoch": 9.445833333333333, "loss": 0.07070231437683105, "loss_ce": 0.00032878227648325264, "loss_iou": 0.25390625, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 206923680, "step": 2267 }, { "epoch": 9.45, "grad_norm": 2.1554765527304856, "learning_rate": 5e-05, "loss": 0.0735, "num_input_tokens_seen": 207015212, "step": 2268 }, { "epoch": 9.45, "loss": 0.09321004897356033, "loss_ce": 0.0005968199693597853, "loss_iou": 0.1708984375, "loss_num": 0.0184326171875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 207015212, "step": 2268 }, { "epoch": 9.454166666666667, "grad_norm": 1.687814277230721, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 207106856, "step": 2269 }, { "epoch": 9.454166666666667, "loss": 0.05360877513885498, "loss_ce": 0.00012672031880356371, "loss_iou": 0.2890625, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 207106856, "step": 2269 }, { "epoch": 9.458333333333334, "grad_norm": 11.917138995240949, "learning_rate": 5e-05, "loss": 0.0837, "num_input_tokens_seen": 207198372, "step": 2270 }, { "epoch": 9.458333333333334, "loss": 0.06492700427770615, "loss_ce": 0.00011530078336363658, "loss_iou": 0.162109375, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 207198372, "step": 2270 }, { "epoch": 9.4625, "grad_norm": 1.3942957040035504, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 207289668, "step": 2271 }, { "epoch": 9.4625, "loss": 0.056693121790885925, "loss_ce": 0.0005865513230673969, "loss_iou": 0.09716796875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 207289668, "step": 2271 }, { "epoch": 9.466666666666667, "grad_norm": 2.351656427742343, "learning_rate": 5e-05, "loss": 0.0882, "num_input_tokens_seen": 207380392, "step": 2272 }, { "epoch": 9.466666666666667, "loss": 0.11181023716926575, "loss_ce": 0.0016417772276327014, "loss_iou": 0.349609375, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 207380392, "step": 2272 }, { "epoch": 9.470833333333333, "grad_norm": 4.158043720550659, "learning_rate": 5e-05, "loss": 0.0938, "num_input_tokens_seen": 207471628, "step": 2273 }, { "epoch": 9.470833333333333, "loss": 0.0949772447347641, "loss_ce": 9.809455514186993e-05, "loss_iou": 0.314453125, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 207471628, "step": 2273 }, { "epoch": 9.475, "grad_norm": 4.92816733376496, "learning_rate": 5e-05, "loss": 0.1023, "num_input_tokens_seen": 207563016, "step": 2274 }, { "epoch": 9.475, "loss": 0.06635133922100067, "loss_ce": 0.0008301038178615272, "loss_iou": 0.296875, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 207563016, "step": 2274 }, { "epoch": 9.479166666666666, "grad_norm": 2.790930691242936, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 207654544, "step": 2275 }, { "epoch": 9.479166666666666, "loss": 0.03971107304096222, "loss_ce": 0.0029679089784622192, "loss_iou": 0.30078125, "loss_num": 0.007354736328125, "loss_xval": 0.03662109375, "num_input_tokens_seen": 207654544, "step": 2275 }, { "epoch": 9.483333333333333, "grad_norm": 9.756186645468425, "learning_rate": 5e-05, "loss": 0.0891, "num_input_tokens_seen": 207745812, "step": 2276 }, { "epoch": 9.483333333333333, "loss": 0.05628746375441551, "loss_ce": 7.408284727716818e-05, "loss_iou": 0.37109375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 207745812, "step": 2276 }, { "epoch": 9.4875, "grad_norm": 4.426166362680703, "learning_rate": 5e-05, "loss": 0.0785, "num_input_tokens_seen": 207836940, "step": 2277 }, { "epoch": 9.4875, "loss": 0.11567720770835876, "loss_ce": 4.6097604354145005e-05, "loss_iou": 0.390625, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 207836940, "step": 2277 }, { "epoch": 9.491666666666667, "grad_norm": 4.623619751098643, "learning_rate": 5e-05, "loss": 0.0987, "num_input_tokens_seen": 207928944, "step": 2278 }, { "epoch": 9.491666666666667, "loss": 0.1042831540107727, "loss_ce": 0.002354443771764636, "loss_iou": 0.255859375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 207928944, "step": 2278 }, { "epoch": 9.495833333333334, "grad_norm": 2.1600688546351123, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 208019900, "step": 2279 }, { "epoch": 9.495833333333334, "loss": 0.06627009809017181, "loss_ce": 0.0011303334031254053, "loss_iou": 0.20703125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 208019900, "step": 2279 }, { "epoch": 9.5, "grad_norm": 4.642681277020734, "learning_rate": 5e-05, "loss": 0.0678, "num_input_tokens_seen": 208111080, "step": 2280 }, { "epoch": 9.5, "loss": 0.03994332253932953, "loss_ce": 0.0023876256309449673, "loss_iou": 0.1611328125, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 208111080, "step": 2280 }, { "epoch": 9.504166666666666, "grad_norm": 2.2019849939713634, "learning_rate": 5e-05, "loss": 0.0757, "num_input_tokens_seen": 208202460, "step": 2281 }, { "epoch": 9.504166666666666, "loss": 0.053781673312187195, "loss_ce": 2.495828812243417e-05, "loss_iou": 0.3046875, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 208202460, "step": 2281 }, { "epoch": 9.508333333333333, "grad_norm": 6.0104846611295635, "learning_rate": 5e-05, "loss": 0.1031, "num_input_tokens_seen": 208294084, "step": 2282 }, { "epoch": 9.508333333333333, "loss": 0.14557743072509766, "loss_ce": 8.48758063511923e-05, "loss_iou": 0.26953125, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 208294084, "step": 2282 }, { "epoch": 9.5125, "grad_norm": 4.870647425843709, "learning_rate": 5e-05, "loss": 0.0573, "num_input_tokens_seen": 208385216, "step": 2283 }, { "epoch": 9.5125, "loss": 0.06885615736246109, "loss_ce": 3.90209024772048e-05, "loss_iou": 0.34375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 208385216, "step": 2283 }, { "epoch": 9.516666666666667, "grad_norm": 3.3629716139339156, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 208476716, "step": 2284 }, { "epoch": 9.516666666666667, "loss": 0.04319828748703003, "loss_ce": 0.0014044629642739892, "loss_iou": 0.271484375, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 208476716, "step": 2284 }, { "epoch": 9.520833333333334, "grad_norm": 4.8645127234217105, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 208567748, "step": 2285 }, { "epoch": 9.520833333333334, "loss": 0.07986044883728027, "loss_ce": 0.00022483064094558358, "loss_iou": 0.296875, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 208567748, "step": 2285 }, { "epoch": 9.525, "grad_norm": 6.745878397974651, "learning_rate": 5e-05, "loss": 0.0979, "num_input_tokens_seen": 208658852, "step": 2286 }, { "epoch": 9.525, "loss": 0.09872304648160934, "loss_ce": 1.3940521057520527e-05, "loss_iou": 0.3828125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 208658852, "step": 2286 }, { "epoch": 9.529166666666667, "grad_norm": 37.867432870339535, "learning_rate": 5e-05, "loss": 0.0878, "num_input_tokens_seen": 208750688, "step": 2287 }, { "epoch": 9.529166666666667, "loss": 0.06559404730796814, "loss_ce": 0.00016435694124083966, "loss_iou": 0.3359375, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 208750688, "step": 2287 }, { "epoch": 9.533333333333333, "grad_norm": 5.478800482735125, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 208842404, "step": 2288 }, { "epoch": 9.533333333333333, "loss": 0.1185697540640831, "loss_ce": 0.00013103854143992066, "loss_iou": 0.275390625, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 208842404, "step": 2288 }, { "epoch": 9.5375, "grad_norm": 3.186946107916803, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 208933540, "step": 2289 }, { "epoch": 9.5375, "loss": 0.05266163498163223, "loss_ce": 4.9333386414218694e-05, "loss_iou": 0.30859375, "loss_num": 0.010498046875, "loss_xval": 0.052734375, "num_input_tokens_seen": 208933540, "step": 2289 }, { "epoch": 9.541666666666666, "grad_norm": 2.6175938993499805, "learning_rate": 5e-05, "loss": 0.0702, "num_input_tokens_seen": 209024860, "step": 2290 }, { "epoch": 9.541666666666666, "loss": 0.06243140995502472, "loss_ce": 0.0002823605027515441, "loss_iou": 0.171875, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 209024860, "step": 2290 }, { "epoch": 9.545833333333333, "grad_norm": 1.8831217358853127, "learning_rate": 5e-05, "loss": 0.0653, "num_input_tokens_seen": 209116392, "step": 2291 }, { "epoch": 9.545833333333333, "loss": 0.06283891201019287, "loss_ce": 0.00021684322564397007, "loss_iou": 0.34765625, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 209116392, "step": 2291 }, { "epoch": 9.55, "grad_norm": 1.9650237682346743, "learning_rate": 5e-05, "loss": 0.0868, "num_input_tokens_seen": 209207740, "step": 2292 }, { "epoch": 9.55, "loss": 0.11798623949289322, "loss_ce": 6.631258293054998e-05, "loss_iou": 0.2470703125, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 209207740, "step": 2292 }, { "epoch": 9.554166666666667, "grad_norm": 1.0195349557343432, "learning_rate": 5e-05, "loss": 0.0814, "num_input_tokens_seen": 209299368, "step": 2293 }, { "epoch": 9.554166666666667, "loss": 0.0581989549100399, "loss_ce": 7.822553743608296e-05, "loss_iou": 0.2236328125, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 209299368, "step": 2293 }, { "epoch": 9.558333333333334, "grad_norm": 2.0366843665613104, "learning_rate": 5e-05, "loss": 0.0889, "num_input_tokens_seen": 209390852, "step": 2294 }, { "epoch": 9.558333333333334, "loss": 0.0881442278623581, "loss_ce": 0.0012835754314437509, "loss_iou": 0.17578125, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 209390852, "step": 2294 }, { "epoch": 9.5625, "grad_norm": 2.8187444641894417, "learning_rate": 5e-05, "loss": 0.0814, "num_input_tokens_seen": 209482280, "step": 2295 }, { "epoch": 9.5625, "loss": 0.08741619437932968, "loss_ce": 1.3851060430170037e-05, "loss_iou": 0.375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 209482280, "step": 2295 }, { "epoch": 9.566666666666666, "grad_norm": 6.942068475760814, "learning_rate": 5e-05, "loss": 0.0902, "num_input_tokens_seen": 209573388, "step": 2296 }, { "epoch": 9.566666666666666, "loss": 0.04752349853515625, "loss_ce": 2.288810719619505e-05, "loss_iou": 0.3671875, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 209573388, "step": 2296 }, { "epoch": 9.570833333333333, "grad_norm": 3.919273820651139, "learning_rate": 5e-05, "loss": 0.0824, "num_input_tokens_seen": 209665200, "step": 2297 }, { "epoch": 9.570833333333333, "loss": 0.09870068728923798, "loss_ce": 0.00018994146375916898, "loss_iou": 0.345703125, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 209665200, "step": 2297 }, { "epoch": 9.575, "grad_norm": 2.9869843848344875, "learning_rate": 5e-05, "loss": 0.0857, "num_input_tokens_seen": 209756420, "step": 2298 }, { "epoch": 9.575, "loss": 0.09328018128871918, "loss_ce": 0.001880028867162764, "loss_iou": 0.25390625, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 209756420, "step": 2298 }, { "epoch": 9.579166666666667, "grad_norm": 3.168385742863224, "learning_rate": 5e-05, "loss": 0.0704, "num_input_tokens_seen": 209847368, "step": 2299 }, { "epoch": 9.579166666666667, "loss": 0.05465655028820038, "loss_ce": 0.00048784760292619467, "loss_iou": 0.220703125, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 209847368, "step": 2299 }, { "epoch": 9.583333333333334, "grad_norm": 2.964641575652676, "learning_rate": 5e-05, "loss": 0.06, "num_input_tokens_seen": 209938492, "step": 2300 }, { "epoch": 9.583333333333334, "loss": 0.07439778745174408, "loss_ce": 1.118627824325813e-05, "loss_iou": 0.3203125, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 209938492, "step": 2300 }, { "epoch": 9.5875, "grad_norm": 3.3481929061012217, "learning_rate": 5e-05, "loss": 0.1061, "num_input_tokens_seen": 210029232, "step": 2301 }, { "epoch": 9.5875, "loss": 0.07980884611606598, "loss_ce": 0.0019737626425921917, "loss_iou": 0.2392578125, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 210029232, "step": 2301 }, { "epoch": 9.591666666666667, "grad_norm": 3.0061991395338636, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 210120564, "step": 2302 }, { "epoch": 9.591666666666667, "loss": 0.05892244726419449, "loss_ce": 2.3518419766332954e-05, "loss_iou": 0.263671875, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 210120564, "step": 2302 }, { "epoch": 9.595833333333333, "grad_norm": 3.001238624970012, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 210212024, "step": 2303 }, { "epoch": 9.595833333333333, "loss": 0.04750463739037514, "loss_ce": 0.0016672349302098155, "loss_iou": 0.310546875, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 210212024, "step": 2303 }, { "epoch": 9.6, "grad_norm": 3.1328769634590325, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 210303912, "step": 2304 }, { "epoch": 9.6, "loss": 0.027168650180101395, "loss_ce": 0.0002902920823544264, "loss_iou": 0.2392578125, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 210303912, "step": 2304 }, { "epoch": 9.604166666666666, "grad_norm": 2.9176307605044727, "learning_rate": 5e-05, "loss": 0.0697, "num_input_tokens_seen": 210395604, "step": 2305 }, { "epoch": 9.604166666666666, "loss": 0.06715308129787445, "loss_ce": 0.001814942224882543, "loss_iou": 0.26171875, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 210395604, "step": 2305 }, { "epoch": 9.608333333333333, "grad_norm": 3.8731669151747314, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 210487352, "step": 2306 }, { "epoch": 9.608333333333333, "loss": 0.06469616293907166, "loss_ce": 0.0007770942756906152, "loss_iou": 0.267578125, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 210487352, "step": 2306 }, { "epoch": 9.6125, "grad_norm": 1.7329658587218324, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 210578192, "step": 2307 }, { "epoch": 9.6125, "loss": 0.06999445706605911, "loss_ce": 0.0007653271895833313, "loss_iou": 0.181640625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 210578192, "step": 2307 }, { "epoch": 9.616666666666667, "grad_norm": 2.633133076304407, "learning_rate": 5e-05, "loss": 0.0784, "num_input_tokens_seen": 210669848, "step": 2308 }, { "epoch": 9.616666666666667, "loss": 0.07915782928466797, "loss_ce": 0.00040722257108427584, "loss_iou": 0.1865234375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 210669848, "step": 2308 }, { "epoch": 9.620833333333334, "grad_norm": 3.2415885083087583, "learning_rate": 5e-05, "loss": 0.0704, "num_input_tokens_seen": 210761176, "step": 2309 }, { "epoch": 9.620833333333334, "loss": 0.06210331618785858, "loss_ce": 1.5305309716495685e-05, "loss_iou": 0.30859375, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 210761176, "step": 2309 }, { "epoch": 9.625, "grad_norm": 10.46700965607754, "learning_rate": 5e-05, "loss": 0.1214, "num_input_tokens_seen": 210851532, "step": 2310 }, { "epoch": 9.625, "loss": 0.20068180561065674, "loss_ce": 0.00024236088211182505, "loss_iou": 0.28515625, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 210851532, "step": 2310 }, { "epoch": 9.629166666666666, "grad_norm": 9.77035819381619, "learning_rate": 5e-05, "loss": 0.0818, "num_input_tokens_seen": 210942648, "step": 2311 }, { "epoch": 9.629166666666666, "loss": 0.06990273296833038, "loss_ce": 0.0010855919681489468, "loss_iou": 0.279296875, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 210942648, "step": 2311 }, { "epoch": 9.633333333333333, "grad_norm": 2.268298287488433, "learning_rate": 5e-05, "loss": 0.0872, "num_input_tokens_seen": 211033656, "step": 2312 }, { "epoch": 9.633333333333333, "loss": 0.08489096164703369, "loss_ce": 6.3188363128574565e-06, "loss_iou": 0.31640625, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 211033656, "step": 2312 }, { "epoch": 9.6375, "grad_norm": 2.7690051300003744, "learning_rate": 5e-05, "loss": 0.1414, "num_input_tokens_seen": 211125328, "step": 2313 }, { "epoch": 9.6375, "loss": 0.17821158468723297, "loss_ce": 0.0017894639167934656, "loss_iou": 0.216796875, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 211125328, "step": 2313 }, { "epoch": 9.641666666666667, "grad_norm": 2.403599267282493, "learning_rate": 5e-05, "loss": 0.0839, "num_input_tokens_seen": 211216188, "step": 2314 }, { "epoch": 9.641666666666667, "loss": 0.09419400244951248, "loss_ce": 1.4980028026911896e-06, "loss_iou": 0.2216796875, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 211216188, "step": 2314 }, { "epoch": 9.645833333333334, "grad_norm": 2.024190396115235, "learning_rate": 5e-05, "loss": 0.0625, "num_input_tokens_seen": 211307148, "step": 2315 }, { "epoch": 9.645833333333334, "loss": 0.08138015121221542, "loss_ce": 6.606592796742916e-05, "loss_iou": 0.2451171875, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 211307148, "step": 2315 }, { "epoch": 9.65, "grad_norm": 7.034624926758856, "learning_rate": 5e-05, "loss": 0.1537, "num_input_tokens_seen": 211398560, "step": 2316 }, { "epoch": 9.65, "loss": 0.11996078491210938, "loss_ce": 0.0004081761871930212, "loss_iou": 0.2392578125, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 211398560, "step": 2316 }, { "epoch": 9.654166666666667, "grad_norm": 3.433862137729614, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 211488688, "step": 2317 }, { "epoch": 9.654166666666667, "loss": 0.07582279294729233, "loss_ce": 1.7128168110502884e-05, "loss_iou": 0.3125, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 211488688, "step": 2317 }, { "epoch": 9.658333333333333, "grad_norm": 4.1438214713878585, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 211579856, "step": 2318 }, { "epoch": 9.658333333333333, "loss": 0.07438762485980988, "loss_ce": 0.00027568096993491054, "loss_iou": 0.134765625, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 211579856, "step": 2318 }, { "epoch": 9.6625, "grad_norm": 3.0284126105680227, "learning_rate": 5e-05, "loss": 0.1231, "num_input_tokens_seen": 211670124, "step": 2319 }, { "epoch": 9.6625, "loss": 0.05692708119750023, "loss_ce": 0.00017964384460356086, "loss_iou": 0.259765625, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 211670124, "step": 2319 }, { "epoch": 9.666666666666666, "grad_norm": 6.624790027554082, "learning_rate": 5e-05, "loss": 0.0827, "num_input_tokens_seen": 211761708, "step": 2320 }, { "epoch": 9.666666666666666, "loss": 0.10713419318199158, "loss_ce": 9.378503455081955e-05, "loss_iou": 0.3359375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 211761708, "step": 2320 }, { "epoch": 9.670833333333333, "grad_norm": 3.8665033268834113, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 211852468, "step": 2321 }, { "epoch": 9.670833333333333, "loss": 0.05054951831698418, "loss_ce": 0.0011568169575184584, "loss_iou": 0.27734375, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 211852468, "step": 2321 }, { "epoch": 9.675, "grad_norm": 4.1451423402198895, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 211943768, "step": 2322 }, { "epoch": 9.675, "loss": 0.06782028824090958, "loss_ce": 1.023104414343834e-05, "loss_iou": 0.21484375, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 211943768, "step": 2322 }, { "epoch": 9.679166666666667, "grad_norm": 6.308823290773596, "learning_rate": 5e-05, "loss": 0.1002, "num_input_tokens_seen": 212031848, "step": 2323 }, { "epoch": 9.679166666666667, "loss": 0.05495281517505646, "loss_ce": 2.1176834707148373e-05, "loss_iou": 0.28515625, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 212031848, "step": 2323 }, { "epoch": 9.683333333333334, "grad_norm": 2.2750972936487774, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 212123040, "step": 2324 }, { "epoch": 9.683333333333334, "loss": 0.04357624053955078, "loss_ce": 0.00026417168555781245, "loss_iou": 0.2431640625, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 212123040, "step": 2324 }, { "epoch": 9.6875, "grad_norm": 2.5181649970463935, "learning_rate": 5e-05, "loss": 0.0633, "num_input_tokens_seen": 212214276, "step": 2325 }, { "epoch": 9.6875, "loss": 0.0611778125166893, "loss_ce": 0.0005698998575098813, "loss_iou": 0.33984375, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 212214276, "step": 2325 }, { "epoch": 9.691666666666666, "grad_norm": 47.32888864378727, "learning_rate": 5e-05, "loss": 0.1016, "num_input_tokens_seen": 212306284, "step": 2326 }, { "epoch": 9.691666666666666, "loss": 0.11467814445495605, "loss_ce": 0.000801810179837048, "loss_iou": 0.26171875, "loss_num": 0.0228271484375, "loss_xval": 0.11376953125, "num_input_tokens_seen": 212306284, "step": 2326 }, { "epoch": 9.695833333333333, "grad_norm": 4.5252150406005125, "learning_rate": 5e-05, "loss": 0.0758, "num_input_tokens_seen": 212396928, "step": 2327 }, { "epoch": 9.695833333333333, "loss": 0.07074105739593506, "loss_ce": 1.3064613995084073e-06, "loss_iou": 0.267578125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 212396928, "step": 2327 }, { "epoch": 9.7, "grad_norm": 5.0140978725153955, "learning_rate": 5e-05, "loss": 0.0749, "num_input_tokens_seen": 212487820, "step": 2328 }, { "epoch": 9.7, "loss": 0.0942697674036026, "loss_ce": 9.707949857329368e-07, "loss_iou": 0.33984375, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 212487820, "step": 2328 }, { "epoch": 9.704166666666667, "grad_norm": 1.9934241094260983, "learning_rate": 5e-05, "loss": 0.1041, "num_input_tokens_seen": 212579456, "step": 2329 }, { "epoch": 9.704166666666667, "loss": 0.09087227284908295, "loss_ce": 0.0002198061702074483, "loss_iou": 0.279296875, "loss_num": 0.01806640625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 212579456, "step": 2329 }, { "epoch": 9.708333333333334, "grad_norm": 7.854491895558061, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 212670164, "step": 2330 }, { "epoch": 9.708333333333334, "loss": 0.06276330351829529, "loss_ce": 3.900450792571064e-06, "loss_iou": 0.3515625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 212670164, "step": 2330 }, { "epoch": 9.7125, "grad_norm": 3.9516540639993845, "learning_rate": 5e-05, "loss": 0.1026, "num_input_tokens_seen": 212761568, "step": 2331 }, { "epoch": 9.7125, "loss": 0.06600821018218994, "loss_ce": 1.3949293133919127e-05, "loss_iou": 0.27734375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 212761568, "step": 2331 }, { "epoch": 9.716666666666667, "grad_norm": 2.8755187332405554, "learning_rate": 5e-05, "loss": 0.0727, "num_input_tokens_seen": 212852804, "step": 2332 }, { "epoch": 9.716666666666667, "loss": 0.07823988795280457, "loss_ce": 2.3335524019785225e-05, "loss_iou": 0.287109375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 212852804, "step": 2332 }, { "epoch": 9.720833333333333, "grad_norm": 4.2736728794931125, "learning_rate": 5e-05, "loss": 0.1083, "num_input_tokens_seen": 212943956, "step": 2333 }, { "epoch": 9.720833333333333, "loss": 0.13756805658340454, "loss_ce": 1.0072521035908721e-05, "loss_iou": 0.1728515625, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 212943956, "step": 2333 }, { "epoch": 9.725, "grad_norm": 3.335770224120245, "learning_rate": 5e-05, "loss": 0.1073, "num_input_tokens_seen": 213034888, "step": 2334 }, { "epoch": 9.725, "loss": 0.15872299671173096, "loss_ce": 0.002099153818562627, "loss_iou": 0.28515625, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 213034888, "step": 2334 }, { "epoch": 9.729166666666666, "grad_norm": 12.569562571201159, "learning_rate": 5e-05, "loss": 0.1035, "num_input_tokens_seen": 213126568, "step": 2335 }, { "epoch": 9.729166666666666, "loss": 0.13710667192935944, "loss_ce": 0.001364489900879562, "loss_iou": 0.287109375, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 213126568, "step": 2335 }, { "epoch": 9.733333333333333, "grad_norm": 4.464492026423258, "learning_rate": 5e-05, "loss": 0.0994, "num_input_tokens_seen": 213217640, "step": 2336 }, { "epoch": 9.733333333333333, "loss": 0.07574643939733505, "loss_ce": 0.0007952642044983804, "loss_iou": 0.1787109375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 213217640, "step": 2336 }, { "epoch": 9.7375, "grad_norm": 3.6168676118183, "learning_rate": 5e-05, "loss": 0.0662, "num_input_tokens_seen": 213309268, "step": 2337 }, { "epoch": 9.7375, "loss": 0.05703987181186676, "loss_ce": 0.0003382100840099156, "loss_iou": 0.419921875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 213309268, "step": 2337 }, { "epoch": 9.741666666666667, "grad_norm": 9.748831522555728, "learning_rate": 5e-05, "loss": 0.1086, "num_input_tokens_seen": 213400104, "step": 2338 }, { "epoch": 9.741666666666667, "loss": 0.09555191546678543, "loss_ce": 1.3787457646685652e-06, "loss_iou": 0.251953125, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 213400104, "step": 2338 }, { "epoch": 9.745833333333334, "grad_norm": 2.19442959998253, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 213491216, "step": 2339 }, { "epoch": 9.745833333333334, "loss": 0.031318824738264084, "loss_ce": 0.00029770893161185086, "loss_iou": 0.1533203125, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 213491216, "step": 2339 }, { "epoch": 9.75, "grad_norm": 15.327886100065887, "learning_rate": 5e-05, "loss": 0.0885, "num_input_tokens_seen": 213582480, "step": 2340 }, { "epoch": 9.75, "loss": 0.09976033121347427, "loss_ce": 0.0010664837900549173, "loss_iou": 0.140625, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 213582480, "step": 2340 }, { "epoch": 9.754166666666666, "grad_norm": 3.3946902286936345, "learning_rate": 5e-05, "loss": 0.1089, "num_input_tokens_seen": 213673028, "step": 2341 }, { "epoch": 9.754166666666666, "loss": 0.0917673110961914, "loss_ce": 9.517025318928063e-07, "loss_iou": 0.2265625, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 213673028, "step": 2341 }, { "epoch": 9.758333333333333, "grad_norm": 2.8068820704716306, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 213764164, "step": 2342 }, { "epoch": 9.758333333333333, "loss": 0.06515133380889893, "loss_ce": 1.1558730875549372e-05, "loss_iou": 0.287109375, "loss_num": 0.0130615234375, "loss_xval": 0.06494140625, "num_input_tokens_seen": 213764164, "step": 2342 }, { "epoch": 9.7625, "grad_norm": 1.7118709907416432, "learning_rate": 5e-05, "loss": 0.1154, "num_input_tokens_seen": 213855484, "step": 2343 }, { "epoch": 9.7625, "loss": 0.15148527920246124, "loss_ce": 2.6543737476458773e-05, "loss_iou": 0.4609375, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 213855484, "step": 2343 }, { "epoch": 9.766666666666667, "grad_norm": 1.7780286715792515, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 213947032, "step": 2344 }, { "epoch": 9.766666666666667, "loss": 0.0518774576485157, "loss_ce": 0.0020727699156850576, "loss_iou": 0.1259765625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 213947032, "step": 2344 }, { "epoch": 9.770833333333334, "grad_norm": 1.9309264666783899, "learning_rate": 5e-05, "loss": 0.1121, "num_input_tokens_seen": 214038300, "step": 2345 }, { "epoch": 9.770833333333334, "loss": 0.1719518005847931, "loss_ce": 0.0018162998603656888, "loss_iou": 0.154296875, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 214038300, "step": 2345 }, { "epoch": 9.775, "grad_norm": 1.712701511235156, "learning_rate": 5e-05, "loss": 0.0456, "num_input_tokens_seen": 214129548, "step": 2346 }, { "epoch": 9.775, "loss": 0.06391113996505737, "loss_ce": 0.00010651241609593853, "loss_iou": 0.15234375, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 214129548, "step": 2346 }, { "epoch": 9.779166666666667, "grad_norm": 5.577947780444153, "learning_rate": 5e-05, "loss": 0.1246, "num_input_tokens_seen": 214220536, "step": 2347 }, { "epoch": 9.779166666666667, "loss": 0.12966987490653992, "loss_ce": 0.0036322667729109526, "loss_iou": 0.287109375, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 214220536, "step": 2347 }, { "epoch": 9.783333333333333, "grad_norm": 3.5191047619911178, "learning_rate": 5e-05, "loss": 0.063, "num_input_tokens_seen": 214312616, "step": 2348 }, { "epoch": 9.783333333333333, "loss": 0.06377019733190536, "loss_ce": 0.0005988088087178767, "loss_iou": 0.27734375, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 214312616, "step": 2348 }, { "epoch": 9.7875, "grad_norm": 2.1775888706689095, "learning_rate": 5e-05, "loss": 0.0747, "num_input_tokens_seen": 214404052, "step": 2349 }, { "epoch": 9.7875, "loss": 0.05960691720247269, "loss_ce": 0.0003570413973648101, "loss_iou": 0.234375, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 214404052, "step": 2349 }, { "epoch": 9.791666666666666, "grad_norm": 2.8138942849599156, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 214495420, "step": 2350 }, { "epoch": 9.791666666666666, "loss": 0.0710035040974617, "loss_ce": 3.4879423765232787e-05, "loss_iou": 0.244140625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 214495420, "step": 2350 }, { "epoch": 9.795833333333333, "grad_norm": 4.1030473929463405, "learning_rate": 5e-05, "loss": 0.1055, "num_input_tokens_seen": 214586960, "step": 2351 }, { "epoch": 9.795833333333333, "loss": 0.06355182826519012, "loss_ce": 0.0002125888568116352, "loss_iou": 0.34375, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 214586960, "step": 2351 }, { "epoch": 9.8, "grad_norm": 2.679079200599434, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 214677932, "step": 2352 }, { "epoch": 9.8, "loss": 0.05392606183886528, "loss_ce": 1.5020291357359383e-06, "loss_iou": 0.294921875, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 214677932, "step": 2352 }, { "epoch": 9.804166666666667, "grad_norm": 5.000155119702331, "learning_rate": 5e-05, "loss": 0.0933, "num_input_tokens_seen": 214769072, "step": 2353 }, { "epoch": 9.804166666666667, "loss": 0.1154058575630188, "loss_ce": 3.632328343883273e-06, "loss_iou": 0.3046875, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 214769072, "step": 2353 }, { "epoch": 9.808333333333334, "grad_norm": 5.068399377116598, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 214860520, "step": 2354 }, { "epoch": 9.808333333333334, "loss": 0.03795129433274269, "loss_ce": 4.0833965613273904e-05, "loss_iou": 0.2314453125, "loss_num": 0.007598876953125, "loss_xval": 0.037841796875, "num_input_tokens_seen": 214860520, "step": 2354 }, { "epoch": 9.8125, "grad_norm": 2.9163140624956725, "learning_rate": 5e-05, "loss": 0.0786, "num_input_tokens_seen": 214951808, "step": 2355 }, { "epoch": 9.8125, "loss": 0.11772525310516357, "loss_ce": 0.0017584576271474361, "loss_iou": 0.263671875, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 214951808, "step": 2355 }, { "epoch": 9.816666666666666, "grad_norm": 4.650004655183908, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 215042492, "step": 2356 }, { "epoch": 9.816666666666666, "loss": 0.1099902018904686, "loss_ce": 4.852836809732253e-06, "loss_iou": 0.23828125, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 215042492, "step": 2356 }, { "epoch": 9.820833333333333, "grad_norm": 3.6950360384389236, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 215133576, "step": 2357 }, { "epoch": 9.820833333333333, "loss": 0.07307278364896774, "loss_ce": 5.947624231339432e-05, "loss_iou": 0.30078125, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 215133576, "step": 2357 }, { "epoch": 9.825, "grad_norm": 4.495764643011328, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 215224968, "step": 2358 }, { "epoch": 9.825, "loss": 0.13321489095687866, "loss_ce": 5.671513918059645e-06, "loss_iou": 0.29296875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 215224968, "step": 2358 }, { "epoch": 9.829166666666667, "grad_norm": 2.9290558110499694, "learning_rate": 5e-05, "loss": 0.061, "num_input_tokens_seen": 215316080, "step": 2359 }, { "epoch": 9.829166666666667, "loss": 0.06919078528881073, "loss_ce": 0.0004346802306827158, "loss_iou": 0.32421875, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 215316080, "step": 2359 }, { "epoch": 9.833333333333334, "grad_norm": 4.526856507028311, "learning_rate": 5e-05, "loss": 0.0948, "num_input_tokens_seen": 215407492, "step": 2360 }, { "epoch": 9.833333333333334, "loss": 0.11653508245944977, "loss_ce": 0.0003546580846887082, "loss_iou": 0.29296875, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 215407492, "step": 2360 }, { "epoch": 9.8375, "grad_norm": 3.022980742192059, "learning_rate": 5e-05, "loss": 0.0805, "num_input_tokens_seen": 215498740, "step": 2361 }, { "epoch": 9.8375, "loss": 0.07529893517494202, "loss_ce": 0.0025374058168381453, "loss_iou": 0.2412109375, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 215498740, "step": 2361 }, { "epoch": 9.841666666666667, "grad_norm": 3.0630692547168064, "learning_rate": 5e-05, "loss": 0.1162, "num_input_tokens_seen": 215589256, "step": 2362 }, { "epoch": 9.841666666666667, "loss": 0.1477106511592865, "loss_ce": 5.569358563661808e-06, "loss_iou": 0.2099609375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 215589256, "step": 2362 }, { "epoch": 9.845833333333333, "grad_norm": 2.3315894574014413, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 215680120, "step": 2363 }, { "epoch": 9.845833333333333, "loss": 0.0856175646185875, "loss_ce": 5.013221766603237e-07, "loss_iou": 0.1845703125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 215680120, "step": 2363 }, { "epoch": 9.85, "grad_norm": 2.7572666404328, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 215771780, "step": 2364 }, { "epoch": 9.85, "loss": 0.03435160592198372, "loss_ce": 0.00021388079039752483, "loss_iou": 0.2373046875, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 215771780, "step": 2364 }, { "epoch": 9.854166666666666, "grad_norm": 3.2252818942955037, "learning_rate": 5e-05, "loss": 0.0996, "num_input_tokens_seen": 215863700, "step": 2365 }, { "epoch": 9.854166666666666, "loss": 0.08640223741531372, "loss_ce": 0.0008614607504568994, "loss_iou": 0.251953125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 215863700, "step": 2365 }, { "epoch": 9.858333333333333, "grad_norm": 2.7280946601298734, "learning_rate": 5e-05, "loss": 0.0904, "num_input_tokens_seen": 215954660, "step": 2366 }, { "epoch": 9.858333333333333, "loss": 0.12923277914524078, "loss_ce": 0.0001891975407488644, "loss_iou": 0.212890625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 215954660, "step": 2366 }, { "epoch": 9.8625, "grad_norm": 2.7140980275793356, "learning_rate": 5e-05, "loss": 0.0844, "num_input_tokens_seen": 216046256, "step": 2367 }, { "epoch": 9.8625, "loss": 0.0715366005897522, "loss_ce": 6.443824531743303e-05, "loss_iou": 0.306640625, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 216046256, "step": 2367 }, { "epoch": 9.866666666666667, "grad_norm": 2.8434266178903465, "learning_rate": 5e-05, "loss": 0.0873, "num_input_tokens_seen": 216137476, "step": 2368 }, { "epoch": 9.866666666666667, "loss": 0.08151215314865112, "loss_ce": 0.00033539917785674334, "loss_iou": 0.349609375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 216137476, "step": 2368 }, { "epoch": 9.870833333333334, "grad_norm": 3.4262262928827703, "learning_rate": 5e-05, "loss": 0.0643, "num_input_tokens_seen": 216229316, "step": 2369 }, { "epoch": 9.870833333333334, "loss": 0.07249397784471512, "loss_ce": 0.0011438806541264057, "loss_iou": 0.3359375, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 216229316, "step": 2369 }, { "epoch": 9.875, "grad_norm": 2.777978724374111, "learning_rate": 5e-05, "loss": 0.1048, "num_input_tokens_seen": 216320344, "step": 2370 }, { "epoch": 9.875, "loss": 0.11107797920703888, "loss_ce": 8.55453617987223e-05, "loss_iou": 0.240234375, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 216320344, "step": 2370 }, { "epoch": 9.879166666666666, "grad_norm": 4.830728497464694, "learning_rate": 5e-05, "loss": 0.1089, "num_input_tokens_seen": 216411052, "step": 2371 }, { "epoch": 9.879166666666666, "loss": 0.09860274195671082, "loss_ce": 1.57094000314828e-05, "loss_iou": 0.287109375, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 216411052, "step": 2371 }, { "epoch": 9.883333333333333, "grad_norm": 4.156025898707058, "learning_rate": 5e-05, "loss": 0.0823, "num_input_tokens_seen": 216502236, "step": 2372 }, { "epoch": 9.883333333333333, "loss": 0.052403755486011505, "loss_ce": 0.000912973249796778, "loss_iou": 0.255859375, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 216502236, "step": 2372 }, { "epoch": 9.8875, "grad_norm": 3.158859098815327, "learning_rate": 5e-05, "loss": 0.0895, "num_input_tokens_seen": 216593540, "step": 2373 }, { "epoch": 9.8875, "loss": 0.03290058299899101, "loss_ce": 0.0008342385408468544, "loss_iou": 0.216796875, "loss_num": 0.00640869140625, "loss_xval": 0.031982421875, "num_input_tokens_seen": 216593540, "step": 2373 }, { "epoch": 9.891666666666667, "grad_norm": 2.3270527447832663, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 216684660, "step": 2374 }, { "epoch": 9.891666666666667, "loss": 0.04258023202419281, "loss_ce": 0.0002981259021908045, "loss_iou": 0.16015625, "loss_num": 0.00848388671875, "loss_xval": 0.042236328125, "num_input_tokens_seen": 216684660, "step": 2374 }, { "epoch": 9.895833333333334, "grad_norm": 4.043780359087197, "learning_rate": 5e-05, "loss": 0.1232, "num_input_tokens_seen": 216776372, "step": 2375 }, { "epoch": 9.895833333333334, "loss": 0.1797737032175064, "loss_ce": 0.00026931928005069494, "loss_iou": 0.224609375, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 216776372, "step": 2375 }, { "epoch": 9.9, "grad_norm": 1.631130023892962, "learning_rate": 5e-05, "loss": 0.0824, "num_input_tokens_seen": 216868132, "step": 2376 }, { "epoch": 9.9, "loss": 0.062147513031959534, "loss_ce": 9.001667058328167e-05, "loss_iou": 0.296875, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 216868132, "step": 2376 }, { "epoch": 9.904166666666667, "grad_norm": 1.6375843259787302, "learning_rate": 5e-05, "loss": 0.0519, "num_input_tokens_seen": 216959320, "step": 2377 }, { "epoch": 9.904166666666667, "loss": 0.05292154848575592, "loss_ce": 4.064571839990094e-06, "loss_iou": 0.30078125, "loss_num": 0.01055908203125, "loss_xval": 0.052978515625, "num_input_tokens_seen": 216959320, "step": 2377 }, { "epoch": 9.908333333333333, "grad_norm": 3.789057407933149, "learning_rate": 5e-05, "loss": 0.082, "num_input_tokens_seen": 217050848, "step": 2378 }, { "epoch": 9.908333333333333, "loss": 0.09067431837320328, "loss_ce": 0.0018223864026367664, "loss_iou": 0.07958984375, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 217050848, "step": 2378 }, { "epoch": 9.9125, "grad_norm": 3.407605174263242, "learning_rate": 5e-05, "loss": 0.072, "num_input_tokens_seen": 217141616, "step": 2379 }, { "epoch": 9.9125, "loss": 0.09490478038787842, "loss_ce": 3.325934085296467e-05, "loss_iou": 0.296875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 217141616, "step": 2379 }, { "epoch": 9.916666666666666, "grad_norm": 5.964912675949317, "learning_rate": 5e-05, "loss": 0.1063, "num_input_tokens_seen": 217233004, "step": 2380 }, { "epoch": 9.916666666666666, "loss": 0.09552451968193054, "loss_ce": 4.49842082161922e-06, "loss_iou": 0.26953125, "loss_num": 0.01904296875, "loss_xval": 0.095703125, "num_input_tokens_seen": 217233004, "step": 2380 }, { "epoch": 9.920833333333333, "grad_norm": 5.675693701891781, "learning_rate": 5e-05, "loss": 0.0801, "num_input_tokens_seen": 217325000, "step": 2381 }, { "epoch": 9.920833333333333, "loss": 0.05819493532180786, "loss_ce": 2.8428947189240716e-05, "loss_iou": 0.3515625, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 217325000, "step": 2381 }, { "epoch": 9.925, "grad_norm": 1.9800756013243537, "learning_rate": 5e-05, "loss": 0.067, "num_input_tokens_seen": 217415812, "step": 2382 }, { "epoch": 9.925, "loss": 0.06221006438136101, "loss_ce": 0.0009307658183388412, "loss_iou": 0.25, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 217415812, "step": 2382 }, { "epoch": 9.929166666666667, "grad_norm": 7.826859746114933, "learning_rate": 5e-05, "loss": 0.0744, "num_input_tokens_seen": 217506640, "step": 2383 }, { "epoch": 9.929166666666667, "loss": 0.08806528151035309, "loss_ce": 0.0002509501646272838, "loss_iou": 0.2138671875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 217506640, "step": 2383 }, { "epoch": 9.933333333333334, "grad_norm": 2.9482377996316744, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 217597960, "step": 2384 }, { "epoch": 9.933333333333334, "loss": 0.04804066941142082, "loss_ce": 6.000054327159887e-06, "loss_iou": 0.3125, "loss_num": 0.00958251953125, "loss_xval": 0.048095703125, "num_input_tokens_seen": 217597960, "step": 2384 }, { "epoch": 9.9375, "grad_norm": 8.245569509710974, "learning_rate": 5e-05, "loss": 0.1383, "num_input_tokens_seen": 217688896, "step": 2385 }, { "epoch": 9.9375, "loss": 0.11219567805528641, "loss_ce": 2.8321906938799657e-05, "loss_iou": 0.3515625, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 217688896, "step": 2385 }, { "epoch": 9.941666666666666, "grad_norm": 3.428894122120708, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 217780696, "step": 2386 }, { "epoch": 9.941666666666666, "loss": 0.06880239397287369, "loss_ce": 0.0025792501401156187, "loss_iou": 0.279296875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 217780696, "step": 2386 }, { "epoch": 9.945833333333333, "grad_norm": 4.2734668776189375, "learning_rate": 5e-05, "loss": 0.1047, "num_input_tokens_seen": 217872132, "step": 2387 }, { "epoch": 9.945833333333333, "loss": 0.13083070516586304, "loss_ce": 0.0021075578406453133, "loss_iou": 0.3359375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 217872132, "step": 2387 }, { "epoch": 9.95, "grad_norm": 4.8866090440778285, "learning_rate": 5e-05, "loss": 0.0648, "num_input_tokens_seen": 217963468, "step": 2388 }, { "epoch": 9.95, "loss": 0.06608153879642487, "loss_ce": 0.0019030753755941987, "loss_iou": 0.22265625, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 217963468, "step": 2388 }, { "epoch": 9.954166666666667, "grad_norm": 2.0819408855510817, "learning_rate": 5e-05, "loss": 0.0792, "num_input_tokens_seen": 218055508, "step": 2389 }, { "epoch": 9.954166666666667, "loss": 0.10832661390304565, "loss_ce": 0.0008589604403823614, "loss_iou": 0.2392578125, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 218055508, "step": 2389 }, { "epoch": 9.958333333333334, "grad_norm": 1.7007160550678941, "learning_rate": 5e-05, "loss": 0.0888, "num_input_tokens_seen": 218146548, "step": 2390 }, { "epoch": 9.958333333333334, "loss": 0.09715352952480316, "loss_ce": 0.00013815786223858595, "loss_iou": 0.30078125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 218146548, "step": 2390 }, { "epoch": 9.9625, "grad_norm": 2.832922464483346, "learning_rate": 5e-05, "loss": 0.0721, "num_input_tokens_seen": 218237904, "step": 2391 }, { "epoch": 9.9625, "loss": 0.07945854961872101, "loss_ce": 0.0006087534129619598, "loss_iou": 0.306640625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 218237904, "step": 2391 }, { "epoch": 9.966666666666667, "grad_norm": 2.27150866322334, "learning_rate": 5e-05, "loss": 0.0789, "num_input_tokens_seen": 218329788, "step": 2392 }, { "epoch": 9.966666666666667, "loss": 0.06808695942163467, "loss_ce": 0.00036845580325461924, "loss_iou": 0.1513671875, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 218329788, "step": 2392 }, { "epoch": 9.970833333333333, "grad_norm": 3.3544890310831064, "learning_rate": 5e-05, "loss": 0.1286, "num_input_tokens_seen": 218421056, "step": 2393 }, { "epoch": 9.970833333333333, "loss": 0.1268257051706314, "loss_ce": 0.00011672836990328506, "loss_iou": 0.33203125, "loss_num": 0.0252685546875, "loss_xval": 0.126953125, "num_input_tokens_seen": 218421056, "step": 2393 }, { "epoch": 9.975, "grad_norm": 15.697672226925908, "learning_rate": 5e-05, "loss": 0.0724, "num_input_tokens_seen": 218513060, "step": 2394 }, { "epoch": 9.975, "loss": 0.09975279122591019, "loss_ce": 0.004324328154325485, "loss_iou": 0.134765625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 218513060, "step": 2394 }, { "epoch": 9.979166666666666, "grad_norm": 13.437865154392682, "learning_rate": 5e-05, "loss": 0.1062, "num_input_tokens_seen": 218604308, "step": 2395 }, { "epoch": 9.979166666666666, "loss": 0.1018124371767044, "loss_ce": 0.0006314095808193088, "loss_iou": 0.263671875, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 218604308, "step": 2395 }, { "epoch": 9.983333333333333, "grad_norm": 2.1136336116403878, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 218694532, "step": 2396 }, { "epoch": 9.983333333333333, "loss": 0.03833974897861481, "loss_ce": 2.492999192327261e-05, "loss_iou": 0.23046875, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 218694532, "step": 2396 }, { "epoch": 9.9875, "grad_norm": 4.0907594526213265, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 218786656, "step": 2397 }, { "epoch": 9.9875, "loss": 0.047209057956933975, "loss_ce": 0.000761302886530757, "loss_iou": 0.3515625, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 218786656, "step": 2397 }, { "epoch": 9.991666666666667, "grad_norm": 3.173601061282437, "learning_rate": 5e-05, "loss": 0.0958, "num_input_tokens_seen": 218877772, "step": 2398 }, { "epoch": 9.991666666666667, "loss": 0.0674939751625061, "loss_ce": 1.1977379472227767e-05, "loss_iou": 0.201171875, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 218877772, "step": 2398 }, { "epoch": 9.995833333333334, "grad_norm": 4.917086807008557, "learning_rate": 5e-05, "loss": 0.1076, "num_input_tokens_seen": 218969860, "step": 2399 }, { "epoch": 9.995833333333334, "loss": 0.08607882261276245, "loss_ce": 0.0025216902140527964, "loss_iou": 0.310546875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 218969860, "step": 2399 }, { "epoch": 10.0, "grad_norm": 5.4402695278723305, "learning_rate": 5e-05, "loss": 0.1207, "num_input_tokens_seen": 219060548, "step": 2400 }, { "epoch": 10.0, "loss": 0.09793904423713684, "loss_ce": 8.13117094367044e-06, "loss_iou": 0.34765625, "loss_num": 0.01953125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 219060548, "step": 2400 }, { "epoch": 10.004166666666666, "grad_norm": 3.0268333764807838, "learning_rate": 5e-05, "loss": 0.0823, "num_input_tokens_seen": 219150468, "step": 2401 }, { "epoch": 10.004166666666666, "loss": 0.07914966344833374, "loss_ce": 0.0003380189591553062, "loss_iou": 0.40234375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 219150468, "step": 2401 }, { "epoch": 10.008333333333333, "grad_norm": 3.55544659192017, "learning_rate": 5e-05, "loss": 0.0813, "num_input_tokens_seen": 219242128, "step": 2402 }, { "epoch": 10.008333333333333, "loss": 0.11007954180240631, "loss_ce": 0.00013996948837302625, "loss_iou": 0.26171875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 219242128, "step": 2402 }, { "epoch": 10.0125, "grad_norm": 2.8460970426000416, "learning_rate": 5e-05, "loss": 0.0988, "num_input_tokens_seen": 219333524, "step": 2403 }, { "epoch": 10.0125, "loss": 0.057223014533519745, "loss_ce": 3.307330553070642e-05, "loss_iou": 0.359375, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 219333524, "step": 2403 }, { "epoch": 10.016666666666667, "grad_norm": 1.801013103785767, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 219424884, "step": 2404 }, { "epoch": 10.016666666666667, "loss": 0.054716162383556366, "loss_ce": 2.1029693016316742e-05, "loss_iou": 0.255859375, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 219424884, "step": 2404 }, { "epoch": 10.020833333333334, "grad_norm": 3.1932161522855353, "learning_rate": 5e-05, "loss": 0.0895, "num_input_tokens_seen": 219515724, "step": 2405 }, { "epoch": 10.020833333333334, "loss": 0.09836345911026001, "loss_ce": 5.310107098921435e-06, "loss_iou": 0.0, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 219515724, "step": 2405 }, { "epoch": 10.025, "grad_norm": 2.7569867058474657, "learning_rate": 5e-05, "loss": 0.0683, "num_input_tokens_seen": 219606804, "step": 2406 }, { "epoch": 10.025, "loss": 0.04205465316772461, "loss_ce": 8.535310189472511e-05, "loss_iou": 0.2158203125, "loss_num": 0.00836181640625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 219606804, "step": 2406 }, { "epoch": 10.029166666666667, "grad_norm": 3.201256579939997, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 219697536, "step": 2407 }, { "epoch": 10.029166666666667, "loss": 0.06935551762580872, "loss_ce": 3.484290209598839e-05, "loss_iou": 0.2890625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 219697536, "step": 2407 }, { "epoch": 10.033333333333333, "grad_norm": 3.1797158746431307, "learning_rate": 5e-05, "loss": 0.1001, "num_input_tokens_seen": 219789392, "step": 2408 }, { "epoch": 10.033333333333333, "loss": 0.08047311007976532, "loss_ce": 0.00012033308303216472, "loss_iou": 0.3359375, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 219789392, "step": 2408 }, { "epoch": 10.0375, "grad_norm": 3.494274889843191, "learning_rate": 5e-05, "loss": 0.1097, "num_input_tokens_seen": 219880764, "step": 2409 }, { "epoch": 10.0375, "loss": 0.1517402082681656, "loss_ce": 0.0002814657927956432, "loss_iou": 0.220703125, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 219880764, "step": 2409 }, { "epoch": 10.041666666666666, "grad_norm": 4.472146277590022, "learning_rate": 5e-05, "loss": 0.086, "num_input_tokens_seen": 219972004, "step": 2410 }, { "epoch": 10.041666666666666, "loss": 0.10514138638973236, "loss_ce": 3.884359830408357e-05, "loss_iou": 0.2353515625, "loss_num": 0.0211181640625, "loss_xval": 0.10498046875, "num_input_tokens_seen": 219972004, "step": 2410 }, { "epoch": 10.045833333333333, "grad_norm": 2.2622531373039223, "learning_rate": 5e-05, "loss": 0.1177, "num_input_tokens_seen": 220062736, "step": 2411 }, { "epoch": 10.045833333333333, "loss": 0.054421041160821915, "loss_ce": 0.0005575146642513573, "loss_iou": 0.21484375, "loss_num": 0.0107421875, "loss_xval": 0.053955078125, "num_input_tokens_seen": 220062736, "step": 2411 }, { "epoch": 10.05, "grad_norm": 2.0006320317028243, "learning_rate": 5e-05, "loss": 0.0556, "num_input_tokens_seen": 220154128, "step": 2412 }, { "epoch": 10.05, "loss": 0.035746023058891296, "loss_ce": 0.0013221934204921126, "loss_iou": 0.296875, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 220154128, "step": 2412 }, { "epoch": 10.054166666666667, "grad_norm": 2.8703796099596146, "learning_rate": 5e-05, "loss": 0.0883, "num_input_tokens_seen": 220245464, "step": 2413 }, { "epoch": 10.054166666666667, "loss": 0.12158460915088654, "loss_ce": 1.7835860489867628e-05, "loss_iou": 0.35546875, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 220245464, "step": 2413 }, { "epoch": 10.058333333333334, "grad_norm": 3.156257832441717, "learning_rate": 5e-05, "loss": 0.0765, "num_input_tokens_seen": 220336884, "step": 2414 }, { "epoch": 10.058333333333334, "loss": 0.10016626864671707, "loss_ce": 0.002448986517265439, "loss_iou": 0.263671875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 220336884, "step": 2414 }, { "epoch": 10.0625, "grad_norm": 1.5155234429258053, "learning_rate": 5e-05, "loss": 0.075, "num_input_tokens_seen": 220428424, "step": 2415 }, { "epoch": 10.0625, "loss": 0.09825599193572998, "loss_ce": 0.000370861409464851, "loss_iou": 0.283203125, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 220428424, "step": 2415 }, { "epoch": 10.066666666666666, "grad_norm": 3.9094783921140146, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 220520116, "step": 2416 }, { "epoch": 10.066666666666666, "loss": 0.06594032049179077, "loss_ce": 0.0002893781056627631, "loss_iou": 0.12255859375, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 220520116, "step": 2416 }, { "epoch": 10.070833333333333, "grad_norm": 5.7386299952124045, "learning_rate": 5e-05, "loss": 0.0596, "num_input_tokens_seen": 220611108, "step": 2417 }, { "epoch": 10.070833333333333, "loss": 0.04351577162742615, "loss_ce": 1.2962243999936618e-05, "loss_iou": 0.2578125, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 220611108, "step": 2417 }, { "epoch": 10.075, "grad_norm": 13.471698733873914, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 220702400, "step": 2418 }, { "epoch": 10.075, "loss": 0.05986550450325012, "loss_ce": 0.00011208564683329314, "loss_iou": 0.23046875, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 220702400, "step": 2418 }, { "epoch": 10.079166666666667, "grad_norm": 2.2063943344886954, "learning_rate": 5e-05, "loss": 0.0926, "num_input_tokens_seen": 220792056, "step": 2419 }, { "epoch": 10.079166666666667, "loss": 0.11690068989992142, "loss_ce": 0.0017883825348690152, "loss_iou": 0.18359375, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 220792056, "step": 2419 }, { "epoch": 10.083333333333334, "grad_norm": 2.1165109910760656, "learning_rate": 5e-05, "loss": 0.0793, "num_input_tokens_seen": 220883616, "step": 2420 }, { "epoch": 10.083333333333334, "loss": 0.073494553565979, "loss_ce": 0.0004659837868530303, "loss_iou": 0.3203125, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 220883616, "step": 2420 }, { "epoch": 10.0875, "grad_norm": 4.4274156190618745, "learning_rate": 5e-05, "loss": 0.0835, "num_input_tokens_seen": 220974820, "step": 2421 }, { "epoch": 10.0875, "loss": 0.10872413218021393, "loss_ce": 0.0009665669058449566, "loss_iou": 0.306640625, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 220974820, "step": 2421 }, { "epoch": 10.091666666666667, "grad_norm": 14.36812894515794, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 221066324, "step": 2422 }, { "epoch": 10.091666666666667, "loss": 0.04289761185646057, "loss_ce": 0.002934845397248864, "loss_iou": 0.375, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 221066324, "step": 2422 }, { "epoch": 10.095833333333333, "grad_norm": 2.791532517943939, "learning_rate": 5e-05, "loss": 0.0809, "num_input_tokens_seen": 221157812, "step": 2423 }, { "epoch": 10.095833333333333, "loss": 0.10510668158531189, "loss_ce": 0.0006297547952271998, "loss_iou": 0.36328125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 221157812, "step": 2423 }, { "epoch": 10.1, "grad_norm": 4.795963465485498, "learning_rate": 5e-05, "loss": 0.0957, "num_input_tokens_seen": 221248892, "step": 2424 }, { "epoch": 10.1, "loss": 0.09126611053943634, "loss_ce": 0.0003542409685906023, "loss_iou": 0.361328125, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 221248892, "step": 2424 }, { "epoch": 10.104166666666666, "grad_norm": 1.3895802639770385, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 221340344, "step": 2425 }, { "epoch": 10.104166666666666, "loss": 0.04654834046959877, "loss_ce": 0.00019213651830796152, "loss_iou": 0.3125, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 221340344, "step": 2425 }, { "epoch": 10.108333333333333, "grad_norm": 1.9300053468786738, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 221430728, "step": 2426 }, { "epoch": 10.108333333333333, "loss": 0.0767325758934021, "loss_ce": 0.00025553052546456456, "loss_iou": 0.287109375, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 221430728, "step": 2426 }, { "epoch": 10.1125, "grad_norm": 6.7978455117570205, "learning_rate": 5e-05, "loss": 0.1173, "num_input_tokens_seen": 221522208, "step": 2427 }, { "epoch": 10.1125, "loss": 0.12275524437427521, "loss_ce": 1.3544628927775193e-05, "loss_iou": 0.294921875, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 221522208, "step": 2427 }, { "epoch": 10.116666666666667, "grad_norm": 3.729614802969419, "learning_rate": 5e-05, "loss": 0.0793, "num_input_tokens_seen": 221612324, "step": 2428 }, { "epoch": 10.116666666666667, "loss": 0.07310568541288376, "loss_ce": 0.00018393303616903722, "loss_iou": 0.376953125, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 221612324, "step": 2428 }, { "epoch": 10.120833333333334, "grad_norm": 2.520190672936384, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 221703620, "step": 2429 }, { "epoch": 10.120833333333334, "loss": 0.06499192863702774, "loss_ce": 0.0001725930196698755, "loss_iou": 0.255859375, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 221703620, "step": 2429 }, { "epoch": 10.125, "grad_norm": 2.4152431639328533, "learning_rate": 5e-05, "loss": 0.0932, "num_input_tokens_seen": 221795388, "step": 2430 }, { "epoch": 10.125, "loss": 0.05093023553490639, "loss_ce": 0.00010320887668058276, "loss_iou": 0.3046875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 221795388, "step": 2430 }, { "epoch": 10.129166666666666, "grad_norm": 5.861639988839476, "learning_rate": 5e-05, "loss": 0.0797, "num_input_tokens_seen": 221886460, "step": 2431 }, { "epoch": 10.129166666666666, "loss": 0.08886352926492691, "loss_ce": 3.969657882407773e-06, "loss_iou": 0.232421875, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 221886460, "step": 2431 }, { "epoch": 10.133333333333333, "grad_norm": 2.749109833223461, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 221978136, "step": 2432 }, { "epoch": 10.133333333333333, "loss": 0.07125408947467804, "loss_ce": 7.183963316492736e-05, "loss_iou": 0.28125, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 221978136, "step": 2432 }, { "epoch": 10.1375, "grad_norm": 4.697700481651981, "learning_rate": 5e-05, "loss": 0.0989, "num_input_tokens_seen": 222069384, "step": 2433 }, { "epoch": 10.1375, "loss": 0.05989304929971695, "loss_ce": 4.8080084525281563e-05, "loss_iou": 0.287109375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 222069384, "step": 2433 }, { "epoch": 10.141666666666667, "grad_norm": 1.8668463225797145, "learning_rate": 5e-05, "loss": 0.0838, "num_input_tokens_seen": 222160292, "step": 2434 }, { "epoch": 10.141666666666667, "loss": 0.14080928266048431, "loss_ce": 1.1779565056713182e-06, "loss_iou": 0.31640625, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 222160292, "step": 2434 }, { "epoch": 10.145833333333334, "grad_norm": 4.570146064664159, "learning_rate": 5e-05, "loss": 0.0869, "num_input_tokens_seen": 222251872, "step": 2435 }, { "epoch": 10.145833333333334, "loss": 0.08597946166992188, "loss_ce": 0.0011863745748996735, "loss_iou": 0.365234375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 222251872, "step": 2435 }, { "epoch": 10.15, "grad_norm": 5.6751823791908285, "learning_rate": 5e-05, "loss": 0.0897, "num_input_tokens_seen": 222343836, "step": 2436 }, { "epoch": 10.15, "loss": 0.10517530143260956, "loss_ce": 1.1719241229002364e-05, "loss_iou": 0.279296875, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 222343836, "step": 2436 }, { "epoch": 10.154166666666667, "grad_norm": 1.972712912402272, "learning_rate": 5e-05, "loss": 0.0825, "num_input_tokens_seen": 222435432, "step": 2437 }, { "epoch": 10.154166666666667, "loss": 0.10965215414762497, "loss_ce": 4.063854430569336e-05, "loss_iou": 0.291015625, "loss_num": 0.02197265625, "loss_xval": 0.109375, "num_input_tokens_seen": 222435432, "step": 2437 }, { "epoch": 10.158333333333333, "grad_norm": 4.3981004493047635, "learning_rate": 5e-05, "loss": 0.051, "num_input_tokens_seen": 222526416, "step": 2438 }, { "epoch": 10.158333333333333, "loss": 0.04471606761217117, "loss_ce": 0.0001909200509544462, "loss_iou": 0.3046875, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 222526416, "step": 2438 }, { "epoch": 10.1625, "grad_norm": 1.7735384153770875, "learning_rate": 5e-05, "loss": 0.0924, "num_input_tokens_seen": 222617280, "step": 2439 }, { "epoch": 10.1625, "loss": 0.09482355415821075, "loss_ce": 0.00012750302266795188, "loss_iou": 0.09814453125, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 222617280, "step": 2439 }, { "epoch": 10.166666666666666, "grad_norm": 12.698027434976936, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 222709132, "step": 2440 }, { "epoch": 10.166666666666666, "loss": 0.11245200037956238, "loss_ce": 4.050401912536472e-05, "loss_iou": 0.0537109375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 222709132, "step": 2440 }, { "epoch": 10.170833333333333, "grad_norm": 3.190471324233273, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 222800892, "step": 2441 }, { "epoch": 10.170833333333333, "loss": 0.05578252300620079, "loss_ce": 0.0001947572163771838, "loss_iou": 0.1806640625, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 222800892, "step": 2441 }, { "epoch": 10.175, "grad_norm": 2.2801811064612574, "learning_rate": 5e-05, "loss": 0.0729, "num_input_tokens_seen": 222892336, "step": 2442 }, { "epoch": 10.175, "loss": 0.08900895714759827, "loss_ce": 0.0002638417645357549, "loss_iou": 0.390625, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 222892336, "step": 2442 }, { "epoch": 10.179166666666667, "grad_norm": 2.4598503645782115, "learning_rate": 5e-05, "loss": 0.1039, "num_input_tokens_seen": 222983220, "step": 2443 }, { "epoch": 10.179166666666667, "loss": 0.13636144995689392, "loss_ce": 2.417125870124437e-05, "loss_iou": 0.296875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 222983220, "step": 2443 }, { "epoch": 10.183333333333334, "grad_norm": 4.214439285952087, "learning_rate": 5e-05, "loss": 0.0618, "num_input_tokens_seen": 223074292, "step": 2444 }, { "epoch": 10.183333333333334, "loss": 0.054007645696401596, "loss_ce": 0.00014411890879273415, "loss_iou": 0.400390625, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 223074292, "step": 2444 }, { "epoch": 10.1875, "grad_norm": 3.0707843729279003, "learning_rate": 5e-05, "loss": 0.0868, "num_input_tokens_seen": 223165536, "step": 2445 }, { "epoch": 10.1875, "loss": 0.06411126255989075, "loss_ce": 0.0001616842782823369, "loss_iou": 0.255859375, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 223165536, "step": 2445 }, { "epoch": 10.191666666666666, "grad_norm": 5.227605457388482, "learning_rate": 5e-05, "loss": 0.085, "num_input_tokens_seen": 223256980, "step": 2446 }, { "epoch": 10.191666666666666, "loss": 0.07259244471788406, "loss_ce": 0.0001131939425249584, "loss_iou": 0.361328125, "loss_num": 0.0145263671875, "loss_xval": 0.072265625, "num_input_tokens_seen": 223256980, "step": 2446 }, { "epoch": 10.195833333333333, "grad_norm": 2.310431922091145, "learning_rate": 5e-05, "loss": 0.0746, "num_input_tokens_seen": 223348508, "step": 2447 }, { "epoch": 10.195833333333333, "loss": 0.07903735339641571, "loss_ce": 0.00019518463523127139, "loss_iou": 0.197265625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 223348508, "step": 2447 }, { "epoch": 10.2, "grad_norm": 8.548814866250257, "learning_rate": 5e-05, "loss": 0.0703, "num_input_tokens_seen": 223439892, "step": 2448 }, { "epoch": 10.2, "loss": 0.06143535301089287, "loss_ce": 3.467700935289031e-06, "loss_iou": 0.11376953125, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 223439892, "step": 2448 }, { "epoch": 10.204166666666667, "grad_norm": 1.3559549692124349, "learning_rate": 5e-05, "loss": 0.0834, "num_input_tokens_seen": 223531256, "step": 2449 }, { "epoch": 10.204166666666667, "loss": 0.0661230981349945, "loss_ce": 6.771959760953905e-06, "loss_iou": 0.326171875, "loss_num": 0.01324462890625, "loss_xval": 0.06591796875, "num_input_tokens_seen": 223531256, "step": 2449 }, { "epoch": 10.208333333333334, "grad_norm": 1.1895310174092164, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 223622460, "step": 2450 }, { "epoch": 10.208333333333334, "loss": 0.06462086737155914, "loss_ce": 1.515869917056989e-05, "loss_iou": 0.27734375, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 223622460, "step": 2450 }, { "epoch": 10.2125, "grad_norm": 2.7657554203330577, "learning_rate": 5e-05, "loss": 0.0888, "num_input_tokens_seen": 223713960, "step": 2451 }, { "epoch": 10.2125, "loss": 0.07661904394626617, "loss_ce": 0.0004929460119456053, "loss_iou": 0.38671875, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 223713960, "step": 2451 }, { "epoch": 10.216666666666667, "grad_norm": 3.2893812369272637, "learning_rate": 5e-05, "loss": 0.0967, "num_input_tokens_seen": 223805260, "step": 2452 }, { "epoch": 10.216666666666667, "loss": 0.1292020082473755, "loss_ce": 6.686578126391396e-05, "loss_iou": 0.22265625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 223805260, "step": 2452 }, { "epoch": 10.220833333333333, "grad_norm": 1.2475696406825638, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 223896604, "step": 2453 }, { "epoch": 10.220833333333333, "loss": 0.05976950749754906, "loss_ce": 9.238149505108595e-05, "loss_iou": 0.3046875, "loss_num": 0.011962890625, "loss_xval": 0.0595703125, "num_input_tokens_seen": 223896604, "step": 2453 }, { "epoch": 10.225, "grad_norm": 2.364589120670526, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 223987908, "step": 2454 }, { "epoch": 10.225, "loss": 0.040394507348537445, "loss_ce": 8.84165710886009e-05, "loss_iou": 0.19140625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 223987908, "step": 2454 }, { "epoch": 10.229166666666666, "grad_norm": 1.537721777952214, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 224079124, "step": 2455 }, { "epoch": 10.229166666666666, "loss": 0.04819488525390625, "loss_ce": 0.0006408722838386893, "loss_iou": 0.2490234375, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 224079124, "step": 2455 }, { "epoch": 10.233333333333333, "grad_norm": 4.249976349145265, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 224170684, "step": 2456 }, { "epoch": 10.233333333333333, "loss": 0.043090589344501495, "loss_ce": 0.00037361119757406414, "loss_iou": 0.265625, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 224170684, "step": 2456 }, { "epoch": 10.2375, "grad_norm": 3.0823394236162, "learning_rate": 5e-05, "loss": 0.1423, "num_input_tokens_seen": 224261828, "step": 2457 }, { "epoch": 10.2375, "loss": 0.13820458948612213, "loss_ce": 0.0003261679084971547, "loss_iou": 0.30078125, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 224261828, "step": 2457 }, { "epoch": 10.241666666666667, "grad_norm": 1.923597296697882, "learning_rate": 5e-05, "loss": 0.0835, "num_input_tokens_seen": 224353200, "step": 2458 }, { "epoch": 10.241666666666667, "loss": 0.05683402344584465, "loss_ce": 4.081109000253491e-05, "loss_iou": 0.326171875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 224353200, "step": 2458 }, { "epoch": 10.245833333333334, "grad_norm": 3.913499891437732, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 224444804, "step": 2459 }, { "epoch": 10.245833333333334, "loss": 0.026809057220816612, "loss_ce": 6.039956497261301e-05, "loss_iou": 0.220703125, "loss_num": 0.005340576171875, "loss_xval": 0.0267333984375, "num_input_tokens_seen": 224444804, "step": 2459 }, { "epoch": 10.25, "grad_norm": 1.0660942413214372, "learning_rate": 5e-05, "loss": 0.1453, "num_input_tokens_seen": 224535972, "step": 2460 }, { "epoch": 10.25, "loss": 0.1777527630329132, "loss_ce": 0.00019386685744393617, "loss_iou": 0.2373046875, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 224535972, "step": 2460 }, { "epoch": 10.254166666666666, "grad_norm": 5.531253479696742, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 224627484, "step": 2461 }, { "epoch": 10.254166666666666, "loss": 0.04134564474225044, "loss_ce": 0.0007114895852282643, "loss_iou": 0.2041015625, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 224627484, "step": 2461 }, { "epoch": 10.258333333333333, "grad_norm": 1.8363294736874196, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 224718824, "step": 2462 }, { "epoch": 10.258333333333333, "loss": 0.06277695298194885, "loss_ce": 2.2918047761777416e-06, "loss_iou": 0.310546875, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 224718824, "step": 2462 }, { "epoch": 10.2625, "grad_norm": 1.8460751885950988, "learning_rate": 5e-05, "loss": 0.0761, "num_input_tokens_seen": 224810796, "step": 2463 }, { "epoch": 10.2625, "loss": 0.10428975522518158, "loss_ce": 0.00013325779582373798, "loss_iou": 0.1708984375, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 224810796, "step": 2463 }, { "epoch": 10.266666666666667, "grad_norm": 1.4976747720517578, "learning_rate": 5e-05, "loss": 0.0808, "num_input_tokens_seen": 224901868, "step": 2464 }, { "epoch": 10.266666666666667, "loss": 0.07676523923873901, "loss_ce": 0.0001432291028322652, "loss_iou": 0.23828125, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 224901868, "step": 2464 }, { "epoch": 10.270833333333334, "grad_norm": 3.112973875767646, "learning_rate": 5e-05, "loss": 0.0575, "num_input_tokens_seen": 224993660, "step": 2465 }, { "epoch": 10.270833333333334, "loss": 0.07043145596981049, "loss_ce": 0.00016473176947329193, "loss_iou": 0.2314453125, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 224993660, "step": 2465 }, { "epoch": 10.275, "grad_norm": 7.317607041578287, "learning_rate": 5e-05, "loss": 0.0696, "num_input_tokens_seen": 225084528, "step": 2466 }, { "epoch": 10.275, "loss": 0.05415572226047516, "loss_ce": 4.8060854169307277e-05, "loss_iou": 0.384765625, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 225084528, "step": 2466 }, { "epoch": 10.279166666666667, "grad_norm": 3.3420650983507394, "learning_rate": 5e-05, "loss": 0.0818, "num_input_tokens_seen": 225175876, "step": 2467 }, { "epoch": 10.279166666666667, "loss": 0.10633950680494308, "loss_ce": 1.6262883946183138e-05, "loss_iou": 0.341796875, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 225175876, "step": 2467 }, { "epoch": 10.283333333333333, "grad_norm": 3.436964712975802, "learning_rate": 5e-05, "loss": 0.0846, "num_input_tokens_seen": 225266960, "step": 2468 }, { "epoch": 10.283333333333333, "loss": 0.09859128296375275, "loss_ce": 1.9506447642925195e-05, "loss_iou": 0.30078125, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 225266960, "step": 2468 }, { "epoch": 10.2875, "grad_norm": 3.1825697171052942, "learning_rate": 5e-05, "loss": 0.0694, "num_input_tokens_seen": 225358192, "step": 2469 }, { "epoch": 10.2875, "loss": 0.05997881665825844, "loss_ce": 7.280804857145995e-05, "loss_iou": 0.296875, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 225358192, "step": 2469 }, { "epoch": 10.291666666666666, "grad_norm": 4.272989950784549, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 225449832, "step": 2470 }, { "epoch": 10.291666666666666, "loss": 0.08155052363872528, "loss_ce": 0.0001906609977595508, "loss_iou": 0.279296875, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 225449832, "step": 2470 }, { "epoch": 10.295833333333333, "grad_norm": 34.74430851314654, "learning_rate": 5e-05, "loss": 0.0872, "num_input_tokens_seen": 225540424, "step": 2471 }, { "epoch": 10.295833333333333, "loss": 0.1386980563402176, "loss_ce": 1.0921379725914448e-05, "loss_iou": 0.384765625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 225540424, "step": 2471 }, { "epoch": 10.3, "grad_norm": 3.0368183871807917, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 225630200, "step": 2472 }, { "epoch": 10.3, "loss": 0.04973556473851204, "loss_ce": 0.0013957209885120392, "loss_iou": 0.3125, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 225630200, "step": 2472 }, { "epoch": 10.304166666666667, "grad_norm": 3.523903342921853, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 225720748, "step": 2473 }, { "epoch": 10.304166666666667, "loss": 0.05381819233298302, "loss_ce": 0.000320877181366086, "loss_iou": 0.34765625, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 225720748, "step": 2473 }, { "epoch": 10.308333333333334, "grad_norm": 4.619375355597199, "learning_rate": 5e-05, "loss": 0.0674, "num_input_tokens_seen": 225811212, "step": 2474 }, { "epoch": 10.308333333333334, "loss": 0.06829918175935745, "loss_ce": 8.393885764235165e-07, "loss_iou": 0.35546875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 225811212, "step": 2474 }, { "epoch": 10.3125, "grad_norm": 2.4553281904077906, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 225902520, "step": 2475 }, { "epoch": 10.3125, "loss": 0.046314314007759094, "loss_ce": 3.887810635205824e-06, "loss_iou": 0.28125, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 225902520, "step": 2475 }, { "epoch": 10.316666666666666, "grad_norm": 5.3015376076212775, "learning_rate": 5e-05, "loss": 0.0837, "num_input_tokens_seen": 225993824, "step": 2476 }, { "epoch": 10.316666666666666, "loss": 0.05360734462738037, "loss_ce": 0.0008271909318864346, "loss_iou": 0.203125, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 225993824, "step": 2476 }, { "epoch": 10.320833333333333, "grad_norm": 4.167800578454227, "learning_rate": 5e-05, "loss": 0.0957, "num_input_tokens_seen": 226085280, "step": 2477 }, { "epoch": 10.320833333333333, "loss": 0.12834444642066956, "loss_ce": 2.764263854260207e-06, "loss_iou": 0.3046875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 226085280, "step": 2477 }, { "epoch": 10.325, "grad_norm": 5.162117634743744, "learning_rate": 5e-05, "loss": 0.072, "num_input_tokens_seen": 226177060, "step": 2478 }, { "epoch": 10.325, "loss": 0.05269046127796173, "loss_ce": 0.00023074712953530252, "loss_iou": 0.302734375, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 226177060, "step": 2478 }, { "epoch": 10.329166666666667, "grad_norm": 2.6086503652713957, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 226267776, "step": 2479 }, { "epoch": 10.329166666666667, "loss": 0.03804173693060875, "loss_ce": 1.683563277765643e-05, "loss_iou": 0.2431640625, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 226267776, "step": 2479 }, { "epoch": 10.333333333333334, "grad_norm": 4.493073901021718, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 226358440, "step": 2480 }, { "epoch": 10.333333333333334, "loss": 0.05348392203450203, "loss_ce": 1.865989361249376e-06, "loss_iou": 0.26953125, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 226358440, "step": 2480 }, { "epoch": 10.3375, "grad_norm": 2.343907113482161, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 226449808, "step": 2481 }, { "epoch": 10.3375, "loss": 0.03878547623753548, "loss_ce": 1.2893915481981821e-05, "loss_iou": 0.380859375, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 226449808, "step": 2481 }, { "epoch": 10.341666666666667, "grad_norm": 6.73918095196411, "learning_rate": 5e-05, "loss": 0.1366, "num_input_tokens_seen": 226541812, "step": 2482 }, { "epoch": 10.341666666666667, "loss": 0.1784840226173401, "loss_ce": 0.00026137454551644623, "loss_iou": 0.35546875, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 226541812, "step": 2482 }, { "epoch": 10.345833333333333, "grad_norm": 2.8465636354751473, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 226632944, "step": 2483 }, { "epoch": 10.345833333333333, "loss": 0.08737257122993469, "loss_ce": 7.506472456952906e-07, "loss_iou": 0.30078125, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 226632944, "step": 2483 }, { "epoch": 10.35, "grad_norm": 3.0168112348538316, "learning_rate": 5e-05, "loss": 0.0897, "num_input_tokens_seen": 226724220, "step": 2484 }, { "epoch": 10.35, "loss": 0.04172979295253754, "loss_ce": 0.00010381372703704983, "loss_iou": 0.294921875, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 226724220, "step": 2484 }, { "epoch": 10.354166666666666, "grad_norm": 7.525334521925941, "learning_rate": 5e-05, "loss": 0.0572, "num_input_tokens_seen": 226815480, "step": 2485 }, { "epoch": 10.354166666666666, "loss": 0.048039909452199936, "loss_ce": 5.101898204884492e-05, "loss_iou": 0.302734375, "loss_num": 0.00958251953125, "loss_xval": 0.048095703125, "num_input_tokens_seen": 226815480, "step": 2485 }, { "epoch": 10.358333333333333, "grad_norm": 4.079229634153809, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 226906372, "step": 2486 }, { "epoch": 10.358333333333333, "loss": 0.09189343452453613, "loss_ce": 5.005502316635102e-06, "loss_iou": 0.275390625, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 226906372, "step": 2486 }, { "epoch": 10.3625, "grad_norm": 2.3354944781338545, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 226998504, "step": 2487 }, { "epoch": 10.3625, "loss": 0.05715341866016388, "loss_ce": 0.0005738280597142875, "loss_iou": 0.18359375, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 226998504, "step": 2487 }, { "epoch": 10.366666666666667, "grad_norm": 3.8303454841396722, "learning_rate": 5e-05, "loss": 0.0955, "num_input_tokens_seen": 227089372, "step": 2488 }, { "epoch": 10.366666666666667, "loss": 0.08700613677501678, "loss_ce": 0.003235388780012727, "loss_iou": 0.263671875, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 227089372, "step": 2488 }, { "epoch": 10.370833333333334, "grad_norm": 3.429327852601159, "learning_rate": 5e-05, "loss": 0.0859, "num_input_tokens_seen": 227180756, "step": 2489 }, { "epoch": 10.370833333333334, "loss": 0.03858550265431404, "loss_ce": 1.128264375438448e-05, "loss_iou": 0.33203125, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 227180756, "step": 2489 }, { "epoch": 10.375, "grad_norm": 3.16423758573065, "learning_rate": 5e-05, "loss": 0.0762, "num_input_tokens_seen": 227271620, "step": 2490 }, { "epoch": 10.375, "loss": 0.0924580842256546, "loss_ce": 5.079701168142492e-06, "loss_iou": 0.216796875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 227271620, "step": 2490 }, { "epoch": 10.379166666666666, "grad_norm": 4.138720832179852, "learning_rate": 5e-05, "loss": 0.0908, "num_input_tokens_seen": 227362088, "step": 2491 }, { "epoch": 10.379166666666666, "loss": 0.09778620302677155, "loss_ce": 7.887525498517789e-06, "loss_iou": 0.447265625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 227362088, "step": 2491 }, { "epoch": 10.383333333333333, "grad_norm": 4.817565170683859, "learning_rate": 5e-05, "loss": 0.0812, "num_input_tokens_seen": 227453448, "step": 2492 }, { "epoch": 10.383333333333333, "loss": 0.08528520911931992, "loss_ce": 3.835527422779705e-06, "loss_iou": 0.1748046875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 227453448, "step": 2492 }, { "epoch": 10.3875, "grad_norm": 3.2928127220446988, "learning_rate": 5e-05, "loss": 0.0936, "num_input_tokens_seen": 227544780, "step": 2493 }, { "epoch": 10.3875, "loss": 0.07505002617835999, "loss_ce": 0.004905372392386198, "loss_iou": 0.2392578125, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 227544780, "step": 2493 }, { "epoch": 10.391666666666667, "grad_norm": 4.6372402442600515, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 227636708, "step": 2494 }, { "epoch": 10.391666666666667, "loss": 0.09854992479085922, "loss_ce": 0.0006647917907685041, "loss_iou": 0.150390625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 227636708, "step": 2494 }, { "epoch": 10.395833333333334, "grad_norm": 1.6189678476279272, "learning_rate": 5e-05, "loss": 0.0703, "num_input_tokens_seen": 227728100, "step": 2495 }, { "epoch": 10.395833333333334, "loss": 0.07708020508289337, "loss_ce": 2.331818177481182e-05, "loss_iou": 0.28515625, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 227728100, "step": 2495 }, { "epoch": 10.4, "grad_norm": 1.3578423056075073, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 227818952, "step": 2496 }, { "epoch": 10.4, "loss": 0.07700711488723755, "loss_ce": 2.6529296519584022e-05, "loss_iou": 0.12890625, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 227818952, "step": 2496 }, { "epoch": 10.404166666666667, "grad_norm": 1.8049258585400736, "learning_rate": 5e-05, "loss": 0.0881, "num_input_tokens_seen": 227909404, "step": 2497 }, { "epoch": 10.404166666666667, "loss": 0.05526716262102127, "loss_ce": 7.46006662666332e-06, "loss_iou": 0.244140625, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 227909404, "step": 2497 }, { "epoch": 10.408333333333333, "grad_norm": 2.9045362810221476, "learning_rate": 5e-05, "loss": 0.0737, "num_input_tokens_seen": 228000976, "step": 2498 }, { "epoch": 10.408333333333333, "loss": 0.07178732752799988, "loss_ce": 0.00047538039507344365, "loss_iou": 0.271484375, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 228000976, "step": 2498 }, { "epoch": 10.4125, "grad_norm": 2.2211804392233496, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 228092024, "step": 2499 }, { "epoch": 10.4125, "loss": 0.04628128185868263, "loss_ce": 1.3733673540627933e-06, "loss_iou": 0.2412109375, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 228092024, "step": 2499 }, { "epoch": 10.416666666666666, "grad_norm": 2.9487757030361825, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 228183328, "step": 2500 }, { "epoch": 10.416666666666666, "eval_seeclick_CIoU": 0.3149303123354912, "eval_seeclick_GIoU": 0.29233773797750473, "eval_seeclick_IoU": 0.3987307697534561, "eval_seeclick_MAE_all": 0.08514390140771866, "eval_seeclick_MAE_h": 0.07179565727710724, "eval_seeclick_MAE_w": 0.15848005563020706, "eval_seeclick_MAE_x_boxes": 0.16587074100971222, "eval_seeclick_MAE_y_boxes": 0.07483186945319176, "eval_seeclick_NUM_probability": 0.999997466802597, "eval_seeclick_inside_bbox": 0.6321022808551788, "eval_seeclick_loss": 0.5096176862716675, "eval_seeclick_loss_ce": 0.1360682100057602, "eval_seeclick_loss_iou": 0.504638671875, "eval_seeclick_loss_num": 0.07415771484375, "eval_seeclick_loss_xval": 0.3709716796875, "eval_seeclick_runtime": 75.6845, "eval_seeclick_samples_per_second": 0.568, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 228183328, "step": 2500 }, { "epoch": 10.416666666666666, "eval_icons_CIoU": 0.4136456251144409, "eval_icons_GIoU": 0.41529805958271027, "eval_icons_IoU": 0.47623029351234436, "eval_icons_MAE_all": 0.06310141086578369, "eval_icons_MAE_h": 0.11154510080814362, "eval_icons_MAE_w": 0.09058283641934395, "eval_icons_MAE_x_boxes": 0.09419732540845871, "eval_icons_MAE_y_boxes": 0.12008867785334587, "eval_icons_NUM_probability": 0.9999987185001373, "eval_icons_inside_bbox": 0.7170138955116272, "eval_icons_loss": 0.32223400473594666, "eval_icons_loss_ce": 9.500573696641368e-06, "eval_icons_loss_iou": 0.3427734375, "eval_icons_loss_num": 0.06640625, "eval_icons_loss_xval": 0.3321533203125, "eval_icons_runtime": 98.1797, "eval_icons_samples_per_second": 0.509, "eval_icons_steps_per_second": 0.02, "num_input_tokens_seen": 228183328, "step": 2500 }, { "epoch": 10.416666666666666, "eval_screenspot_CIoU": 0.39047037561734516, "eval_screenspot_GIoU": 0.3841150104999542, "eval_screenspot_IoU": 0.46335774660110474, "eval_screenspot_MAE_all": 0.09988050907850266, "eval_screenspot_MAE_h": 0.09078333526849747, "eval_screenspot_MAE_w": 0.20148720343907675, "eval_screenspot_MAE_x_boxes": 0.19923935333887735, "eval_screenspot_MAE_y_boxes": 0.08086183667182922, "eval_screenspot_NUM_probability": 0.9999897480010986, "eval_screenspot_inside_bbox": 0.7295833428700765, "eval_screenspot_loss": 0.4993094205856323, "eval_screenspot_loss_ce": 0.004859724128133773, "eval_screenspot_loss_iou": 0.3956705729166667, "eval_screenspot_loss_num": 0.09987894694010417, "eval_screenspot_loss_xval": 0.4995524088541667, "eval_screenspot_runtime": 151.0375, "eval_screenspot_samples_per_second": 0.589, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 228183328, "step": 2500 }, { "epoch": 10.416666666666666, "eval_compot_CIoU": 0.45688967406749725, "eval_compot_GIoU": 0.44939421117305756, "eval_compot_IoU": 0.5281907916069031, "eval_compot_MAE_all": 0.05896926671266556, "eval_compot_MAE_h": 0.07026198133826256, "eval_compot_MAE_w": 0.1460394188761711, "eval_compot_MAE_x_boxes": 0.14645987004041672, "eval_compot_MAE_y_boxes": 0.07101576030254364, "eval_compot_NUM_probability": 0.9999970197677612, "eval_compot_inside_bbox": 0.7361111044883728, "eval_compot_loss": 0.30137625336647034, "eval_compot_loss_ce": 0.028895296156406403, "eval_compot_loss_iou": 0.29559326171875, "eval_compot_loss_num": 0.052188873291015625, "eval_compot_loss_xval": 0.2608489990234375, "eval_compot_runtime": 86.7894, "eval_compot_samples_per_second": 0.576, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 228183328, "step": 2500 }, { "epoch": 10.416666666666666, "loss": 0.3029475212097168, "loss_ce": 0.025023944675922394, "loss_iou": 0.275390625, "loss_num": 0.0556640625, "loss_xval": 0.27734375, "num_input_tokens_seen": 228183328, "step": 2500 }, { "epoch": 10.420833333333333, "grad_norm": 2.5303109849448733, "learning_rate": 5e-05, "loss": 0.1039, "num_input_tokens_seen": 228274200, "step": 2501 }, { "epoch": 10.420833333333333, "loss": 0.07500009983778, "loss_ce": 3.149094027321553e-06, "loss_iou": 0.4375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 228274200, "step": 2501 }, { "epoch": 10.425, "grad_norm": 2.5967014863549074, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 228364664, "step": 2502 }, { "epoch": 10.425, "loss": 0.09338469803333282, "loss_ce": 9.118302841670811e-07, "loss_iou": 0.25390625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 228364664, "step": 2502 }, { "epoch": 10.429166666666667, "grad_norm": 7.713983434423728, "learning_rate": 5e-05, "loss": 0.0939, "num_input_tokens_seen": 228455736, "step": 2503 }, { "epoch": 10.429166666666667, "loss": 0.12286948412656784, "loss_ce": 5.714358849218115e-06, "loss_iou": 0.4765625, "loss_num": 0.0245361328125, "loss_xval": 0.123046875, "num_input_tokens_seen": 228455736, "step": 2503 }, { "epoch": 10.433333333333334, "grad_norm": 3.87512890789201, "learning_rate": 5e-05, "loss": 0.0893, "num_input_tokens_seen": 228547240, "step": 2504 }, { "epoch": 10.433333333333334, "loss": 0.07037439942359924, "loss_ce": 8.583416502006003e-07, "loss_iou": 0.373046875, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 228547240, "step": 2504 }, { "epoch": 10.4375, "grad_norm": 4.133812777201116, "learning_rate": 5e-05, "loss": 0.05, "num_input_tokens_seen": 228639228, "step": 2505 }, { "epoch": 10.4375, "loss": 0.04950854182243347, "loss_ce": 0.0004667960456572473, "loss_iou": 0.34375, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 228639228, "step": 2505 }, { "epoch": 10.441666666666666, "grad_norm": 7.732185340749662, "learning_rate": 5e-05, "loss": 0.0786, "num_input_tokens_seen": 228730848, "step": 2506 }, { "epoch": 10.441666666666666, "loss": 0.05839722603559494, "loss_ce": 1.835384864534717e-06, "loss_iou": 0.2001953125, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 228730848, "step": 2506 }, { "epoch": 10.445833333333333, "grad_norm": 3.433041817943037, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 228821788, "step": 2507 }, { "epoch": 10.445833333333333, "loss": 0.03542107343673706, "loss_ce": 0.0001351251994492486, "loss_iou": 0.09716796875, "loss_num": 0.007049560546875, "loss_xval": 0.035400390625, "num_input_tokens_seen": 228821788, "step": 2507 }, { "epoch": 10.45, "grad_norm": 3.121765436554312, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 228912956, "step": 2508 }, { "epoch": 10.45, "loss": 0.07435625791549683, "loss_ce": 4.214227374177426e-05, "loss_iou": 0.2001953125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 228912956, "step": 2508 }, { "epoch": 10.454166666666667, "grad_norm": 2.0324462426108636, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 229004264, "step": 2509 }, { "epoch": 10.454166666666667, "loss": 0.044322483241558075, "loss_ce": 0.00011014081974280998, "loss_iou": 0.1826171875, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 229004264, "step": 2509 }, { "epoch": 10.458333333333334, "grad_norm": 2.1366102938245723, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 229095928, "step": 2510 }, { "epoch": 10.458333333333334, "loss": 0.07690685987472534, "loss_ce": 0.002535521052777767, "loss_iou": 0.26953125, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 229095928, "step": 2510 }, { "epoch": 10.4625, "grad_norm": 2.19711807686499, "learning_rate": 5e-05, "loss": 0.0847, "num_input_tokens_seen": 229186916, "step": 2511 }, { "epoch": 10.4625, "loss": 0.0904402881860733, "loss_ce": 3.196924808435142e-05, "loss_iou": 0.28125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 229186916, "step": 2511 }, { "epoch": 10.466666666666667, "grad_norm": 4.7434556280046865, "learning_rate": 5e-05, "loss": 0.086, "num_input_tokens_seen": 229278420, "step": 2512 }, { "epoch": 10.466666666666667, "loss": 0.10490299016237259, "loss_ce": 0.0016315041575580835, "loss_iou": 0.3671875, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 229278420, "step": 2512 }, { "epoch": 10.470833333333333, "grad_norm": 4.876042891988366, "learning_rate": 5e-05, "loss": 0.067, "num_input_tokens_seen": 229370024, "step": 2513 }, { "epoch": 10.470833333333333, "loss": 0.06508772075176239, "loss_ce": 0.0031675598584115505, "loss_iou": 0.224609375, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 229370024, "step": 2513 }, { "epoch": 10.475, "grad_norm": 3.5778712305029354, "learning_rate": 5e-05, "loss": 0.0914, "num_input_tokens_seen": 229461440, "step": 2514 }, { "epoch": 10.475, "loss": 0.10127786546945572, "loss_ce": 0.000813998281955719, "loss_iou": 0.38671875, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 229461440, "step": 2514 }, { "epoch": 10.479166666666666, "grad_norm": 2.5220713836612223, "learning_rate": 5e-05, "loss": 0.0767, "num_input_tokens_seen": 229552776, "step": 2515 }, { "epoch": 10.479166666666666, "loss": 0.04373849928379059, "loss_ce": 0.0002585825277492404, "loss_iou": 0.11376953125, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 229552776, "step": 2515 }, { "epoch": 10.483333333333333, "grad_norm": 1.26617865145252, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 229643696, "step": 2516 }, { "epoch": 10.483333333333333, "loss": 0.08795313537120819, "loss_ce": 1.478557919654122e-06, "loss_iou": 0.2158203125, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 229643696, "step": 2516 }, { "epoch": 10.4875, "grad_norm": 1.1901244706406917, "learning_rate": 5e-05, "loss": 0.0775, "num_input_tokens_seen": 229734900, "step": 2517 }, { "epoch": 10.4875, "loss": 0.04605482146143913, "loss_ce": 0.0002479375689290464, "loss_iou": 0.2392578125, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 229734900, "step": 2517 }, { "epoch": 10.491666666666667, "grad_norm": 2.702939547986177, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 229826620, "step": 2518 }, { "epoch": 10.491666666666667, "loss": 0.06707486510276794, "loss_ce": 0.0004855117294937372, "loss_iou": 0.33203125, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 229826620, "step": 2518 }, { "epoch": 10.495833333333334, "grad_norm": 3.7483403240961337, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 229917808, "step": 2519 }, { "epoch": 10.495833333333334, "loss": 0.0458785817027092, "loss_ce": 2.5918541723513044e-05, "loss_iou": 0.263671875, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 229917808, "step": 2519 }, { "epoch": 10.5, "grad_norm": 2.40528501657669, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 230009924, "step": 2520 }, { "epoch": 10.5, "loss": 0.03555392846465111, "loss_ce": 0.0005693387938663363, "loss_iou": 0.197265625, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 230009924, "step": 2520 }, { "epoch": 10.504166666666666, "grad_norm": 3.4734031929175573, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 230100984, "step": 2521 }, { "epoch": 10.504166666666666, "loss": 0.044868022203445435, "loss_ce": 5.295852679410018e-05, "loss_iou": 0.25390625, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 230100984, "step": 2521 }, { "epoch": 10.508333333333333, "grad_norm": 5.234999676194138, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 230192776, "step": 2522 }, { "epoch": 10.508333333333333, "loss": 0.054706450551748276, "loss_ce": 0.00014102361456025392, "loss_iou": 0.173828125, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 230192776, "step": 2522 }, { "epoch": 10.5125, "grad_norm": 3.749565842887734, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 230284256, "step": 2523 }, { "epoch": 10.5125, "loss": 0.06063781678676605, "loss_ce": 4.516471744864248e-05, "loss_iou": 0.146484375, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 230284256, "step": 2523 }, { "epoch": 10.516666666666667, "grad_norm": 3.3831922261258116, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 230375284, "step": 2524 }, { "epoch": 10.516666666666667, "loss": 0.05358021706342697, "loss_ce": 2.186834899475798e-05, "loss_iou": 0.328125, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 230375284, "step": 2524 }, { "epoch": 10.520833333333334, "grad_norm": 3.6643316429433046, "learning_rate": 5e-05, "loss": 0.0749, "num_input_tokens_seen": 230466500, "step": 2525 }, { "epoch": 10.520833333333334, "loss": 0.10528016090393066, "loss_ce": 0.0007498256163671613, "loss_iou": 0.154296875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 230466500, "step": 2525 }, { "epoch": 10.525, "grad_norm": 2.9672800808077473, "learning_rate": 5e-05, "loss": 0.0751, "num_input_tokens_seen": 230557956, "step": 2526 }, { "epoch": 10.525, "loss": 0.07855658233165741, "loss_ce": 4.3351515159884e-06, "loss_iou": 0.2041015625, "loss_num": 0.015625, "loss_xval": 0.07861328125, "num_input_tokens_seen": 230557956, "step": 2526 }, { "epoch": 10.529166666666667, "grad_norm": 5.584652279705522, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 230649600, "step": 2527 }, { "epoch": 10.529166666666667, "loss": 0.03324050456285477, "loss_ce": 2.2119897039374337e-05, "loss_iou": 0.244140625, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 230649600, "step": 2527 }, { "epoch": 10.533333333333333, "grad_norm": 2.918245547540015, "learning_rate": 5e-05, "loss": 0.0618, "num_input_tokens_seen": 230740764, "step": 2528 }, { "epoch": 10.533333333333333, "loss": 0.04487369954586029, "loss_ce": 2.8118285626987927e-05, "loss_iou": 0.28515625, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 230740764, "step": 2528 }, { "epoch": 10.5375, "grad_norm": 2.0239125246620118, "learning_rate": 5e-05, "loss": 0.0686, "num_input_tokens_seen": 230832676, "step": 2529 }, { "epoch": 10.5375, "loss": 0.04169199988245964, "loss_ce": 0.0004474924935493618, "loss_iou": 0.29296875, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 230832676, "step": 2529 }, { "epoch": 10.541666666666666, "grad_norm": 1.7093839443831509, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 230924080, "step": 2530 }, { "epoch": 10.541666666666666, "loss": 0.03623117879033089, "loss_ce": 3.7330690247472376e-05, "loss_iou": 0.193359375, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 230924080, "step": 2530 }, { "epoch": 10.545833333333333, "grad_norm": 5.344313472098823, "learning_rate": 5e-05, "loss": 0.1128, "num_input_tokens_seen": 231015492, "step": 2531 }, { "epoch": 10.545833333333333, "loss": 0.12598028779029846, "loss_ce": 0.0008887368021532893, "loss_iou": 0.23046875, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 231015492, "step": 2531 }, { "epoch": 10.55, "grad_norm": 2.217578905840367, "learning_rate": 5e-05, "loss": 0.0719, "num_input_tokens_seen": 231107208, "step": 2532 }, { "epoch": 10.55, "loss": 0.09489748626947403, "loss_ce": 0.0016052497085183859, "loss_iou": 0.236328125, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 231107208, "step": 2532 }, { "epoch": 10.554166666666667, "grad_norm": 13.061473664615402, "learning_rate": 5e-05, "loss": 0.0702, "num_input_tokens_seen": 231198376, "step": 2533 }, { "epoch": 10.554166666666667, "loss": 0.06628895550966263, "loss_ce": 4.775848992721876e-06, "loss_iou": 0.357421875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 231198376, "step": 2533 }, { "epoch": 10.558333333333334, "grad_norm": 4.530044256683953, "learning_rate": 5e-05, "loss": 0.0964, "num_input_tokens_seen": 231288708, "step": 2534 }, { "epoch": 10.558333333333334, "loss": 0.10373524576425552, "loss_ce": 0.0003416899999137968, "loss_iou": 0.27734375, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 231288708, "step": 2534 }, { "epoch": 10.5625, "grad_norm": 3.1504933757286473, "learning_rate": 5e-05, "loss": 0.0859, "num_input_tokens_seen": 231379416, "step": 2535 }, { "epoch": 10.5625, "loss": 0.08838987350463867, "loss_ce": 2.6227504349662922e-05, "loss_iou": 0.318359375, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 231379416, "step": 2535 }, { "epoch": 10.566666666666666, "grad_norm": 2.0429632457317712, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 231469732, "step": 2536 }, { "epoch": 10.566666666666666, "loss": 0.05402039363980293, "loss_ce": 8.057022205321118e-05, "loss_iou": 0.2490234375, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 231469732, "step": 2536 }, { "epoch": 10.570833333333333, "grad_norm": 7.041689939197253, "learning_rate": 5e-05, "loss": 0.0881, "num_input_tokens_seen": 231561780, "step": 2537 }, { "epoch": 10.570833333333333, "loss": 0.12750005722045898, "loss_ce": 0.0014472047332674265, "loss_iou": 0.267578125, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 231561780, "step": 2537 }, { "epoch": 10.575, "grad_norm": 5.025073454078799, "learning_rate": 5e-05, "loss": 0.0727, "num_input_tokens_seen": 231653080, "step": 2538 }, { "epoch": 10.575, "loss": 0.045722320675849915, "loss_ce": 0.00022060942137613893, "loss_iou": 0.22265625, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 231653080, "step": 2538 }, { "epoch": 10.579166666666667, "grad_norm": 4.000437161547661, "learning_rate": 5e-05, "loss": 0.0809, "num_input_tokens_seen": 231744568, "step": 2539 }, { "epoch": 10.579166666666667, "loss": 0.09481997787952423, "loss_ce": 0.0008105772431008518, "loss_iou": 0.296875, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 231744568, "step": 2539 }, { "epoch": 10.583333333333334, "grad_norm": 4.126768940670154, "learning_rate": 5e-05, "loss": 0.0747, "num_input_tokens_seen": 231836040, "step": 2540 }, { "epoch": 10.583333333333334, "loss": 0.0900723934173584, "loss_ce": 0.002563235815614462, "loss_iou": 0.30859375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 231836040, "step": 2540 }, { "epoch": 10.5875, "grad_norm": 3.0471114034022024, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 231927320, "step": 2541 }, { "epoch": 10.5875, "loss": 0.06366278976202011, "loss_ce": 0.00046088872477412224, "loss_iou": 0.330078125, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 231927320, "step": 2541 }, { "epoch": 10.591666666666667, "grad_norm": 4.389275583769407, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 232018744, "step": 2542 }, { "epoch": 10.591666666666667, "loss": 0.09230601787567139, "loss_ce": 0.0013178624212741852, "loss_iou": 0.296875, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 232018744, "step": 2542 }, { "epoch": 10.595833333333333, "grad_norm": 4.083220168684195, "learning_rate": 5e-05, "loss": 0.107, "num_input_tokens_seen": 232110568, "step": 2543 }, { "epoch": 10.595833333333333, "loss": 0.127569779753685, "loss_ce": 0.00014362900401465595, "loss_iou": 0.224609375, "loss_num": 0.0255126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 232110568, "step": 2543 }, { "epoch": 10.6, "grad_norm": 2.2540952928617104, "learning_rate": 5e-05, "loss": 0.087, "num_input_tokens_seen": 232202672, "step": 2544 }, { "epoch": 10.6, "loss": 0.07920961081981659, "loss_ce": 0.0016644495772197843, "loss_iou": 0.1884765625, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 232202672, "step": 2544 }, { "epoch": 10.604166666666666, "grad_norm": 1.7401042439735654, "learning_rate": 5e-05, "loss": 0.087, "num_input_tokens_seen": 232293812, "step": 2545 }, { "epoch": 10.604166666666666, "loss": 0.08084672689437866, "loss_ce": 0.0001735099358484149, "loss_iou": 0.25, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 232293812, "step": 2545 }, { "epoch": 10.608333333333333, "grad_norm": 2.5874497229374778, "learning_rate": 5e-05, "loss": 0.1018, "num_input_tokens_seen": 232384532, "step": 2546 }, { "epoch": 10.608333333333333, "loss": 0.09803829342126846, "loss_ce": 3.1087609386304393e-05, "loss_iou": 0.298828125, "loss_num": 0.01953125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 232384532, "step": 2546 }, { "epoch": 10.6125, "grad_norm": 3.1909477246298508, "learning_rate": 5e-05, "loss": 0.0777, "num_input_tokens_seen": 232475536, "step": 2547 }, { "epoch": 10.6125, "loss": 0.09706706553697586, "loss_ce": 5.904821591684595e-06, "loss_iou": 0.3125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 232475536, "step": 2547 }, { "epoch": 10.616666666666667, "grad_norm": 11.624037688005709, "learning_rate": 5e-05, "loss": 0.0733, "num_input_tokens_seen": 232566972, "step": 2548 }, { "epoch": 10.616666666666667, "loss": 0.08421897143125534, "loss_ce": 5.714677172363736e-06, "loss_iou": 0.302734375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 232566972, "step": 2548 }, { "epoch": 10.620833333333334, "grad_norm": 2.889980560632474, "learning_rate": 5e-05, "loss": 0.0738, "num_input_tokens_seen": 232658416, "step": 2549 }, { "epoch": 10.620833333333334, "loss": 0.07642048597335815, "loss_ce": 4.466603968467098e-06, "loss_iou": 0.380859375, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 232658416, "step": 2549 }, { "epoch": 10.625, "grad_norm": 2.3197193197672, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 232749400, "step": 2550 }, { "epoch": 10.625, "loss": 0.045643728226423264, "loss_ce": 0.0016984152607619762, "loss_iou": 0.212890625, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 232749400, "step": 2550 }, { "epoch": 10.629166666666666, "grad_norm": 2.9763418010603013, "learning_rate": 5e-05, "loss": 0.0382, "num_input_tokens_seen": 232839592, "step": 2551 }, { "epoch": 10.629166666666666, "loss": 0.025699859485030174, "loss_ce": 7.8733210102655e-06, "loss_iou": 0.2099609375, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 232839592, "step": 2551 }, { "epoch": 10.633333333333333, "grad_norm": 3.2701773771201594, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 232932300, "step": 2552 }, { "epoch": 10.633333333333333, "loss": 0.08045702427625656, "loss_ce": 0.0002110503555741161, "loss_iou": 0.263671875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 232932300, "step": 2552 }, { "epoch": 10.6375, "grad_norm": 2.3879359360601127, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 233023152, "step": 2553 }, { "epoch": 10.6375, "loss": 0.06964591145515442, "loss_ce": 0.00015738507499918342, "loss_iou": 0.15234375, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 233023152, "step": 2553 }, { "epoch": 10.641666666666667, "grad_norm": 5.26547456458262, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 233114204, "step": 2554 }, { "epoch": 10.641666666666667, "loss": 0.04762497916817665, "loss_ce": 0.00011673916014842689, "loss_iou": 0.1875, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 233114204, "step": 2554 }, { "epoch": 10.645833333333334, "grad_norm": 2.687133977675234, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 233205552, "step": 2555 }, { "epoch": 10.645833333333334, "loss": 0.05727348476648331, "loss_ce": 7.248850124597084e-06, "loss_iou": 0.21484375, "loss_num": 0.01141357421875, "loss_xval": 0.057373046875, "num_input_tokens_seen": 233205552, "step": 2555 }, { "epoch": 10.65, "grad_norm": 2.9639326833082666, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 233296880, "step": 2556 }, { "epoch": 10.65, "loss": 0.04553859680891037, "loss_ce": 0.00023525467258878052, "loss_iou": 0.279296875, "loss_num": 0.009033203125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 233296880, "step": 2556 }, { "epoch": 10.654166666666667, "grad_norm": 2.45748708235402, "learning_rate": 5e-05, "loss": 0.1089, "num_input_tokens_seen": 233387816, "step": 2557 }, { "epoch": 10.654166666666667, "loss": 0.0732211172580719, "loss_ce": 0.00042143999598920345, "loss_iou": 0.2294921875, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 233387816, "step": 2557 }, { "epoch": 10.658333333333333, "grad_norm": 5.809434756244132, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 233478620, "step": 2558 }, { "epoch": 10.658333333333333, "loss": 0.06174005568027496, "loss_ce": 2.998542186105624e-06, "loss_iou": 0.310546875, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 233478620, "step": 2558 }, { "epoch": 10.6625, "grad_norm": 13.545347879170736, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 233570304, "step": 2559 }, { "epoch": 10.6625, "loss": 0.07798929512500763, "loss_ce": 0.00036783432005904615, "loss_iou": 0.1728515625, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 233570304, "step": 2559 }, { "epoch": 10.666666666666666, "grad_norm": 3.0516130652912024, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 233661480, "step": 2560 }, { "epoch": 10.666666666666666, "loss": 0.041023723781108856, "loss_ce": 5.387671626522206e-05, "loss_iou": 0.314453125, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 233661480, "step": 2560 }, { "epoch": 10.670833333333333, "grad_norm": 2.5559688986889144, "learning_rate": 5e-05, "loss": 0.0437, "num_input_tokens_seen": 233753020, "step": 2561 }, { "epoch": 10.670833333333333, "loss": 0.03713707625865936, "loss_ce": 0.00019554520258679986, "loss_iou": 0.13671875, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 233753020, "step": 2561 }, { "epoch": 10.675, "grad_norm": 4.498740216899864, "learning_rate": 5e-05, "loss": 0.1109, "num_input_tokens_seen": 233844764, "step": 2562 }, { "epoch": 10.675, "loss": 0.14017170667648315, "loss_ce": 4.261473077349365e-05, "loss_iou": 0.15234375, "loss_num": 0.028076171875, "loss_xval": 0.1396484375, "num_input_tokens_seen": 233844764, "step": 2562 }, { "epoch": 10.679166666666667, "grad_norm": 4.092962491430164, "learning_rate": 5e-05, "loss": 0.0647, "num_input_tokens_seen": 233936216, "step": 2563 }, { "epoch": 10.679166666666667, "loss": 0.06921578198671341, "loss_ce": 0.0005436029750853777, "loss_iou": 0.1181640625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 233936216, "step": 2563 }, { "epoch": 10.683333333333334, "grad_norm": 1.8805188688713392, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 234027008, "step": 2564 }, { "epoch": 10.683333333333334, "loss": 0.09404385089874268, "loss_ce": 3.933888820029097e-06, "loss_iou": 0.2392578125, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 234027008, "step": 2564 }, { "epoch": 10.6875, "grad_norm": 2.359065599566822, "learning_rate": 5e-05, "loss": 0.0704, "num_input_tokens_seen": 234117888, "step": 2565 }, { "epoch": 10.6875, "loss": 0.07438018172979355, "loss_ce": 0.0009396261302754283, "loss_iou": 0.21875, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 234117888, "step": 2565 }, { "epoch": 10.691666666666666, "grad_norm": 11.401119252449865, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 234209816, "step": 2566 }, { "epoch": 10.691666666666666, "loss": 0.045751944184303284, "loss_ce": 5.949885962763801e-05, "loss_iou": 0.33984375, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 234209816, "step": 2566 }, { "epoch": 10.695833333333333, "grad_norm": 5.266923879339174, "learning_rate": 5e-05, "loss": 0.077, "num_input_tokens_seen": 234301648, "step": 2567 }, { "epoch": 10.695833333333333, "loss": 0.0653052031993866, "loss_ce": 0.00030275885364972055, "loss_iou": 0.298828125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 234301648, "step": 2567 }, { "epoch": 10.7, "grad_norm": 5.586196928970968, "learning_rate": 5e-05, "loss": 0.0897, "num_input_tokens_seen": 234392848, "step": 2568 }, { "epoch": 10.7, "loss": 0.09796257317066193, "loss_ce": 0.0005962373688817024, "loss_iou": 0.373046875, "loss_num": 0.01953125, "loss_xval": 0.09716796875, "num_input_tokens_seen": 234392848, "step": 2568 }, { "epoch": 10.704166666666667, "grad_norm": 2.099581042369191, "learning_rate": 5e-05, "loss": 0.1066, "num_input_tokens_seen": 234483628, "step": 2569 }, { "epoch": 10.704166666666667, "loss": 0.11219096183776855, "loss_ce": 7.143721632019151e-07, "loss_iou": 0.21484375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 234483628, "step": 2569 }, { "epoch": 10.708333333333334, "grad_norm": 3.739938443996037, "learning_rate": 5e-05, "loss": 0.1042, "num_input_tokens_seen": 234574800, "step": 2570 }, { "epoch": 10.708333333333334, "loss": 0.1270475685596466, "loss_ce": 3.3405965950805694e-05, "loss_iou": 0.2392578125, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 234574800, "step": 2570 }, { "epoch": 10.7125, "grad_norm": 5.373325813911932, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 234666544, "step": 2571 }, { "epoch": 10.7125, "loss": 0.05830381438136101, "loss_ce": 0.0001296793925575912, "loss_iou": 0.240234375, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 234666544, "step": 2571 }, { "epoch": 10.716666666666667, "grad_norm": 1.9381368497514062, "learning_rate": 5e-05, "loss": 0.0888, "num_input_tokens_seen": 234757532, "step": 2572 }, { "epoch": 10.716666666666667, "loss": 0.08373898267745972, "loss_ce": 1.4005401681060903e-05, "loss_iou": 0.263671875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 234757532, "step": 2572 }, { "epoch": 10.720833333333333, "grad_norm": 15.191232019692768, "learning_rate": 5e-05, "loss": 0.1624, "num_input_tokens_seen": 234848952, "step": 2573 }, { "epoch": 10.720833333333333, "loss": 0.21807676553726196, "loss_ce": 0.00047117145732045174, "loss_iou": 0.1240234375, "loss_num": 0.04345703125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 234848952, "step": 2573 }, { "epoch": 10.725, "grad_norm": 5.944073703575605, "learning_rate": 5e-05, "loss": 0.106, "num_input_tokens_seen": 234940592, "step": 2574 }, { "epoch": 10.725, "loss": 0.06124042719602585, "loss_ce": 2.2165504560689442e-05, "loss_iou": 0.3359375, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 234940592, "step": 2574 }, { "epoch": 10.729166666666666, "grad_norm": 4.891518135126141, "learning_rate": 5e-05, "loss": 0.0982, "num_input_tokens_seen": 235032148, "step": 2575 }, { "epoch": 10.729166666666666, "loss": 0.13088181614875793, "loss_ce": 0.019523173570632935, "loss_iou": 0.2734375, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 235032148, "step": 2575 }, { "epoch": 10.733333333333333, "grad_norm": 6.031822907931793, "learning_rate": 5e-05, "loss": 0.1062, "num_input_tokens_seen": 235123412, "step": 2576 }, { "epoch": 10.733333333333333, "loss": 0.11574000120162964, "loss_ce": 0.002123055746778846, "loss_iou": 0.2265625, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 235123412, "step": 2576 }, { "epoch": 10.7375, "grad_norm": 7.517324480847781, "learning_rate": 5e-05, "loss": 0.0844, "num_input_tokens_seen": 235214944, "step": 2577 }, { "epoch": 10.7375, "loss": 0.09054480493068695, "loss_ce": 0.00038061931263655424, "loss_iou": 0.06640625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 235214944, "step": 2577 }, { "epoch": 10.741666666666667, "grad_norm": 2.871107651385034, "learning_rate": 5e-05, "loss": 0.0719, "num_input_tokens_seen": 235306616, "step": 2578 }, { "epoch": 10.741666666666667, "loss": 0.08062739670276642, "loss_ce": 3.0476576284854673e-05, "loss_iou": 0.28515625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 235306616, "step": 2578 }, { "epoch": 10.745833333333334, "grad_norm": 2.5856460886512402, "learning_rate": 5e-05, "loss": 0.1758, "num_input_tokens_seen": 235397184, "step": 2579 }, { "epoch": 10.745833333333334, "loss": 0.24237322807312012, "loss_ce": 2.62036132880894e-06, "loss_iou": 0.185546875, "loss_num": 0.048583984375, "loss_xval": 0.2421875, "num_input_tokens_seen": 235397184, "step": 2579 }, { "epoch": 10.75, "grad_norm": 5.199548367122784, "learning_rate": 5e-05, "loss": 0.0849, "num_input_tokens_seen": 235488592, "step": 2580 }, { "epoch": 10.75, "loss": 0.09395498037338257, "loss_ce": 0.0002583862515166402, "loss_iou": 0.064453125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 235488592, "step": 2580 }, { "epoch": 10.754166666666666, "grad_norm": 2.1957185686027225, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 235579488, "step": 2581 }, { "epoch": 10.754166666666666, "loss": 0.07799072563648224, "loss_ce": 3.053093678317964e-06, "loss_iou": 0.1435546875, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 235579488, "step": 2581 }, { "epoch": 10.758333333333333, "grad_norm": 6.386006493395034, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 235670732, "step": 2582 }, { "epoch": 10.758333333333333, "loss": 0.05041830986738205, "loss_ce": 3.2693253615434514e-06, "loss_iou": 0.35546875, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 235670732, "step": 2582 }, { "epoch": 10.7625, "grad_norm": 3.321241011083686, "learning_rate": 5e-05, "loss": 0.0945, "num_input_tokens_seen": 235762244, "step": 2583 }, { "epoch": 10.7625, "loss": 0.09518767893314362, "loss_ce": 0.00030090424115769565, "loss_iou": 0.2236328125, "loss_num": 0.01904296875, "loss_xval": 0.0947265625, "num_input_tokens_seen": 235762244, "step": 2583 }, { "epoch": 10.766666666666667, "grad_norm": 1.7715456393543487, "learning_rate": 5e-05, "loss": 0.075, "num_input_tokens_seen": 235853820, "step": 2584 }, { "epoch": 10.766666666666667, "loss": 0.07396815717220306, "loss_ce": 0.0009090721141546965, "loss_iou": 0.25390625, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 235853820, "step": 2584 }, { "epoch": 10.770833333333334, "grad_norm": 21.63454670478571, "learning_rate": 5e-05, "loss": 0.1392, "num_input_tokens_seen": 235944788, "step": 2585 }, { "epoch": 10.770833333333334, "loss": 0.18823395669460297, "loss_ce": 1.532650117042067e-06, "loss_iou": 0.34375, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 235944788, "step": 2585 }, { "epoch": 10.775, "grad_norm": 2.631021488424049, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 236036360, "step": 2586 }, { "epoch": 10.775, "loss": 0.07843972742557526, "loss_ce": 0.0016269797924906015, "loss_iou": 0.158203125, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 236036360, "step": 2586 }, { "epoch": 10.779166666666667, "grad_norm": 3.2757981534867824, "learning_rate": 5e-05, "loss": 0.0912, "num_input_tokens_seen": 236127020, "step": 2587 }, { "epoch": 10.779166666666667, "loss": 0.11449539661407471, "loss_ce": 0.0005198677536100149, "loss_iou": 0.234375, "loss_num": 0.0228271484375, "loss_xval": 0.11376953125, "num_input_tokens_seen": 236127020, "step": 2587 }, { "epoch": 10.783333333333333, "grad_norm": 4.218455808026256, "learning_rate": 5e-05, "loss": 0.0706, "num_input_tokens_seen": 236218416, "step": 2588 }, { "epoch": 10.783333333333333, "loss": 0.05689278990030289, "loss_ce": 2.3287324438570067e-05, "loss_iou": 0.28125, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 236218416, "step": 2588 }, { "epoch": 10.7875, "grad_norm": 2.7576194479682266, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 236309272, "step": 2589 }, { "epoch": 10.7875, "loss": 0.03268555551767349, "loss_ce": 1.229744611919159e-06, "loss_iou": 0.1689453125, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 236309272, "step": 2589 }, { "epoch": 10.791666666666666, "grad_norm": 2.956816374332155, "learning_rate": 5e-05, "loss": 0.0842, "num_input_tokens_seen": 236400560, "step": 2590 }, { "epoch": 10.791666666666666, "loss": 0.09306585043668747, "loss_ce": 1.7757985915523022e-05, "loss_iou": 0.251953125, "loss_num": 0.0185546875, "loss_xval": 0.09326171875, "num_input_tokens_seen": 236400560, "step": 2590 }, { "epoch": 10.795833333333333, "grad_norm": 2.098715879741373, "learning_rate": 5e-05, "loss": 0.0749, "num_input_tokens_seen": 236492296, "step": 2591 }, { "epoch": 10.795833333333333, "loss": 0.08957656472921371, "loss_ce": 0.0009382657590322196, "loss_iou": 0.3515625, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 236492296, "step": 2591 }, { "epoch": 10.8, "grad_norm": 3.2751263712768846, "learning_rate": 5e-05, "loss": 0.1033, "num_input_tokens_seen": 236584492, "step": 2592 }, { "epoch": 10.8, "loss": 0.1428438276052475, "loss_ce": 0.0007082168012857437, "loss_iou": 0.3359375, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 236584492, "step": 2592 }, { "epoch": 10.804166666666667, "grad_norm": 4.761788300939806, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 236676088, "step": 2593 }, { "epoch": 10.804166666666667, "loss": 0.08923020958900452, "loss_ce": 8.836462075123563e-05, "loss_iou": 0.310546875, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 236676088, "step": 2593 }, { "epoch": 10.808333333333334, "grad_norm": 2.6994689974716977, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 236767300, "step": 2594 }, { "epoch": 10.808333333333334, "loss": 0.0671512633562088, "loss_ce": 2.0224437321303412e-05, "loss_iou": 0.2431640625, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 236767300, "step": 2594 }, { "epoch": 10.8125, "grad_norm": 5.731491341541353, "learning_rate": 5e-05, "loss": 0.0899, "num_input_tokens_seen": 236859044, "step": 2595 }, { "epoch": 10.8125, "loss": 0.0962974950671196, "loss_ce": 0.0019981807563453913, "loss_iou": 0.322265625, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 236859044, "step": 2595 }, { "epoch": 10.816666666666666, "grad_norm": 4.80794396287632, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 236950460, "step": 2596 }, { "epoch": 10.816666666666666, "loss": 0.06012987345457077, "loss_ce": 4.0764378354651853e-05, "loss_iou": 0.34765625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 236950460, "step": 2596 }, { "epoch": 10.820833333333333, "grad_norm": 3.2646915505200527, "learning_rate": 5e-05, "loss": 0.1233, "num_input_tokens_seen": 237041240, "step": 2597 }, { "epoch": 10.820833333333333, "loss": 0.1575869917869568, "loss_ce": 9.482464520260692e-06, "loss_iou": 0.26953125, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 237041240, "step": 2597 }, { "epoch": 10.825, "grad_norm": 3.8923479859874397, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 237132320, "step": 2598 }, { "epoch": 10.825, "loss": 0.07181952893733978, "loss_ce": 1.1675666428345721e-05, "loss_iou": 0.2294921875, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 237132320, "step": 2598 }, { "epoch": 10.829166666666667, "grad_norm": 3.9108713110099256, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 237223864, "step": 2599 }, { "epoch": 10.829166666666667, "loss": 0.05638699233531952, "loss_ce": 0.0020809650886803865, "loss_iou": 0.267578125, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 237223864, "step": 2599 }, { "epoch": 10.833333333333334, "grad_norm": 2.547917471524506, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 237315552, "step": 2600 }, { "epoch": 10.833333333333334, "loss": 0.048316840082407, "loss_ce": 0.0009230421273969114, "loss_iou": 0.275390625, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 237315552, "step": 2600 }, { "epoch": 10.8375, "grad_norm": 7.022061095017465, "learning_rate": 5e-05, "loss": 0.067, "num_input_tokens_seen": 237407428, "step": 2601 }, { "epoch": 10.8375, "loss": 0.05036468803882599, "loss_ce": 0.00019378944125492126, "loss_iou": 0.3515625, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 237407428, "step": 2601 }, { "epoch": 10.841666666666667, "grad_norm": 7.585959599877752, "learning_rate": 5e-05, "loss": 0.0962, "num_input_tokens_seen": 237498360, "step": 2602 }, { "epoch": 10.841666666666667, "loss": 0.08449165523052216, "loss_ce": 1.8997769075213e-05, "loss_iou": 0.314453125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 237498360, "step": 2602 }, { "epoch": 10.845833333333333, "grad_norm": 7.524249545336901, "learning_rate": 5e-05, "loss": 0.1283, "num_input_tokens_seen": 237589592, "step": 2603 }, { "epoch": 10.845833333333333, "loss": 0.1548469066619873, "loss_ce": 0.002121679950505495, "loss_iou": 0.3203125, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 237589592, "step": 2603 }, { "epoch": 10.85, "grad_norm": 5.884465054260644, "learning_rate": 5e-05, "loss": 0.0835, "num_input_tokens_seen": 237679388, "step": 2604 }, { "epoch": 10.85, "loss": 0.08232827484607697, "loss_ce": 6.813806248828769e-05, "loss_iou": 0.30078125, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 237679388, "step": 2604 }, { "epoch": 10.854166666666666, "grad_norm": 6.344010520408848, "learning_rate": 5e-05, "loss": 0.0872, "num_input_tokens_seen": 237770712, "step": 2605 }, { "epoch": 10.854166666666666, "loss": 0.12124098837375641, "loss_ce": 0.00011671679385472089, "loss_iou": 0.224609375, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 237770712, "step": 2605 }, { "epoch": 10.858333333333333, "grad_norm": 2.1007016750079925, "learning_rate": 5e-05, "loss": 0.0967, "num_input_tokens_seen": 237862096, "step": 2606 }, { "epoch": 10.858333333333333, "loss": 0.1339530646800995, "loss_ce": 4.193175118416548e-05, "loss_iou": 0.181640625, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 237862096, "step": 2606 }, { "epoch": 10.8625, "grad_norm": 2.0791420139647303, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 237953916, "step": 2607 }, { "epoch": 10.8625, "loss": 0.05738446116447449, "loss_ce": 4.956373595632613e-05, "loss_iou": 0.220703125, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 237953916, "step": 2607 }, { "epoch": 10.866666666666667, "grad_norm": 3.145243872953806, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 238045100, "step": 2608 }, { "epoch": 10.866666666666667, "loss": 0.05611884593963623, "loss_ce": 6.568455864908174e-05, "loss_iou": 0.1962890625, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 238045100, "step": 2608 }, { "epoch": 10.870833333333334, "grad_norm": 3.5832832158308277, "learning_rate": 5e-05, "loss": 0.0659, "num_input_tokens_seen": 238136528, "step": 2609 }, { "epoch": 10.870833333333334, "loss": 0.08797188103199005, "loss_ce": 0.0010883386712521315, "loss_iou": 0.1435546875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 238136528, "step": 2609 }, { "epoch": 10.875, "grad_norm": 5.225832555854499, "learning_rate": 5e-05, "loss": 0.0861, "num_input_tokens_seen": 238228552, "step": 2610 }, { "epoch": 10.875, "loss": 0.06515424698591232, "loss_ce": 0.0020591537468135357, "loss_iou": 0.197265625, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 238228552, "step": 2610 }, { "epoch": 10.879166666666666, "grad_norm": 3.5337897267468645, "learning_rate": 5e-05, "loss": 0.048, "num_input_tokens_seen": 238319624, "step": 2611 }, { "epoch": 10.879166666666666, "loss": 0.035280607640743256, "loss_ce": 0.0001701373839750886, "loss_iou": 0.27734375, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 238319624, "step": 2611 }, { "epoch": 10.883333333333333, "grad_norm": 2.669757033141276, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 238411064, "step": 2612 }, { "epoch": 10.883333333333333, "loss": 0.07555267959833145, "loss_ce": 2.1668691260856576e-05, "loss_iou": 0.18359375, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 238411064, "step": 2612 }, { "epoch": 10.8875, "grad_norm": 24.944381665824572, "learning_rate": 5e-05, "loss": 0.0967, "num_input_tokens_seen": 238502616, "step": 2613 }, { "epoch": 10.8875, "loss": 0.10890813171863556, "loss_ce": 5.193160177441314e-05, "loss_iou": 0.275390625, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 238502616, "step": 2613 }, { "epoch": 10.891666666666667, "grad_norm": 3.9529351305551583, "learning_rate": 5e-05, "loss": 0.117, "num_input_tokens_seen": 238593976, "step": 2614 }, { "epoch": 10.891666666666667, "loss": 0.15259791910648346, "loss_ce": 2.5290042685810477e-05, "loss_iou": 0.306640625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 238593976, "step": 2614 }, { "epoch": 10.895833333333334, "grad_norm": 3.306602582204345, "learning_rate": 5e-05, "loss": 0.0585, "num_input_tokens_seen": 238685196, "step": 2615 }, { "epoch": 10.895833333333334, "loss": 0.07463531196117401, "loss_ce": 0.00047758998698554933, "loss_iou": 0.373046875, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 238685196, "step": 2615 }, { "epoch": 10.9, "grad_norm": 11.791081179572238, "learning_rate": 5e-05, "loss": 0.0928, "num_input_tokens_seen": 238776636, "step": 2616 }, { "epoch": 10.9, "loss": 0.05548732355237007, "loss_ce": 0.00040309398900717497, "loss_iou": 0.2265625, "loss_num": 0.010986328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 238776636, "step": 2616 }, { "epoch": 10.904166666666667, "grad_norm": 3.243954880467529, "learning_rate": 5e-05, "loss": 0.0865, "num_input_tokens_seen": 238867920, "step": 2617 }, { "epoch": 10.904166666666667, "loss": 0.0947527140378952, "loss_ce": 4.1406026866752654e-05, "loss_iou": 0.228515625, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 238867920, "step": 2617 }, { "epoch": 10.908333333333333, "grad_norm": 3.0316435369779824, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 238959192, "step": 2618 }, { "epoch": 10.908333333333333, "loss": 0.06768861413002014, "loss_ce": 0.0001989835436688736, "loss_iou": 0.1884765625, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 238959192, "step": 2618 }, { "epoch": 10.9125, "grad_norm": 4.897163052854971, "learning_rate": 5e-05, "loss": 0.128, "num_input_tokens_seen": 239050212, "step": 2619 }, { "epoch": 10.9125, "loss": 0.14370054006576538, "loss_ce": 0.0013970638392493129, "loss_iou": 0.21875, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 239050212, "step": 2619 }, { "epoch": 10.916666666666666, "grad_norm": 7.104457595482057, "learning_rate": 5e-05, "loss": 0.1073, "num_input_tokens_seen": 239142140, "step": 2620 }, { "epoch": 10.916666666666666, "loss": 0.14011695981025696, "loss_ce": 0.002284326357766986, "loss_iou": 0.1806640625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 239142140, "step": 2620 }, { "epoch": 10.920833333333333, "grad_norm": 1.5029055909970825, "learning_rate": 5e-05, "loss": 0.076, "num_input_tokens_seen": 239233036, "step": 2621 }, { "epoch": 10.920833333333333, "loss": 0.048193223774433136, "loss_ce": 5.966435310256202e-06, "loss_iou": 0.1591796875, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 239233036, "step": 2621 }, { "epoch": 10.925, "grad_norm": 3.191602960669044, "learning_rate": 5e-05, "loss": 0.1152, "num_input_tokens_seen": 239323756, "step": 2622 }, { "epoch": 10.925, "loss": 0.16263824701309204, "loss_ce": 2.4303417376358993e-06, "loss_iou": 0.26953125, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 239323756, "step": 2622 }, { "epoch": 10.929166666666667, "grad_norm": 3.065510203947232, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 239415352, "step": 2623 }, { "epoch": 10.929166666666667, "loss": 0.051864929497241974, "loss_ce": 0.0011141971917822957, "loss_iou": 0.19921875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 239415352, "step": 2623 }, { "epoch": 10.933333333333334, "grad_norm": 2.946121885961593, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 239506196, "step": 2624 }, { "epoch": 10.933333333333334, "loss": 0.06659112870693207, "loss_ce": 0.00015435564273502678, "loss_iou": 0.248046875, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 239506196, "step": 2624 }, { "epoch": 10.9375, "grad_norm": 2.685711695313958, "learning_rate": 5e-05, "loss": 0.0653, "num_input_tokens_seen": 239597536, "step": 2625 }, { "epoch": 10.9375, "loss": 0.07138238847255707, "loss_ce": 0.002153257606551051, "loss_iou": 0.2001953125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 239597536, "step": 2625 }, { "epoch": 10.941666666666666, "grad_norm": 9.989995815244335, "learning_rate": 5e-05, "loss": 0.0647, "num_input_tokens_seen": 239688564, "step": 2626 }, { "epoch": 10.941666666666666, "loss": 0.04970329999923706, "loss_ce": 0.0002648217196110636, "loss_iou": 0.201171875, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 239688564, "step": 2626 }, { "epoch": 10.945833333333333, "grad_norm": 4.675366276387425, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 239779924, "step": 2627 }, { "epoch": 10.945833333333333, "loss": 0.060884036123752594, "loss_ce": 4.724512109532952e-05, "loss_iou": 0.26953125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 239779924, "step": 2627 }, { "epoch": 10.95, "grad_norm": 8.176285506220234, "learning_rate": 5e-05, "loss": 0.1314, "num_input_tokens_seen": 239871828, "step": 2628 }, { "epoch": 10.95, "loss": 0.11220882833003998, "loss_ce": 0.0012774209026247263, "loss_iou": 0.2080078125, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 239871828, "step": 2628 }, { "epoch": 10.954166666666667, "grad_norm": 3.0610626513608596, "learning_rate": 5e-05, "loss": 0.0506, "num_input_tokens_seen": 239962868, "step": 2629 }, { "epoch": 10.954166666666667, "loss": 0.02921304665505886, "loss_ce": 0.00024423663853667676, "loss_iou": 0.2470703125, "loss_num": 0.00579833984375, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 239962868, "step": 2629 }, { "epoch": 10.958333333333334, "grad_norm": 3.2222858925683404, "learning_rate": 5e-05, "loss": 0.099, "num_input_tokens_seen": 240054444, "step": 2630 }, { "epoch": 10.958333333333334, "loss": 0.1313033401966095, "loss_ce": 1.4491429283225443e-06, "loss_iou": 0.322265625, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 240054444, "step": 2630 }, { "epoch": 10.9625, "grad_norm": 3.3929942447700796, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 240146056, "step": 2631 }, { "epoch": 10.9625, "loss": 0.06336264312267303, "loss_ce": 0.0006490138475783169, "loss_iou": 0.265625, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 240146056, "step": 2631 }, { "epoch": 10.966666666666667, "grad_norm": 2.5227606594562912, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 240237644, "step": 2632 }, { "epoch": 10.966666666666667, "loss": 0.05070841312408447, "loss_ce": 1.8717042621574365e-05, "loss_iou": 0.318359375, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 240237644, "step": 2632 }, { "epoch": 10.970833333333333, "grad_norm": 2.520264622085186, "learning_rate": 5e-05, "loss": 0.0741, "num_input_tokens_seen": 240326952, "step": 2633 }, { "epoch": 10.970833333333333, "loss": 0.0733042061328888, "loss_ce": 9.8748421351047e-07, "loss_iou": 0.1767578125, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 240326952, "step": 2633 }, { "epoch": 10.975, "grad_norm": 1.9563081150893862, "learning_rate": 5e-05, "loss": 0.074, "num_input_tokens_seen": 240417816, "step": 2634 }, { "epoch": 10.975, "loss": 0.11251506209373474, "loss_ce": 0.001293750829063356, "loss_iou": 0.125, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 240417816, "step": 2634 }, { "epoch": 10.979166666666666, "grad_norm": 5.065297860890833, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 240509576, "step": 2635 }, { "epoch": 10.979166666666666, "loss": 0.028818506747484207, "loss_ce": 4.042866930831224e-05, "loss_iou": 0.240234375, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 240509576, "step": 2635 }, { "epoch": 10.983333333333333, "grad_norm": 3.16934427084196, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 240600848, "step": 2636 }, { "epoch": 10.983333333333333, "loss": 0.0635094940662384, "loss_ce": 2.4082135041680885e-06, "loss_iou": 0.3203125, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 240600848, "step": 2636 }, { "epoch": 10.9875, "grad_norm": 2.495854510196729, "learning_rate": 5e-05, "loss": 0.0881, "num_input_tokens_seen": 240690652, "step": 2637 }, { "epoch": 10.9875, "loss": 0.044383447617292404, "loss_ce": 5.6665088777663186e-05, "loss_iou": 0.322265625, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 240690652, "step": 2637 }, { "epoch": 10.991666666666667, "grad_norm": 7.214089578682879, "learning_rate": 5e-05, "loss": 0.0991, "num_input_tokens_seen": 240782300, "step": 2638 }, { "epoch": 10.991666666666667, "loss": 0.05883432552218437, "loss_ce": 0.0020868880674242973, "loss_iou": 0.388671875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 240782300, "step": 2638 }, { "epoch": 10.995833333333334, "grad_norm": 2.7885933218806205, "learning_rate": 5e-05, "loss": 0.1149, "num_input_tokens_seen": 240872792, "step": 2639 }, { "epoch": 10.995833333333334, "loss": 0.14906570315361023, "loss_ce": 2.5991162146965507e-06, "loss_iou": 0.279296875, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 240872792, "step": 2639 }, { "epoch": 11.0, "grad_norm": 2.0176084409762773, "learning_rate": 5e-05, "loss": 0.1402, "num_input_tokens_seen": 240963908, "step": 2640 }, { "epoch": 11.0, "loss": 0.15255336463451385, "loss_ce": 5.702380076400004e-05, "loss_iou": 0.216796875, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 240963908, "step": 2640 }, { "epoch": 11.004166666666666, "grad_norm": 2.553639431002751, "learning_rate": 5e-05, "loss": 0.0723, "num_input_tokens_seen": 241055888, "step": 2641 }, { "epoch": 11.004166666666666, "loss": 0.07801361382007599, "loss_ce": 0.0003005999606102705, "loss_iou": 0.2412109375, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 241055888, "step": 2641 }, { "epoch": 11.008333333333333, "grad_norm": 1.587553055646591, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 241147336, "step": 2642 }, { "epoch": 11.008333333333333, "loss": 0.03565359115600586, "loss_ce": 1.6691326891304925e-05, "loss_iou": 0.251953125, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 241147336, "step": 2642 }, { "epoch": 11.0125, "grad_norm": 2.8004207008971833, "learning_rate": 5e-05, "loss": 0.1247, "num_input_tokens_seen": 241238736, "step": 2643 }, { "epoch": 11.0125, "loss": 0.10212784260511398, "loss_ce": 0.00034409199724905193, "loss_iou": 0.251953125, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 241238736, "step": 2643 }, { "epoch": 11.016666666666667, "grad_norm": 5.175036951556304, "learning_rate": 5e-05, "loss": 0.0914, "num_input_tokens_seen": 241328948, "step": 2644 }, { "epoch": 11.016666666666667, "loss": 0.11754395812749863, "loss_ce": 8.180072472896427e-05, "loss_iou": 0.412109375, "loss_num": 0.0234375, "loss_xval": 0.11767578125, "num_input_tokens_seen": 241328948, "step": 2644 }, { "epoch": 11.020833333333334, "grad_norm": 4.164233302103024, "learning_rate": 5e-05, "loss": 0.0742, "num_input_tokens_seen": 241420064, "step": 2645 }, { "epoch": 11.020833333333334, "loss": 0.10768741369247437, "loss_ce": 8.243230695370585e-05, "loss_iou": 0.21875, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 241420064, "step": 2645 }, { "epoch": 11.025, "grad_norm": 14.355901836167327, "learning_rate": 5e-05, "loss": 0.0819, "num_input_tokens_seen": 241511284, "step": 2646 }, { "epoch": 11.025, "loss": 0.05288579314947128, "loss_ce": 0.00034978328039869666, "loss_iou": 0.3125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 241511284, "step": 2646 }, { "epoch": 11.029166666666667, "grad_norm": 2.7846388045000774, "learning_rate": 5e-05, "loss": 0.0566, "num_input_tokens_seen": 241601988, "step": 2647 }, { "epoch": 11.029166666666667, "loss": 0.0509650744497776, "loss_ce": 3.123717033304274e-05, "loss_iou": 0.2314453125, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 241601988, "step": 2647 }, { "epoch": 11.033333333333333, "grad_norm": 1.847015603045074, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 241693232, "step": 2648 }, { "epoch": 11.033333333333333, "loss": 0.05165189504623413, "loss_ce": 5.429990778793581e-05, "loss_iou": 0.017578125, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 241693232, "step": 2648 }, { "epoch": 11.0375, "grad_norm": 3.794780395752318, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 241784292, "step": 2649 }, { "epoch": 11.0375, "loss": 0.05502014979720116, "loss_ce": 2.7472731744637713e-05, "loss_iou": 0.27734375, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 241784292, "step": 2649 }, { "epoch": 11.041666666666666, "grad_norm": 4.257199059185294, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 241875824, "step": 2650 }, { "epoch": 11.041666666666666, "loss": 0.058951519429683685, "loss_ce": 2.207309807999991e-05, "loss_iou": 0.16796875, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 241875824, "step": 2650 }, { "epoch": 11.045833333333333, "grad_norm": 4.761678759376161, "learning_rate": 5e-05, "loss": 0.0508, "num_input_tokens_seen": 241968188, "step": 2651 }, { "epoch": 11.045833333333333, "loss": 0.04210842028260231, "loss_ce": 0.0004442967183422297, "loss_iou": 0.306640625, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 241968188, "step": 2651 }, { "epoch": 11.05, "grad_norm": 3.6609282092189925, "learning_rate": 5e-05, "loss": 0.0856, "num_input_tokens_seen": 242059468, "step": 2652 }, { "epoch": 11.05, "loss": 0.0841054618358612, "loss_ce": 1.4272000044002198e-05, "loss_iou": 0.365234375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 242059468, "step": 2652 }, { "epoch": 11.054166666666667, "grad_norm": 2.6215961232335614, "learning_rate": 5e-05, "loss": 0.1001, "num_input_tokens_seen": 242150620, "step": 2653 }, { "epoch": 11.054166666666667, "loss": 0.12753871083259583, "loss_ce": 5.753432560595684e-06, "loss_iou": 0.408203125, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 242150620, "step": 2653 }, { "epoch": 11.058333333333334, "grad_norm": 3.6705535538829057, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 242241964, "step": 2654 }, { "epoch": 11.058333333333334, "loss": 0.04872170090675354, "loss_ce": 8.017166692297906e-06, "loss_iou": 0.296875, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 242241964, "step": 2654 }, { "epoch": 11.0625, "grad_norm": 3.096843310450073, "learning_rate": 5e-05, "loss": 0.0622, "num_input_tokens_seen": 242333540, "step": 2655 }, { "epoch": 11.0625, "loss": 0.06332937628030777, "loss_ce": 0.00015799149696249515, "loss_iou": 0.265625, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 242333540, "step": 2655 }, { "epoch": 11.066666666666666, "grad_norm": 2.821347636168007, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 242424904, "step": 2656 }, { "epoch": 11.066666666666666, "loss": 0.04395774379372597, "loss_ce": 0.00011924280261155218, "loss_iou": 0.2578125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 242424904, "step": 2656 }, { "epoch": 11.070833333333333, "grad_norm": 1.9314475325144638, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 242516172, "step": 2657 }, { "epoch": 11.070833333333333, "loss": 0.040782131254673004, "loss_ce": 4.116656054975465e-05, "loss_iou": 0.25390625, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 242516172, "step": 2657 }, { "epoch": 11.075, "grad_norm": 3.1928357911856935, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 242607204, "step": 2658 }, { "epoch": 11.075, "loss": 0.05490949749946594, "loss_ce": 0.00016096464241854846, "loss_iou": 0.2421875, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 242607204, "step": 2658 }, { "epoch": 11.079166666666667, "grad_norm": 3.403930510911585, "learning_rate": 5e-05, "loss": 0.0723, "num_input_tokens_seen": 242698216, "step": 2659 }, { "epoch": 11.079166666666667, "loss": 0.06885186582803726, "loss_ce": 0.0006450839573517442, "loss_iou": 0.27734375, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 242698216, "step": 2659 }, { "epoch": 11.083333333333334, "grad_norm": 3.0371808321247102, "learning_rate": 5e-05, "loss": 0.1277, "num_input_tokens_seen": 242789448, "step": 2660 }, { "epoch": 11.083333333333334, "loss": 0.12627384066581726, "loss_ce": 0.0011212533572688699, "loss_iou": 0.375, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 242789448, "step": 2660 }, { "epoch": 11.0875, "grad_norm": 2.826800200963949, "learning_rate": 5e-05, "loss": 0.0501, "num_input_tokens_seen": 242881280, "step": 2661 }, { "epoch": 11.0875, "loss": 0.039384517818689346, "loss_ce": 0.00023809520644135773, "loss_iou": 0.1826171875, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 242881280, "step": 2661 }, { "epoch": 11.091666666666667, "grad_norm": 1.9785576025845197, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 242972648, "step": 2662 }, { "epoch": 11.091666666666667, "loss": 0.06952418386936188, "loss_ce": 3.565673978300765e-05, "loss_iou": 0.353515625, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 242972648, "step": 2662 }, { "epoch": 11.095833333333333, "grad_norm": 2.9877109491263703, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 243063848, "step": 2663 }, { "epoch": 11.095833333333333, "loss": 0.03487911820411682, "loss_ce": 0.00013485604722518474, "loss_iou": 0.287109375, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 243063848, "step": 2663 }, { "epoch": 11.1, "grad_norm": 5.189930396367434, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 243155072, "step": 2664 }, { "epoch": 11.1, "loss": 0.04049532860517502, "loss_ce": 1.3759843568550423e-05, "loss_iou": 0.2578125, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 243155072, "step": 2664 }, { "epoch": 11.104166666666666, "grad_norm": 4.169496908002205, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 243245912, "step": 2665 }, { "epoch": 11.104166666666666, "loss": 0.07048118859529495, "loss_ce": 8.375644711122732e-07, "loss_iou": 0.361328125, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 243245912, "step": 2665 }, { "epoch": 11.108333333333333, "grad_norm": 6.217395923068188, "learning_rate": 5e-05, "loss": 0.0828, "num_input_tokens_seen": 243337164, "step": 2666 }, { "epoch": 11.108333333333333, "loss": 0.07544789463281631, "loss_ce": 0.00016102896188385785, "loss_iou": 0.216796875, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 243337164, "step": 2666 }, { "epoch": 11.1125, "grad_norm": 2.7128551737207593, "learning_rate": 5e-05, "loss": 0.0731, "num_input_tokens_seen": 243427952, "step": 2667 }, { "epoch": 11.1125, "loss": 0.0546993650496006, "loss_ce": 1.186842655442888e-05, "loss_iou": 0.3359375, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 243427952, "step": 2667 }, { "epoch": 11.116666666666667, "grad_norm": 5.04724571226057, "learning_rate": 5e-05, "loss": 0.0777, "num_input_tokens_seen": 243518432, "step": 2668 }, { "epoch": 11.116666666666667, "loss": 0.05978452041745186, "loss_ce": 5.869951564818621e-07, "loss_iou": 0.37109375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 243518432, "step": 2668 }, { "epoch": 11.120833333333334, "grad_norm": 2.9978691447633525, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 243609316, "step": 2669 }, { "epoch": 11.120833333333334, "loss": 0.048206619918346405, "loss_ce": 6.514426786452532e-05, "loss_iou": 0.2373046875, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 243609316, "step": 2669 }, { "epoch": 11.125, "grad_norm": 3.8070064442548017, "learning_rate": 5e-05, "loss": 0.043, "num_input_tokens_seen": 243700540, "step": 2670 }, { "epoch": 11.125, "loss": 0.04456840828061104, "loss_ce": 2.8001604732708074e-05, "loss_iou": 0.162109375, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 243700540, "step": 2670 }, { "epoch": 11.129166666666666, "grad_norm": 2.5533515554126276, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 243792088, "step": 2671 }, { "epoch": 11.129166666666666, "loss": 0.07489380240440369, "loss_ce": 3.6675114643003326e-06, "loss_iou": 0.228515625, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 243792088, "step": 2671 }, { "epoch": 11.133333333333333, "grad_norm": 8.76308530196958, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 243883440, "step": 2672 }, { "epoch": 11.133333333333333, "loss": 0.029476849362254143, "loss_ce": 1.2126994079153519e-05, "loss_iou": 0.2578125, "loss_num": 0.005889892578125, "loss_xval": 0.0294189453125, "num_input_tokens_seen": 243883440, "step": 2672 }, { "epoch": 11.1375, "grad_norm": 2.7411960830642803, "learning_rate": 5e-05, "loss": 0.0898, "num_input_tokens_seen": 243974148, "step": 2673 }, { "epoch": 11.1375, "loss": 0.10514024645090103, "loss_ce": 0.0005259868921712041, "loss_iou": 0.2421875, "loss_num": 0.02099609375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 243974148, "step": 2673 }, { "epoch": 11.141666666666667, "grad_norm": 1.6147312553770174, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 244065188, "step": 2674 }, { "epoch": 11.141666666666667, "loss": 0.06602882593870163, "loss_ce": 2.6929230443784036e-05, "loss_iou": 0.375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 244065188, "step": 2674 }, { "epoch": 11.145833333333334, "grad_norm": 2.9834652924516982, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 244156980, "step": 2675 }, { "epoch": 11.145833333333334, "loss": 0.061976782977581024, "loss_ce": 3.3730986615410075e-05, "loss_iou": 0.2490234375, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 244156980, "step": 2675 }, { "epoch": 11.15, "grad_norm": 14.041890501096185, "learning_rate": 5e-05, "loss": 0.0596, "num_input_tokens_seen": 244247708, "step": 2676 }, { "epoch": 11.15, "loss": 0.03552209213376045, "loss_ce": 4.5408145524561405e-05, "loss_iou": 0.2099609375, "loss_num": 0.007110595703125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 244247708, "step": 2676 }, { "epoch": 11.154166666666667, "grad_norm": 2.064877630878675, "learning_rate": 5e-05, "loss": 0.0798, "num_input_tokens_seen": 244339292, "step": 2677 }, { "epoch": 11.154166666666667, "loss": 0.08990888297557831, "loss_ce": 0.0033762939274311066, "loss_iou": 0.37890625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 244339292, "step": 2677 }, { "epoch": 11.158333333333333, "grad_norm": 2.708907347781802, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 244430316, "step": 2678 }, { "epoch": 11.158333333333333, "loss": 0.04705560952425003, "loss_ce": 1.276460534427315e-05, "loss_iou": 0.291015625, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 244430316, "step": 2678 }, { "epoch": 11.1625, "grad_norm": 6.248912733827709, "learning_rate": 5e-05, "loss": 0.112, "num_input_tokens_seen": 244521460, "step": 2679 }, { "epoch": 11.1625, "loss": 0.06119865924119949, "loss_ce": 5.6695200328249484e-05, "loss_iou": 0.216796875, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 244521460, "step": 2679 }, { "epoch": 11.166666666666666, "grad_norm": 4.014238936093042, "learning_rate": 5e-05, "loss": 0.0923, "num_input_tokens_seen": 244612076, "step": 2680 }, { "epoch": 11.166666666666666, "loss": 0.08084017038345337, "loss_ce": 1.4365771676239092e-05, "loss_iou": 0.30859375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 244612076, "step": 2680 }, { "epoch": 11.170833333333333, "grad_norm": 2.365956557946269, "learning_rate": 5e-05, "loss": 0.0912, "num_input_tokens_seen": 244703032, "step": 2681 }, { "epoch": 11.170833333333333, "loss": 0.11085952818393707, "loss_ce": 4.428637112141587e-06, "loss_iou": 0.251953125, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 244703032, "step": 2681 }, { "epoch": 11.175, "grad_norm": 3.646809449283573, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 244794256, "step": 2682 }, { "epoch": 11.175, "loss": 0.11017481982707977, "loss_ce": 6.361942268995335e-06, "loss_iou": 0.0595703125, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 244794256, "step": 2682 }, { "epoch": 11.179166666666667, "grad_norm": 1.5967211911593462, "learning_rate": 5e-05, "loss": 0.084, "num_input_tokens_seen": 244884596, "step": 2683 }, { "epoch": 11.179166666666667, "loss": 0.08981596678495407, "loss_ce": 1.7995476810028777e-05, "loss_iou": 0.2158203125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 244884596, "step": 2683 }, { "epoch": 11.183333333333334, "grad_norm": 2.1982779748437338, "learning_rate": 5e-05, "loss": 0.0531, "num_input_tokens_seen": 244976148, "step": 2684 }, { "epoch": 11.183333333333334, "loss": 0.04370079189538956, "loss_ce": 0.0003200560749974102, "loss_iou": 0.21484375, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 244976148, "step": 2684 }, { "epoch": 11.1875, "grad_norm": 2.4019570743051095, "learning_rate": 5e-05, "loss": 0.1104, "num_input_tokens_seen": 245067880, "step": 2685 }, { "epoch": 11.1875, "loss": 0.07037793844938278, "loss_ce": 1.9662955310195684e-05, "loss_iou": 0.2265625, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 245067880, "step": 2685 }, { "epoch": 11.191666666666666, "grad_norm": 3.500552920517526, "learning_rate": 5e-05, "loss": 0.0908, "num_input_tokens_seen": 245159316, "step": 2686 }, { "epoch": 11.191666666666666, "loss": 0.1178303211927414, "loss_ce": 1.950369096448412e-06, "loss_iou": 0.3046875, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 245159316, "step": 2686 }, { "epoch": 11.195833333333333, "grad_norm": 2.404240266456727, "learning_rate": 5e-05, "loss": 0.044, "num_input_tokens_seen": 245251196, "step": 2687 }, { "epoch": 11.195833333333333, "loss": 0.04409124702215195, "loss_ce": 0.000130675412947312, "loss_iou": 0.3671875, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 245251196, "step": 2687 }, { "epoch": 11.2, "grad_norm": 4.848620713762468, "learning_rate": 5e-05, "loss": 0.0927, "num_input_tokens_seen": 245341364, "step": 2688 }, { "epoch": 11.2, "loss": 0.10234392434358597, "loss_ce": 0.0017732457490637898, "loss_iou": 0.2734375, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 245341364, "step": 2688 }, { "epoch": 11.204166666666667, "grad_norm": 3.2238690970385178, "learning_rate": 5e-05, "loss": 0.1109, "num_input_tokens_seen": 245432708, "step": 2689 }, { "epoch": 11.204166666666667, "loss": 0.11461775749921799, "loss_ce": 8.989145499072038e-06, "loss_iou": 0.275390625, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 245432708, "step": 2689 }, { "epoch": 11.208333333333334, "grad_norm": 7.191269458248552, "learning_rate": 5e-05, "loss": 0.0903, "num_input_tokens_seen": 245523664, "step": 2690 }, { "epoch": 11.208333333333334, "loss": 0.10043956339359283, "loss_ce": 6.214509085111786e-06, "loss_iou": 0.31640625, "loss_num": 0.02001953125, "loss_xval": 0.1005859375, "num_input_tokens_seen": 245523664, "step": 2690 }, { "epoch": 11.2125, "grad_norm": 3.191748444668608, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 245614976, "step": 2691 }, { "epoch": 11.2125, "loss": 0.039116621017456055, "loss_ce": 1.597761183802504e-05, "loss_iou": 0.2236328125, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 245614976, "step": 2691 }, { "epoch": 11.216666666666667, "grad_norm": 3.0015127581483783, "learning_rate": 5e-05, "loss": 0.0658, "num_input_tokens_seen": 245705944, "step": 2692 }, { "epoch": 11.216666666666667, "loss": 0.05734315514564514, "loss_ce": 6.260463578655617e-07, "loss_iou": 0.2119140625, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 245705944, "step": 2692 }, { "epoch": 11.220833333333333, "grad_norm": 3.100443408978586, "learning_rate": 5e-05, "loss": 0.0749, "num_input_tokens_seen": 245797184, "step": 2693 }, { "epoch": 11.220833333333333, "loss": 0.06344582140445709, "loss_ce": 0.0009000458521768451, "loss_iou": 0.20703125, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 245797184, "step": 2693 }, { "epoch": 11.225, "grad_norm": 6.253250655357221, "learning_rate": 5e-05, "loss": 0.089, "num_input_tokens_seen": 245888588, "step": 2694 }, { "epoch": 11.225, "loss": 0.07886232435703278, "loss_ce": 0.00037111277924850583, "loss_iou": 0.38671875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 245888588, "step": 2694 }, { "epoch": 11.229166666666666, "grad_norm": 2.1730678405329913, "learning_rate": 5e-05, "loss": 0.1075, "num_input_tokens_seen": 245979896, "step": 2695 }, { "epoch": 11.229166666666666, "loss": 0.0903480052947998, "loss_ce": 7.157451591410791e-07, "loss_iou": 0.240234375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 245979896, "step": 2695 }, { "epoch": 11.233333333333333, "grad_norm": 4.14898510788073, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 246071696, "step": 2696 }, { "epoch": 11.233333333333333, "loss": 0.05042353272438049, "loss_ce": 4.282536974642426e-05, "loss_iou": 0.123046875, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 246071696, "step": 2696 }, { "epoch": 11.2375, "grad_norm": 3.46826227696615, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 246163656, "step": 2697 }, { "epoch": 11.2375, "loss": 0.028534265235066414, "loss_ce": 0.00029024691320955753, "loss_iou": 0.158203125, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 246163656, "step": 2697 }, { "epoch": 11.241666666666667, "grad_norm": 1.7889033940175771, "learning_rate": 5e-05, "loss": 0.0671, "num_input_tokens_seen": 246254440, "step": 2698 }, { "epoch": 11.241666666666667, "loss": 0.06817486882209778, "loss_ce": 0.0010819713352248073, "loss_iou": 0.0, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 246254440, "step": 2698 }, { "epoch": 11.245833333333334, "grad_norm": 1.8621407970482446, "learning_rate": 5e-05, "loss": 0.0849, "num_input_tokens_seen": 246346288, "step": 2699 }, { "epoch": 11.245833333333334, "loss": 0.11108222603797913, "loss_ce": 1.3498207408702001e-05, "loss_iou": 0.2177734375, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 246346288, "step": 2699 }, { "epoch": 11.25, "grad_norm": 5.07881587238784, "learning_rate": 5e-05, "loss": 0.1127, "num_input_tokens_seen": 246437464, "step": 2700 }, { "epoch": 11.25, "loss": 0.16123591363430023, "loss_ce": 7.258645200636238e-05, "loss_iou": 0.150390625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 246437464, "step": 2700 }, { "epoch": 11.254166666666666, "grad_norm": 3.8278222792890824, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 246528996, "step": 2701 }, { "epoch": 11.254166666666666, "loss": 0.06611582636833191, "loss_ce": 1.4749471120012458e-05, "loss_iou": 0.35546875, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 246528996, "step": 2701 }, { "epoch": 11.258333333333333, "grad_norm": 2.5501182541847394, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 246620564, "step": 2702 }, { "epoch": 11.258333333333333, "loss": 0.03480696678161621, "loss_ce": 1.6694054920662893e-06, "loss_iou": 0.267578125, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 246620564, "step": 2702 }, { "epoch": 11.2625, "grad_norm": 2.466748870865673, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 246711844, "step": 2703 }, { "epoch": 11.2625, "loss": 0.03872073069214821, "loss_ce": 9.183676411339547e-06, "loss_iou": 0.287109375, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 246711844, "step": 2703 }, { "epoch": 11.266666666666667, "grad_norm": 3.856505806043897, "learning_rate": 5e-05, "loss": 0.0973, "num_input_tokens_seen": 246803444, "step": 2704 }, { "epoch": 11.266666666666667, "loss": 0.08916931599378586, "loss_ce": 8.850642188917845e-05, "loss_iou": 0.185546875, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 246803444, "step": 2704 }, { "epoch": 11.270833333333334, "grad_norm": 3.611251296625748, "learning_rate": 5e-05, "loss": 0.0681, "num_input_tokens_seen": 246894804, "step": 2705 }, { "epoch": 11.270833333333334, "loss": 0.0859089121222496, "loss_ce": 0.0005207245703786612, "loss_iou": 0.216796875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 246894804, "step": 2705 }, { "epoch": 11.275, "grad_norm": 2.758755755769881, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 246986168, "step": 2706 }, { "epoch": 11.275, "loss": 0.08567283302545547, "loss_ce": 9.99398162093712e-06, "loss_iou": 0.337890625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 246986168, "step": 2706 }, { "epoch": 11.279166666666667, "grad_norm": 28.694230527247903, "learning_rate": 5e-05, "loss": 0.1072, "num_input_tokens_seen": 247077700, "step": 2707 }, { "epoch": 11.279166666666667, "loss": 0.10691899806261063, "loss_ce": 6.615737220272422e-07, "loss_iou": 0.2177734375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 247077700, "step": 2707 }, { "epoch": 11.283333333333333, "grad_norm": 1.5594716242777975, "learning_rate": 5e-05, "loss": 0.0612, "num_input_tokens_seen": 247169228, "step": 2708 }, { "epoch": 11.283333333333333, "loss": 0.056843891739845276, "loss_ce": 4.9039017540053464e-06, "loss_iou": 0.1357421875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 247169228, "step": 2708 }, { "epoch": 11.2875, "grad_norm": 4.545860092599523, "learning_rate": 5e-05, "loss": 0.0647, "num_input_tokens_seen": 247260608, "step": 2709 }, { "epoch": 11.2875, "loss": 0.06523621082305908, "loss_ce": 3.5400349588599056e-05, "loss_iou": 0.326171875, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 247260608, "step": 2709 }, { "epoch": 11.291666666666666, "grad_norm": 1.9214195941139585, "learning_rate": 5e-05, "loss": 0.0825, "num_input_tokens_seen": 247351872, "step": 2710 }, { "epoch": 11.291666666666666, "loss": 0.07823637127876282, "loss_ce": 8.086175512289628e-05, "loss_iou": 0.30859375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 247351872, "step": 2710 }, { "epoch": 11.295833333333333, "grad_norm": 4.218596955138657, "learning_rate": 5e-05, "loss": 0.0874, "num_input_tokens_seen": 247442660, "step": 2711 }, { "epoch": 11.295833333333333, "loss": 0.04063517600297928, "loss_ce": 1.0193945172431995e-06, "loss_iou": 0.310546875, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 247442660, "step": 2711 }, { "epoch": 11.3, "grad_norm": 3.676734520825449, "learning_rate": 5e-05, "loss": 0.063, "num_input_tokens_seen": 247533776, "step": 2712 }, { "epoch": 11.3, "loss": 0.06818170100450516, "loss_ce": 9.698516078060493e-05, "loss_iou": 0.291015625, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 247533776, "step": 2712 }, { "epoch": 11.304166666666667, "grad_norm": 3.470390920762771, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 247624792, "step": 2713 }, { "epoch": 11.304166666666667, "loss": 0.0611143596470356, "loss_ce": 2.911293449869845e-06, "loss_iou": 0.396484375, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 247624792, "step": 2713 }, { "epoch": 11.308333333333334, "grad_norm": 2.9181044624824435, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 247716444, "step": 2714 }, { "epoch": 11.308333333333334, "loss": 0.06357040256261826, "loss_ce": 0.0007041930221021175, "loss_iou": 0.2890625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 247716444, "step": 2714 }, { "epoch": 11.3125, "grad_norm": 3.0094011954333326, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 247807880, "step": 2715 }, { "epoch": 11.3125, "loss": 0.047051601111888885, "loss_ce": 2.401344318059273e-05, "loss_iou": 0.232421875, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 247807880, "step": 2715 }, { "epoch": 11.316666666666666, "grad_norm": 4.432788501298371, "learning_rate": 5e-05, "loss": 0.105, "num_input_tokens_seen": 247899236, "step": 2716 }, { "epoch": 11.316666666666666, "loss": 0.0781235545873642, "loss_ce": 5.9586993302218616e-05, "loss_iou": 0.427734375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 247899236, "step": 2716 }, { "epoch": 11.320833333333333, "grad_norm": 2.784805843158077, "learning_rate": 5e-05, "loss": 0.1038, "num_input_tokens_seen": 247990644, "step": 2717 }, { "epoch": 11.320833333333333, "loss": 0.051045119762420654, "loss_ce": 4.472130513022421e-06, "loss_iou": 0.21484375, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 247990644, "step": 2717 }, { "epoch": 11.325, "grad_norm": 6.738147447697919, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 248081828, "step": 2718 }, { "epoch": 11.325, "loss": 0.05768699571490288, "loss_ce": 0.0005123146111145616, "loss_iou": 0.26953125, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 248081828, "step": 2718 }, { "epoch": 11.329166666666667, "grad_norm": 4.4089728566049615, "learning_rate": 5e-05, "loss": 0.1044, "num_input_tokens_seen": 248173364, "step": 2719 }, { "epoch": 11.329166666666667, "loss": 0.1089344322681427, "loss_ce": 7.822787301847711e-05, "loss_iou": 0.302734375, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 248173364, "step": 2719 }, { "epoch": 11.333333333333334, "grad_norm": 1.9846478948713728, "learning_rate": 5e-05, "loss": 0.0528, "num_input_tokens_seen": 248265452, "step": 2720 }, { "epoch": 11.333333333333334, "loss": 0.04808041825890541, "loss_ce": 0.003967257682234049, "loss_iou": 0.33984375, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 248265452, "step": 2720 }, { "epoch": 11.3375, "grad_norm": 1.9982745678749323, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 248357340, "step": 2721 }, { "epoch": 11.3375, "loss": 0.04231679067015648, "loss_ce": 0.00013196113286539912, "loss_iou": 0.125, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 248357340, "step": 2721 }, { "epoch": 11.341666666666667, "grad_norm": 2.984575890838546, "learning_rate": 5e-05, "loss": 0.0404, "num_input_tokens_seen": 248448232, "step": 2722 }, { "epoch": 11.341666666666667, "loss": 0.04854791611433029, "loss_ce": 0.0001775545097189024, "loss_iou": 0.1796875, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 248448232, "step": 2722 }, { "epoch": 11.345833333333333, "grad_norm": 2.700649135100673, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 248539736, "step": 2723 }, { "epoch": 11.345833333333333, "loss": 0.04843775928020477, "loss_ce": 3.688073411467485e-05, "loss_iou": 0.23046875, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 248539736, "step": 2723 }, { "epoch": 11.35, "grad_norm": 2.142703007126909, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 248630668, "step": 2724 }, { "epoch": 11.35, "loss": 0.05542871356010437, "loss_ce": 6.982320337556303e-05, "loss_iou": 0.34375, "loss_num": 0.01104736328125, "loss_xval": 0.055419921875, "num_input_tokens_seen": 248630668, "step": 2724 }, { "epoch": 11.354166666666666, "grad_norm": 3.4553891155628005, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 248722216, "step": 2725 }, { "epoch": 11.354166666666666, "loss": 0.10151135921478271, "loss_ce": 9.894505637930706e-06, "loss_iou": 0.357421875, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 248722216, "step": 2725 }, { "epoch": 11.358333333333333, "grad_norm": 4.9126542442498105, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 248813456, "step": 2726 }, { "epoch": 11.358333333333333, "loss": 0.033823929727077484, "loss_ce": 2.8231502255948726e-06, "loss_iou": 0.330078125, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 248813456, "step": 2726 }, { "epoch": 11.3625, "grad_norm": 3.2736521351266825, "learning_rate": 5e-05, "loss": 0.0798, "num_input_tokens_seen": 248905088, "step": 2727 }, { "epoch": 11.3625, "loss": 0.06761668622493744, "loss_ce": 0.0003406820760574192, "loss_iou": 0.341796875, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 248905088, "step": 2727 }, { "epoch": 11.366666666666667, "grad_norm": 1.8359191390557765, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 248997364, "step": 2728 }, { "epoch": 11.366666666666667, "loss": 0.08889603614807129, "loss_ce": 0.00046372690121643245, "loss_iou": 0.263671875, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 248997364, "step": 2728 }, { "epoch": 11.370833333333334, "grad_norm": 4.613524160928555, "learning_rate": 5e-05, "loss": 0.096, "num_input_tokens_seen": 249087984, "step": 2729 }, { "epoch": 11.370833333333334, "loss": 0.11847412586212158, "loss_ce": 4.888382591161644e-06, "loss_iou": 0.1708984375, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 249087984, "step": 2729 }, { "epoch": 11.375, "grad_norm": 6.472476130650295, "learning_rate": 5e-05, "loss": 0.0841, "num_input_tokens_seen": 249179364, "step": 2730 }, { "epoch": 11.375, "loss": 0.07751025259494781, "loss_ce": 0.0004075966135133058, "loss_iou": 0.240234375, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 249179364, "step": 2730 }, { "epoch": 11.379166666666666, "grad_norm": 2.0262396552769864, "learning_rate": 5e-05, "loss": 0.0923, "num_input_tokens_seen": 249270408, "step": 2731 }, { "epoch": 11.379166666666666, "loss": 0.10283870995044708, "loss_ce": 8.602441812399775e-05, "loss_iou": 0.287109375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 249270408, "step": 2731 }, { "epoch": 11.383333333333333, "grad_norm": 1.8024602462731834, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 249361720, "step": 2732 }, { "epoch": 11.383333333333333, "loss": 0.09584569931030273, "loss_ce": 6.6278429585509e-05, "loss_iou": 0.185546875, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 249361720, "step": 2732 }, { "epoch": 11.3875, "grad_norm": 2.940021352095748, "learning_rate": 5e-05, "loss": 0.0733, "num_input_tokens_seen": 249452956, "step": 2733 }, { "epoch": 11.3875, "loss": 0.03529635816812515, "loss_ce": 0.00144473509863019, "loss_iou": 0.2138671875, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 249452956, "step": 2733 }, { "epoch": 11.391666666666667, "grad_norm": 2.600382492122339, "learning_rate": 5e-05, "loss": 0.0802, "num_input_tokens_seen": 249544408, "step": 2734 }, { "epoch": 11.391666666666667, "loss": 0.08233761042356491, "loss_ce": 0.00016902832430787385, "loss_iou": 0.263671875, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 249544408, "step": 2734 }, { "epoch": 11.395833333333334, "grad_norm": 3.4005438341922667, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 249636064, "step": 2735 }, { "epoch": 11.395833333333334, "loss": 0.08299972116947174, "loss_ce": 7.166857812990202e-06, "loss_iou": 0.291015625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 249636064, "step": 2735 }, { "epoch": 11.4, "grad_norm": 13.1219533614889, "learning_rate": 5e-05, "loss": 0.0647, "num_input_tokens_seen": 249727908, "step": 2736 }, { "epoch": 11.4, "loss": 0.047019585967063904, "loss_ce": 2.2515387172461487e-05, "loss_iou": 0.33984375, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 249727908, "step": 2736 }, { "epoch": 11.404166666666667, "grad_norm": 2.7704786263125496, "learning_rate": 5e-05, "loss": 0.0516, "num_input_tokens_seen": 249819412, "step": 2737 }, { "epoch": 11.404166666666667, "loss": 0.04526345431804657, "loss_ce": 5.886092367290985e-06, "loss_iou": 0.328125, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 249819412, "step": 2737 }, { "epoch": 11.408333333333333, "grad_norm": 4.015097845565541, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 249911020, "step": 2738 }, { "epoch": 11.408333333333333, "loss": 0.04373963177204132, "loss_ce": 0.001976326573640108, "loss_iou": 0.2333984375, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 249911020, "step": 2738 }, { "epoch": 11.4125, "grad_norm": 5.749573921039443, "learning_rate": 5e-05, "loss": 0.1062, "num_input_tokens_seen": 250002288, "step": 2739 }, { "epoch": 11.4125, "loss": 0.10650154948234558, "loss_ce": 0.000971770437899977, "loss_iou": 0.1953125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 250002288, "step": 2739 }, { "epoch": 11.416666666666666, "grad_norm": 1.7547716698846578, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 250093536, "step": 2740 }, { "epoch": 11.416666666666666, "loss": 0.04704135283827782, "loss_ce": 0.000944552302826196, "loss_iou": 0.07275390625, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 250093536, "step": 2740 }, { "epoch": 11.420833333333333, "grad_norm": 3.7529070742068873, "learning_rate": 5e-05, "loss": 0.1585, "num_input_tokens_seen": 250184244, "step": 2741 }, { "epoch": 11.420833333333333, "loss": 0.1299409568309784, "loss_ce": 4.7433445615752134e-06, "loss_iou": 0.2197265625, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 250184244, "step": 2741 }, { "epoch": 11.425, "grad_norm": 1.8625930195822442, "learning_rate": 5e-05, "loss": 0.0728, "num_input_tokens_seen": 250274520, "step": 2742 }, { "epoch": 11.425, "loss": 0.05740495026111603, "loss_ce": 0.00010056864994112402, "loss_iou": 0.08544921875, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 250274520, "step": 2742 }, { "epoch": 11.429166666666667, "grad_norm": 2.4351725931958885, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 250365952, "step": 2743 }, { "epoch": 11.429166666666667, "loss": 0.0731714516878128, "loss_ce": 0.0004175424110144377, "loss_iou": 0.36328125, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 250365952, "step": 2743 }, { "epoch": 11.433333333333334, "grad_norm": 2.413316258176711, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 250457084, "step": 2744 }, { "epoch": 11.433333333333334, "loss": 0.07355280220508575, "loss_ce": 3.5956800275016576e-05, "loss_iou": 0.291015625, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 250457084, "step": 2744 }, { "epoch": 11.4375, "grad_norm": 5.637883827271504, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 250548420, "step": 2745 }, { "epoch": 11.4375, "loss": 0.026842396706342697, "loss_ce": 1.7445854609832168e-05, "loss_iou": 0.1962890625, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 250548420, "step": 2745 }, { "epoch": 11.441666666666666, "grad_norm": 2.3023188917249247, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 250639996, "step": 2746 }, { "epoch": 11.441666666666666, "loss": 0.07687580585479736, "loss_ce": 0.00012409850023686886, "loss_iou": 0.27734375, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 250639996, "step": 2746 }, { "epoch": 11.445833333333333, "grad_norm": 4.086735347534926, "learning_rate": 5e-05, "loss": 0.082, "num_input_tokens_seen": 250731156, "step": 2747 }, { "epoch": 11.445833333333333, "loss": 0.03926856815814972, "loss_ce": 3.822279904852621e-05, "loss_iou": 0.21484375, "loss_num": 0.0078125, "loss_xval": 0.039306640625, "num_input_tokens_seen": 250731156, "step": 2747 }, { "epoch": 11.45, "grad_norm": 3.7251995120016272, "learning_rate": 5e-05, "loss": 0.0849, "num_input_tokens_seen": 250822536, "step": 2748 }, { "epoch": 11.45, "loss": 0.08701753616333008, "loss_ce": 0.0001950228470377624, "loss_iou": 0.294921875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 250822536, "step": 2748 }, { "epoch": 11.454166666666667, "grad_norm": 2.4634828035844323, "learning_rate": 5e-05, "loss": 0.044, "num_input_tokens_seen": 250913728, "step": 2749 }, { "epoch": 11.454166666666667, "loss": 0.03424752503633499, "loss_ce": 5.2576619054889306e-05, "loss_iou": 0.298828125, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 250913728, "step": 2749 }, { "epoch": 11.458333333333334, "grad_norm": 3.431238291107999, "learning_rate": 5e-05, "loss": 0.0764, "num_input_tokens_seen": 251004924, "step": 2750 }, { "epoch": 11.458333333333334, "eval_seeclick_CIoU": 0.29619458317756653, "eval_seeclick_GIoU": 0.2929905205965042, "eval_seeclick_IoU": 0.38835375010967255, "eval_seeclick_MAE_all": 0.09226639196276665, "eval_seeclick_MAE_h": 0.08143088221549988, "eval_seeclick_MAE_w": 0.19786543399095535, "eval_seeclick_MAE_x_boxes": 0.19208704680204391, "eval_seeclick_MAE_y_boxes": 0.08390780910849571, "eval_seeclick_NUM_probability": 0.9999978244304657, "eval_seeclick_inside_bbox": 0.6150568127632141, "eval_seeclick_loss": 0.5580732822418213, "eval_seeclick_loss_ce": 0.13431841880083084, "eval_seeclick_loss_iou": 0.42510986328125, "eval_seeclick_loss_num": 0.0828704833984375, "eval_seeclick_loss_xval": 0.41436767578125, "eval_seeclick_runtime": 79.9593, "eval_seeclick_samples_per_second": 0.538, "eval_seeclick_steps_per_second": 0.025, "num_input_tokens_seen": 251004924, "step": 2750 }, { "epoch": 11.458333333333334, "eval_icons_CIoU": 0.29433034360408783, "eval_icons_GIoU": 0.3072480261325836, "eval_icons_IoU": 0.38313308358192444, "eval_icons_MAE_all": 0.07298702001571655, "eval_icons_MAE_h": 0.17794279009103775, "eval_icons_MAE_w": 0.08765166997909546, "eval_icons_MAE_x_boxes": 0.08846120536327362, "eval_icons_MAE_y_boxes": 0.1775236800312996, "eval_icons_NUM_probability": 0.9999988079071045, "eval_icons_inside_bbox": 0.5069444477558136, "eval_icons_loss": 0.35328853130340576, "eval_icons_loss_ce": 0.0008648704388178885, "eval_icons_loss_iou": 0.275634765625, "eval_icons_loss_num": 0.073883056640625, "eval_icons_loss_xval": 0.36932373046875, "eval_icons_runtime": 98.2904, "eval_icons_samples_per_second": 0.509, "eval_icons_steps_per_second": 0.02, "num_input_tokens_seen": 251004924, "step": 2750 }, { "epoch": 11.458333333333334, "eval_screenspot_CIoU": 0.3644411067167918, "eval_screenspot_GIoU": 0.35429301857948303, "eval_screenspot_IoU": 0.4447065393129985, "eval_screenspot_MAE_all": 0.10152472058931987, "eval_screenspot_MAE_h": 0.09767068674166997, "eval_screenspot_MAE_w": 0.203730175892512, "eval_screenspot_MAE_x_boxes": 0.2037352075179418, "eval_screenspot_MAE_y_boxes": 0.08715710788965225, "eval_screenspot_NUM_probability": 0.9992983738581339, "eval_screenspot_inside_bbox": 0.7012499968210856, "eval_screenspot_loss": 0.5108309388160706, "eval_screenspot_loss_ce": 0.0012114184373785974, "eval_screenspot_loss_iou": 0.3665364583333333, "eval_screenspot_loss_num": 0.102203369140625, "eval_screenspot_loss_xval": 0.5108235677083334, "eval_screenspot_runtime": 153.1364, "eval_screenspot_samples_per_second": 0.581, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 251004924, "step": 2750 }, { "epoch": 11.458333333333334, "eval_compot_CIoU": 0.5426531434059143, "eval_compot_GIoU": 0.5361522138118744, "eval_compot_IoU": 0.5986569225788116, "eval_compot_MAE_all": 0.048880767077207565, "eval_compot_MAE_h": 0.06029001250863075, "eval_compot_MAE_w": 0.11704185605049133, "eval_compot_MAE_x_boxes": 0.11615481600165367, "eval_compot_MAE_y_boxes": 0.05879940651357174, "eval_compot_NUM_probability": 0.9999973475933075, "eval_compot_inside_bbox": 0.8107638955116272, "eval_compot_loss": 0.2820165157318115, "eval_compot_loss_ce": 0.04021947830915451, "eval_compot_loss_iou": 0.2904052734375, "eval_compot_loss_num": 0.042430877685546875, "eval_compot_loss_xval": 0.2122650146484375, "eval_compot_runtime": 97.599, "eval_compot_samples_per_second": 0.512, "eval_compot_steps_per_second": 0.02, "num_input_tokens_seen": 251004924, "step": 2750 }, { "epoch": 11.458333333333334, "loss": 0.23834848403930664, "loss_ce": 0.03562020882964134, "loss_iou": 0.2734375, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 251004924, "step": 2750 }, { "epoch": 11.4625, "grad_norm": 9.404948579296532, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 251096120, "step": 2751 }, { "epoch": 11.4625, "loss": 0.06319372355937958, "loss_ce": 0.00017492602637503296, "loss_iou": 0.37109375, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 251096120, "step": 2751 }, { "epoch": 11.466666666666667, "grad_norm": 3.0181016834500314, "learning_rate": 5e-05, "loss": 0.1042, "num_input_tokens_seen": 251187304, "step": 2752 }, { "epoch": 11.466666666666667, "loss": 0.07687968760728836, "loss_ce": 0.00342387892305851, "loss_iou": 0.2578125, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 251187304, "step": 2752 }, { "epoch": 11.470833333333333, "grad_norm": 6.415262920707865, "learning_rate": 5e-05, "loss": 0.1131, "num_input_tokens_seen": 251278548, "step": 2753 }, { "epoch": 11.470833333333333, "loss": 0.09908459335565567, "loss_ce": 0.00010846274381037802, "loss_iou": 0.267578125, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 251278548, "step": 2753 }, { "epoch": 11.475, "grad_norm": 5.777631940298475, "learning_rate": 5e-05, "loss": 0.0578, "num_input_tokens_seen": 251369688, "step": 2754 }, { "epoch": 11.475, "loss": 0.05238595977425575, "loss_ce": 0.00036874954821541905, "loss_iou": 0.267578125, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 251369688, "step": 2754 }, { "epoch": 11.479166666666666, "grad_norm": 6.699880381934479, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 251461016, "step": 2755 }, { "epoch": 11.479166666666666, "loss": 0.05163790285587311, "loss_ce": 2.1608900624414673e-06, "loss_iou": 0.345703125, "loss_num": 0.01031494140625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 251461016, "step": 2755 }, { "epoch": 11.483333333333333, "grad_norm": 5.016164344500406, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 251552476, "step": 2756 }, { "epoch": 11.483333333333333, "loss": 0.048208437860012054, "loss_ce": 2.1183048374950886e-05, "loss_iou": 0.4140625, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 251552476, "step": 2756 }, { "epoch": 11.4875, "grad_norm": 2.0772984630901044, "learning_rate": 5e-05, "loss": 0.0819, "num_input_tokens_seen": 251644024, "step": 2757 }, { "epoch": 11.4875, "loss": 0.07686302065849304, "loss_ce": 6.55341282254085e-05, "loss_iou": 0.345703125, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 251644024, "step": 2757 }, { "epoch": 11.491666666666667, "grad_norm": 2.9398763812012025, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 251734604, "step": 2758 }, { "epoch": 11.491666666666667, "loss": 0.06483888626098633, "loss_ce": 1.955397783603985e-05, "loss_iou": 0.2138671875, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 251734604, "step": 2758 }, { "epoch": 11.495833333333334, "grad_norm": 4.8316124027285285, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 251826580, "step": 2759 }, { "epoch": 11.495833333333334, "loss": 0.05621996894478798, "loss_ce": 0.00018206711683887988, "loss_iou": 0.1435546875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 251826580, "step": 2759 }, { "epoch": 11.5, "grad_norm": 2.3090663766733255, "learning_rate": 5e-05, "loss": 0.0615, "num_input_tokens_seen": 251918256, "step": 2760 }, { "epoch": 11.5, "loss": 0.07426677644252777, "loss_ce": 0.0002616445126477629, "loss_iou": 0.28515625, "loss_num": 0.0147705078125, "loss_xval": 0.07421875, "num_input_tokens_seen": 251918256, "step": 2760 }, { "epoch": 11.504166666666666, "grad_norm": 3.6694203312693396, "learning_rate": 5e-05, "loss": 0.0732, "num_input_tokens_seen": 252010080, "step": 2761 }, { "epoch": 11.504166666666666, "loss": 0.07445695996284485, "loss_ce": 0.0017946104053407907, "loss_iou": 0.32421875, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 252010080, "step": 2761 }, { "epoch": 11.508333333333333, "grad_norm": 3.1695559146201897, "learning_rate": 5e-05, "loss": 0.048, "num_input_tokens_seen": 252100984, "step": 2762 }, { "epoch": 11.508333333333333, "loss": 0.05864902213215828, "loss_ce": 9.49107197811827e-06, "loss_iou": 0.30859375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 252100984, "step": 2762 }, { "epoch": 11.5125, "grad_norm": 2.6170723158105194, "learning_rate": 5e-05, "loss": 0.0595, "num_input_tokens_seen": 252191872, "step": 2763 }, { "epoch": 11.5125, "loss": 0.0470140241086483, "loss_ce": 1.6982100987661397e-06, "loss_iou": 0.1982421875, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 252191872, "step": 2763 }, { "epoch": 11.516666666666667, "grad_norm": 2.2240632395882294, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 252282840, "step": 2764 }, { "epoch": 11.516666666666667, "loss": 0.07522941380739212, "loss_ce": 3.586279717637808e-06, "loss_iou": 0.27734375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 252282840, "step": 2764 }, { "epoch": 11.520833333333334, "grad_norm": 1.936684607025272, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 252374224, "step": 2765 }, { "epoch": 11.520833333333334, "loss": 0.06849393248558044, "loss_ce": 0.0002108556218445301, "loss_iou": 0.322265625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 252374224, "step": 2765 }, { "epoch": 11.525, "grad_norm": 2.8227182617362883, "learning_rate": 5e-05, "loss": 0.0958, "num_input_tokens_seen": 252465888, "step": 2766 }, { "epoch": 11.525, "loss": 0.1218826025724411, "loss_ce": 0.000239536224398762, "loss_iou": 0.1943359375, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 252465888, "step": 2766 }, { "epoch": 11.529166666666667, "grad_norm": 2.131730407889572, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 252557832, "step": 2767 }, { "epoch": 11.529166666666667, "loss": 0.08378443866968155, "loss_ce": 0.0012191261630505323, "loss_iou": 0.25390625, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 252557832, "step": 2767 }, { "epoch": 11.533333333333333, "grad_norm": 2.3278290162303645, "learning_rate": 5e-05, "loss": 0.099, "num_input_tokens_seen": 252647696, "step": 2768 }, { "epoch": 11.533333333333333, "loss": 0.07774877548217773, "loss_ce": 0.001607423764653504, "loss_iou": 0.14453125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 252647696, "step": 2768 }, { "epoch": 11.5375, "grad_norm": 1.8376633462341854, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 252738876, "step": 2769 }, { "epoch": 11.5375, "loss": 0.042166076600551605, "loss_ce": 6.0432503232732415e-06, "loss_iou": 0.3046875, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 252738876, "step": 2769 }, { "epoch": 11.541666666666666, "grad_norm": 3.362258235142838, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 252829700, "step": 2770 }, { "epoch": 11.541666666666666, "loss": 0.03307725116610527, "loss_ce": 1.908603917399887e-05, "loss_iou": 0.31640625, "loss_num": 0.006622314453125, "loss_xval": 0.032958984375, "num_input_tokens_seen": 252829700, "step": 2770 }, { "epoch": 11.545833333333333, "grad_norm": 3.098440746580563, "learning_rate": 5e-05, "loss": 0.0883, "num_input_tokens_seen": 252920408, "step": 2771 }, { "epoch": 11.545833333333333, "loss": 0.10354944318532944, "loss_ce": 3.3009541766659822e-06, "loss_iou": 0.162109375, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 252920408, "step": 2771 }, { "epoch": 11.55, "grad_norm": 2.2481952482408585, "learning_rate": 5e-05, "loss": 0.0833, "num_input_tokens_seen": 253011180, "step": 2772 }, { "epoch": 11.55, "loss": 0.07058833539485931, "loss_ce": 1.1733255860235658e-06, "loss_iou": 0.23828125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 253011180, "step": 2772 }, { "epoch": 11.554166666666667, "grad_norm": 2.378779411614109, "learning_rate": 5e-05, "loss": 0.0758, "num_input_tokens_seen": 253102820, "step": 2773 }, { "epoch": 11.554166666666667, "loss": 0.09102697670459747, "loss_ce": 8.459096716251224e-05, "loss_iou": 0.32421875, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 253102820, "step": 2773 }, { "epoch": 11.558333333333334, "grad_norm": 3.626487783840794, "learning_rate": 5e-05, "loss": 0.1212, "num_input_tokens_seen": 253193792, "step": 2774 }, { "epoch": 11.558333333333334, "loss": 0.07658842206001282, "loss_ce": 0.00014189038483891636, "loss_iou": 0.326171875, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 253193792, "step": 2774 }, { "epoch": 11.5625, "grad_norm": 5.01509653664042, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 253285112, "step": 2775 }, { "epoch": 11.5625, "loss": 0.045781366527080536, "loss_ce": 8.128983608912677e-05, "loss_iou": 0.234375, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 253285112, "step": 2775 }, { "epoch": 11.566666666666666, "grad_norm": 4.398346693808524, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 253377040, "step": 2776 }, { "epoch": 11.566666666666666, "loss": 0.0686916932463646, "loss_ce": 0.0004391258116811514, "loss_iou": 0.306640625, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 253377040, "step": 2776 }, { "epoch": 11.570833333333333, "grad_norm": 3.5203956737821467, "learning_rate": 5e-05, "loss": 0.0903, "num_input_tokens_seen": 253468468, "step": 2777 }, { "epoch": 11.570833333333333, "loss": 0.1309206485748291, "loss_ce": 7.865828592912294e-06, "loss_iou": 0.2216796875, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 253468468, "step": 2777 }, { "epoch": 11.575, "grad_norm": 3.509405073027444, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 253559120, "step": 2778 }, { "epoch": 11.575, "loss": 0.06349446624517441, "loss_ce": 6.367945752572268e-05, "loss_iou": 0.27734375, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 253559120, "step": 2778 }, { "epoch": 11.579166666666667, "grad_norm": 4.479791757883328, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 253650464, "step": 2779 }, { "epoch": 11.579166666666667, "loss": 0.07993124425411224, "loss_ce": 5.7041561376536265e-06, "loss_iou": 0.197265625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 253650464, "step": 2779 }, { "epoch": 11.583333333333334, "grad_norm": 2.6582836288121126, "learning_rate": 5e-05, "loss": 0.0845, "num_input_tokens_seen": 253742120, "step": 2780 }, { "epoch": 11.583333333333334, "loss": 0.033640384674072266, "loss_ce": 2.5271636332035996e-05, "loss_iou": 0.1328125, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 253742120, "step": 2780 }, { "epoch": 11.5875, "grad_norm": 4.125095437639992, "learning_rate": 5e-05, "loss": 0.0688, "num_input_tokens_seen": 253833700, "step": 2781 }, { "epoch": 11.5875, "loss": 0.10041318088769913, "loss_ce": 0.0033215084113180637, "loss_iou": 0.3359375, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 253833700, "step": 2781 }, { "epoch": 11.591666666666667, "grad_norm": 3.9652818320745413, "learning_rate": 5e-05, "loss": 0.1176, "num_input_tokens_seen": 253925124, "step": 2782 }, { "epoch": 11.591666666666667, "loss": 0.15131857991218567, "loss_ce": 1.243268525286112e-05, "loss_iou": 0.34375, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 253925124, "step": 2782 }, { "epoch": 11.595833333333333, "grad_norm": 2.9177510310641956, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 254016228, "step": 2783 }, { "epoch": 11.595833333333333, "loss": 0.10961540043354034, "loss_ce": 1.1515976439113729e-05, "loss_iou": 0.220703125, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 254016228, "step": 2783 }, { "epoch": 11.6, "grad_norm": 2.7266875155345622, "learning_rate": 5e-05, "loss": 0.0539, "num_input_tokens_seen": 254107736, "step": 2784 }, { "epoch": 11.6, "loss": 0.06983380019664764, "loss_ce": 1.95505663214135e-06, "loss_iou": 0.310546875, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 254107736, "step": 2784 }, { "epoch": 11.604166666666666, "grad_norm": 14.061937738446051, "learning_rate": 5e-05, "loss": 0.0697, "num_input_tokens_seen": 254199140, "step": 2785 }, { "epoch": 11.604166666666666, "loss": 0.08557083457708359, "loss_ce": 0.0003504964697640389, "loss_iou": 0.28515625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 254199140, "step": 2785 }, { "epoch": 11.608333333333333, "grad_norm": 3.4596467699047397, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 254290332, "step": 2786 }, { "epoch": 11.608333333333333, "loss": 0.09167817234992981, "loss_ce": 3.3687520044622943e-06, "loss_iou": 0.31640625, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 254290332, "step": 2786 }, { "epoch": 11.6125, "grad_norm": 3.109949228920906, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 254381496, "step": 2787 }, { "epoch": 11.6125, "loss": 0.046016875654459, "loss_ce": 0.00027102508465759456, "loss_iou": 0.25390625, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 254381496, "step": 2787 }, { "epoch": 11.616666666666667, "grad_norm": 2.7695627569049885, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 254473576, "step": 2788 }, { "epoch": 11.616666666666667, "loss": 0.07978077232837677, "loss_ce": 0.00017566655878908932, "loss_iou": 0.1875, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 254473576, "step": 2788 }, { "epoch": 11.620833333333334, "grad_norm": 1.2862383178885615, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 254565020, "step": 2789 }, { "epoch": 11.620833333333334, "loss": 0.03599901497364044, "loss_ce": 3.5316618323122384e-06, "loss_iou": 0.12451171875, "loss_num": 0.0072021484375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 254565020, "step": 2789 }, { "epoch": 11.625, "grad_norm": 2.2476605782102315, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 254656308, "step": 2790 }, { "epoch": 11.625, "loss": 0.060362979769706726, "loss_ce": 0.0013343519531190395, "loss_iou": 0.2890625, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 254656308, "step": 2790 }, { "epoch": 11.629166666666666, "grad_norm": 4.784563316647035, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 254748008, "step": 2791 }, { "epoch": 11.629166666666666, "loss": 0.1162891536951065, "loss_ce": 0.0018787547014653683, "loss_iou": 0.37109375, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 254748008, "step": 2791 }, { "epoch": 11.633333333333333, "grad_norm": 3.3228327784466356, "learning_rate": 5e-05, "loss": 0.0777, "num_input_tokens_seen": 254839640, "step": 2792 }, { "epoch": 11.633333333333333, "loss": 0.0805789902806282, "loss_ce": 0.0011112188221886754, "loss_iou": 0.21875, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 254839640, "step": 2792 }, { "epoch": 11.6375, "grad_norm": 48.217722451229285, "learning_rate": 5e-05, "loss": 0.0822, "num_input_tokens_seen": 254931388, "step": 2793 }, { "epoch": 11.6375, "loss": 0.061529166996479034, "loss_ce": 0.00239373417571187, "loss_iou": 0.181640625, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 254931388, "step": 2793 }, { "epoch": 11.641666666666667, "grad_norm": 2.4388938083203824, "learning_rate": 5e-05, "loss": 0.0922, "num_input_tokens_seen": 255022072, "step": 2794 }, { "epoch": 11.641666666666667, "loss": 0.09992580860853195, "loss_ce": 1.1263322448940016e-05, "loss_iou": 0.359375, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 255022072, "step": 2794 }, { "epoch": 11.645833333333334, "grad_norm": 62.60861556378519, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 255113548, "step": 2795 }, { "epoch": 11.645833333333334, "loss": 0.08678022027015686, "loss_ce": 1.874838380899746e-05, "loss_iou": 0.263671875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 255113548, "step": 2795 }, { "epoch": 11.65, "grad_norm": 4.333663691442244, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 255204604, "step": 2796 }, { "epoch": 11.65, "loss": 0.054408200085163116, "loss_ce": 0.00011742699280148372, "loss_iou": 0.310546875, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 255204604, "step": 2796 }, { "epoch": 11.654166666666667, "grad_norm": 7.411968075893123, "learning_rate": 5e-05, "loss": 0.0657, "num_input_tokens_seen": 255295664, "step": 2797 }, { "epoch": 11.654166666666667, "loss": 0.04946771264076233, "loss_ce": 0.000639589736238122, "loss_iou": 0.2353515625, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 255295664, "step": 2797 }, { "epoch": 11.658333333333333, "grad_norm": 2.701084957754334, "learning_rate": 5e-05, "loss": 0.1053, "num_input_tokens_seen": 255386836, "step": 2798 }, { "epoch": 11.658333333333333, "loss": 0.11523690819740295, "loss_ce": 9.409207268618047e-05, "loss_iou": 0.2216796875, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 255386836, "step": 2798 }, { "epoch": 11.6625, "grad_norm": 3.273955524452734, "learning_rate": 5e-05, "loss": 0.0875, "num_input_tokens_seen": 255478100, "step": 2799 }, { "epoch": 11.6625, "loss": 0.1136731430888176, "loss_ce": 0.002314502838999033, "loss_iou": 0.32421875, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 255478100, "step": 2799 }, { "epoch": 11.666666666666666, "grad_norm": 3.242321040345061, "learning_rate": 5e-05, "loss": 0.1012, "num_input_tokens_seen": 255569656, "step": 2800 }, { "epoch": 11.666666666666666, "loss": 0.08774766325950623, "loss_ce": 0.0009861922590062022, "loss_iou": 0.32421875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 255569656, "step": 2800 }, { "epoch": 11.670833333333333, "grad_norm": 3.059187190578943, "learning_rate": 5e-05, "loss": 0.0728, "num_input_tokens_seen": 255660884, "step": 2801 }, { "epoch": 11.670833333333333, "loss": 0.0821562260389328, "loss_ce": 6.393673538696021e-05, "loss_iou": 0.220703125, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 255660884, "step": 2801 }, { "epoch": 11.675, "grad_norm": 2.4947240380219733, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 255751908, "step": 2802 }, { "epoch": 11.675, "loss": 0.05989711731672287, "loss_ce": 6.366583420458483e-06, "loss_iou": 0.19921875, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 255751908, "step": 2802 }, { "epoch": 11.679166666666667, "grad_norm": 2.5267985250994074, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 255843508, "step": 2803 }, { "epoch": 11.679166666666667, "loss": 0.043543294072151184, "loss_ce": 0.00014730200928170234, "loss_iou": 0.189453125, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 255843508, "step": 2803 }, { "epoch": 11.683333333333334, "grad_norm": 4.36168402933246, "learning_rate": 5e-05, "loss": 0.0923, "num_input_tokens_seen": 255934776, "step": 2804 }, { "epoch": 11.683333333333334, "loss": 0.06685806065797806, "loss_ce": 0.0003297396469861269, "loss_iou": 0.35546875, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 255934776, "step": 2804 }, { "epoch": 11.6875, "grad_norm": 6.375631937181193, "learning_rate": 5e-05, "loss": 0.1221, "num_input_tokens_seen": 256025172, "step": 2805 }, { "epoch": 11.6875, "loss": 0.1451430320739746, "loss_ce": 1.4369206837727688e-06, "loss_iou": 0.23046875, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 256025172, "step": 2805 }, { "epoch": 11.691666666666666, "grad_norm": 2.585798896351954, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 256117384, "step": 2806 }, { "epoch": 11.691666666666666, "loss": 0.04989667236804962, "loss_ce": 0.0005802658852189779, "loss_iou": 0.26171875, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 256117384, "step": 2806 }, { "epoch": 11.695833333333333, "grad_norm": 1.7965183540535414, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 256208420, "step": 2807 }, { "epoch": 11.695833333333333, "loss": 0.07193634659051895, "loss_ce": 6.416817996068858e-06, "loss_iou": 0.36328125, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 256208420, "step": 2807 }, { "epoch": 11.7, "grad_norm": 2.2213844866692396, "learning_rate": 5e-05, "loss": 0.0674, "num_input_tokens_seen": 256300508, "step": 2808 }, { "epoch": 11.7, "loss": 0.0829494446516037, "loss_ce": 0.00046043359907343984, "loss_iou": 0.1708984375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 256300508, "step": 2808 }, { "epoch": 11.704166666666667, "grad_norm": 2.575061272063674, "learning_rate": 5e-05, "loss": 0.0813, "num_input_tokens_seen": 256391228, "step": 2809 }, { "epoch": 11.704166666666667, "loss": 0.12455101311206818, "loss_ce": 6.981042679399252e-05, "loss_iou": 0.0908203125, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 256391228, "step": 2809 }, { "epoch": 11.708333333333334, "grad_norm": 3.217577953810864, "learning_rate": 5e-05, "loss": 0.0775, "num_input_tokens_seen": 256482424, "step": 2810 }, { "epoch": 11.708333333333334, "loss": 0.09218916296958923, "loss_ce": 4.1327919461764395e-05, "loss_iou": 0.28125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 256482424, "step": 2810 }, { "epoch": 11.7125, "grad_norm": 3.50783614109373, "learning_rate": 5e-05, "loss": 0.0837, "num_input_tokens_seen": 256573408, "step": 2811 }, { "epoch": 11.7125, "loss": 0.11624173820018768, "loss_ce": 0.0002749458944890648, "loss_iou": 0.302734375, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 256573408, "step": 2811 }, { "epoch": 11.716666666666667, "grad_norm": 4.623612033239213, "learning_rate": 5e-05, "loss": 0.123, "num_input_tokens_seen": 256663956, "step": 2812 }, { "epoch": 11.716666666666667, "loss": 0.13129764795303345, "loss_ce": 0.0002856797364074737, "loss_iou": 0.337890625, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 256663956, "step": 2812 }, { "epoch": 11.720833333333333, "grad_norm": 2.3002718900676884, "learning_rate": 5e-05, "loss": 0.0413, "num_input_tokens_seen": 256754852, "step": 2813 }, { "epoch": 11.720833333333333, "loss": 0.04171242564916611, "loss_ce": 0.0001780021848389879, "loss_iou": 0.1875, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 256754852, "step": 2813 }, { "epoch": 11.725, "grad_norm": 3.5142776554663557, "learning_rate": 5e-05, "loss": 0.0897, "num_input_tokens_seen": 256846128, "step": 2814 }, { "epoch": 11.725, "loss": 0.10495474189519882, "loss_ce": 4.796497250936227e-06, "loss_iou": 0.119140625, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 256846128, "step": 2814 }, { "epoch": 11.729166666666666, "grad_norm": 4.37602634922094, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 256937748, "step": 2815 }, { "epoch": 11.729166666666666, "loss": 0.05913905054330826, "loss_ce": 0.00014857419591862708, "loss_iou": 0.3828125, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 256937748, "step": 2815 }, { "epoch": 11.733333333333333, "grad_norm": 2.1503700616017167, "learning_rate": 5e-05, "loss": 0.0625, "num_input_tokens_seen": 257028948, "step": 2816 }, { "epoch": 11.733333333333333, "loss": 0.034769974648952484, "loss_ce": 1.0452968126628548e-05, "loss_iou": 0.2734375, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 257028948, "step": 2816 }, { "epoch": 11.7375, "grad_norm": 5.0109340144284795, "learning_rate": 5e-05, "loss": 0.1121, "num_input_tokens_seen": 257120320, "step": 2817 }, { "epoch": 11.7375, "loss": 0.1027803048491478, "loss_ce": 0.00039382706745527685, "loss_iou": 0.203125, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 257120320, "step": 2817 }, { "epoch": 11.741666666666667, "grad_norm": 2.093254385288736, "learning_rate": 5e-05, "loss": 0.0906, "num_input_tokens_seen": 257210756, "step": 2818 }, { "epoch": 11.741666666666667, "loss": 0.06602243334054947, "loss_ce": 2.8168045901111327e-05, "loss_iou": 0.322265625, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 257210756, "step": 2818 }, { "epoch": 11.745833333333334, "grad_norm": 2.9800228128313035, "learning_rate": 5e-05, "loss": 0.0721, "num_input_tokens_seen": 257301984, "step": 2819 }, { "epoch": 11.745833333333334, "loss": 0.09179629385471344, "loss_ce": 2.9940814783913083e-05, "loss_iou": 0.2265625, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 257301984, "step": 2819 }, { "epoch": 11.75, "grad_norm": 5.110429224134522, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 257393624, "step": 2820 }, { "epoch": 11.75, "loss": 0.05140618979930878, "loss_ce": 0.001487057888880372, "loss_iou": 0.330078125, "loss_num": 0.010009765625, "loss_xval": 0.0498046875, "num_input_tokens_seen": 257393624, "step": 2820 }, { "epoch": 11.754166666666666, "grad_norm": 2.954461092840001, "learning_rate": 5e-05, "loss": 0.085, "num_input_tokens_seen": 257484916, "step": 2821 }, { "epoch": 11.754166666666666, "loss": 0.06782172620296478, "loss_ce": 0.0014002150855958462, "loss_iou": 0.25390625, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 257484916, "step": 2821 }, { "epoch": 11.758333333333333, "grad_norm": 2.070142501990094, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 257576552, "step": 2822 }, { "epoch": 11.758333333333333, "loss": 0.05732317268848419, "loss_ce": 2.6417277695145458e-05, "loss_iou": 0.134765625, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 257576552, "step": 2822 }, { "epoch": 11.7625, "grad_norm": 2.4238920761588987, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 257667628, "step": 2823 }, { "epoch": 11.7625, "loss": 0.0896691381931305, "loss_ce": 2.3749711544951424e-05, "loss_iou": 0.142578125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 257667628, "step": 2823 }, { "epoch": 11.766666666666667, "grad_norm": 4.637905965425538, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 257758564, "step": 2824 }, { "epoch": 11.766666666666667, "loss": 0.07250712811946869, "loss_ce": 0.0027592037804424763, "loss_iou": 0.09619140625, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 257758564, "step": 2824 }, { "epoch": 11.770833333333334, "grad_norm": 5.46493963664531, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 257849596, "step": 2825 }, { "epoch": 11.770833333333334, "loss": 0.06953981518745422, "loss_ce": 0.001790795475244522, "loss_iou": 0.25390625, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 257849596, "step": 2825 }, { "epoch": 11.775, "grad_norm": 6.457935238980587, "learning_rate": 5e-05, "loss": 0.0804, "num_input_tokens_seen": 257940964, "step": 2826 }, { "epoch": 11.775, "loss": 0.03701567277312279, "loss_ce": 0.00013517978368327022, "loss_iou": 0.26953125, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 257940964, "step": 2826 }, { "epoch": 11.779166666666667, "grad_norm": 2.952868993992443, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 258032144, "step": 2827 }, { "epoch": 11.779166666666667, "loss": 0.07449017465114594, "loss_ce": 1.2029305253236089e-05, "loss_iou": 0.359375, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 258032144, "step": 2827 }, { "epoch": 11.783333333333333, "grad_norm": 3.49494376828575, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 258123184, "step": 2828 }, { "epoch": 11.783333333333333, "loss": 0.06829185783863068, "loss_ce": 0.00039788178401067853, "loss_iou": 0.2060546875, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 258123184, "step": 2828 }, { "epoch": 11.7875, "grad_norm": 3.9275142395882807, "learning_rate": 5e-05, "loss": 0.0917, "num_input_tokens_seen": 258214768, "step": 2829 }, { "epoch": 11.7875, "loss": 0.11695680767297745, "loss_ce": 0.0044690147042274475, "loss_iou": 0.279296875, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 258214768, "step": 2829 }, { "epoch": 11.791666666666666, "grad_norm": 2.8659715350381125, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 258304748, "step": 2830 }, { "epoch": 11.791666666666666, "loss": 0.06732072681188583, "loss_ce": 6.581793059012853e-06, "loss_iou": 0.2734375, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 258304748, "step": 2830 }, { "epoch": 11.795833333333333, "grad_norm": 2.522448317248731, "learning_rate": 5e-05, "loss": 0.0391, "num_input_tokens_seen": 258396604, "step": 2831 }, { "epoch": 11.795833333333333, "loss": 0.02837366610765457, "loss_ce": 0.0002822370151989162, "loss_iou": 0.2431640625, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 258396604, "step": 2831 }, { "epoch": 11.8, "grad_norm": 14.163127732807999, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 258488080, "step": 2832 }, { "epoch": 11.8, "loss": 0.06442129611968994, "loss_ce": 0.0002199427835876122, "loss_iou": 0.291015625, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 258488080, "step": 2832 }, { "epoch": 11.804166666666667, "grad_norm": 3.2389575147775056, "learning_rate": 5e-05, "loss": 0.0678, "num_input_tokens_seen": 258579568, "step": 2833 }, { "epoch": 11.804166666666667, "loss": 0.06743942946195602, "loss_ce": 8.713373972568661e-05, "loss_iou": 0.251953125, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 258579568, "step": 2833 }, { "epoch": 11.808333333333334, "grad_norm": 3.278984446766221, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 258671068, "step": 2834 }, { "epoch": 11.808333333333334, "loss": 0.04743684455752373, "loss_ce": 0.00019563363457564265, "loss_iou": 0.2412109375, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 258671068, "step": 2834 }, { "epoch": 11.8125, "grad_norm": 2.7045741910549856, "learning_rate": 5e-05, "loss": 0.0677, "num_input_tokens_seen": 258762528, "step": 2835 }, { "epoch": 11.8125, "loss": 0.0750080943107605, "loss_ce": 4.166929647908546e-05, "loss_iou": 0.21484375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 258762528, "step": 2835 }, { "epoch": 11.816666666666666, "grad_norm": 8.82185220811526, "learning_rate": 5e-05, "loss": 0.1116, "num_input_tokens_seen": 258853572, "step": 2836 }, { "epoch": 11.816666666666666, "loss": 0.14761298894882202, "loss_ce": 0.0003351602063048631, "loss_iou": 0.36328125, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 258853572, "step": 2836 }, { "epoch": 11.820833333333333, "grad_norm": 5.2310030751405, "learning_rate": 5e-05, "loss": 0.1105, "num_input_tokens_seen": 258944892, "step": 2837 }, { "epoch": 11.820833333333333, "loss": 0.08534011244773865, "loss_ce": 1.2963697372470051e-05, "loss_iou": 0.34765625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 258944892, "step": 2837 }, { "epoch": 11.825, "grad_norm": 1.705351501456277, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 259036288, "step": 2838 }, { "epoch": 11.825, "loss": 0.05713297426700592, "loss_ce": 1.9327546397107653e-05, "loss_iou": 0.27734375, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 259036288, "step": 2838 }, { "epoch": 11.829166666666667, "grad_norm": 1.1420082612365101, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 259128004, "step": 2839 }, { "epoch": 11.829166666666667, "loss": 0.06977026909589767, "loss_ce": 0.0031351372599601746, "loss_iou": 0.15625, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 259128004, "step": 2839 }, { "epoch": 11.833333333333334, "grad_norm": 2.2371733830453264, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 259219580, "step": 2840 }, { "epoch": 11.833333333333334, "loss": 0.07266523689031601, "loss_ce": 9.443731687497348e-05, "loss_iou": 0.244140625, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 259219580, "step": 2840 }, { "epoch": 11.8375, "grad_norm": 2.4504193950415103, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 259310764, "step": 2841 }, { "epoch": 11.8375, "loss": 0.049181997776031494, "loss_ce": 2.9190573513915297e-06, "loss_iou": 0.37109375, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 259310764, "step": 2841 }, { "epoch": 11.841666666666667, "grad_norm": 6.426105045159625, "learning_rate": 5e-05, "loss": 0.111, "num_input_tokens_seen": 259402568, "step": 2842 }, { "epoch": 11.841666666666667, "loss": 0.08205066621303558, "loss_ce": 0.004261358641088009, "loss_iou": 0.279296875, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 259402568, "step": 2842 }, { "epoch": 11.845833333333333, "grad_norm": 2.2911199553655894, "learning_rate": 5e-05, "loss": 0.1029, "num_input_tokens_seen": 259493920, "step": 2843 }, { "epoch": 11.845833333333333, "loss": 0.1470203995704651, "loss_ce": 0.0003681685193441808, "loss_iou": 0.232421875, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 259493920, "step": 2843 }, { "epoch": 11.85, "grad_norm": 1.7712901267125332, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 259585264, "step": 2844 }, { "epoch": 11.85, "loss": 0.05896005034446716, "loss_ce": 7.720286703261081e-06, "loss_iou": 0.208984375, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 259585264, "step": 2844 }, { "epoch": 11.854166666666666, "grad_norm": 2.1354891999723775, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 259676860, "step": 2845 }, { "epoch": 11.854166666666666, "loss": 0.03327609598636627, "loss_ce": 0.0007367273210547864, "loss_iou": 0.2080078125, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 259676860, "step": 2845 }, { "epoch": 11.858333333333333, "grad_norm": 1.6946017824633848, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 259767912, "step": 2846 }, { "epoch": 11.858333333333333, "loss": 0.07876819372177124, "loss_ce": 1.7584598026587628e-05, "loss_iou": 0.27734375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 259767912, "step": 2846 }, { "epoch": 11.8625, "grad_norm": 2.7137875697412874, "learning_rate": 5e-05, "loss": 0.1371, "num_input_tokens_seen": 259858860, "step": 2847 }, { "epoch": 11.8625, "loss": 0.15086443722248077, "loss_ce": 1.6057805623859167e-05, "loss_iou": 0.2353515625, "loss_num": 0.0302734375, "loss_xval": 0.150390625, "num_input_tokens_seen": 259858860, "step": 2847 }, { "epoch": 11.866666666666667, "grad_norm": 1.1937516546302895, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 259951008, "step": 2848 }, { "epoch": 11.866666666666667, "loss": 0.03670906648039818, "loss_ce": 9.560144826536998e-05, "loss_iou": 0.19921875, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 259951008, "step": 2848 }, { "epoch": 11.870833333333334, "grad_norm": 1.6499540676990112, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 260042204, "step": 2849 }, { "epoch": 11.870833333333334, "loss": 0.03667999804019928, "loss_ce": 8.94237236934714e-05, "loss_iou": 0.33984375, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 260042204, "step": 2849 }, { "epoch": 11.875, "grad_norm": 1.440890559983585, "learning_rate": 5e-05, "loss": 0.089, "num_input_tokens_seen": 260132976, "step": 2850 }, { "epoch": 11.875, "loss": 0.07991119474172592, "loss_ce": 9.17267129807442e-07, "loss_iou": 0.228515625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 260132976, "step": 2850 }, { "epoch": 11.879166666666666, "grad_norm": 3.725208714864202, "learning_rate": 5e-05, "loss": 0.0994, "num_input_tokens_seen": 260224912, "step": 2851 }, { "epoch": 11.879166666666666, "loss": 0.1331530511379242, "loss_ce": 0.00018796537187881768, "loss_iou": 0.29296875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 260224912, "step": 2851 }, { "epoch": 11.883333333333333, "grad_norm": 2.7663335524624015, "learning_rate": 5e-05, "loss": 0.0923, "num_input_tokens_seen": 260316564, "step": 2852 }, { "epoch": 11.883333333333333, "loss": 0.05577700585126877, "loss_ce": 0.0012192062567919493, "loss_iou": 0.251953125, "loss_num": 0.01092529296875, "loss_xval": 0.054443359375, "num_input_tokens_seen": 260316564, "step": 2852 }, { "epoch": 11.8875, "grad_norm": 5.732165254014268, "learning_rate": 5e-05, "loss": 0.0757, "num_input_tokens_seen": 260407848, "step": 2853 }, { "epoch": 11.8875, "loss": 0.09239616245031357, "loss_ce": 0.0001796693541109562, "loss_iou": 0.328125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 260407848, "step": 2853 }, { "epoch": 11.891666666666667, "grad_norm": 3.70790354718247, "learning_rate": 5e-05, "loss": 0.0969, "num_input_tokens_seen": 260499980, "step": 2854 }, { "epoch": 11.891666666666667, "loss": 0.10627731680870056, "loss_ce": 0.001647809287533164, "loss_iou": 0.279296875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 260499980, "step": 2854 }, { "epoch": 11.895833333333334, "grad_norm": 2.326751790263759, "learning_rate": 5e-05, "loss": 0.0789, "num_input_tokens_seen": 260591020, "step": 2855 }, { "epoch": 11.895833333333334, "loss": 0.0929078683257103, "loss_ce": 1.2361353583401069e-05, "loss_iou": 0.21484375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 260591020, "step": 2855 }, { "epoch": 11.9, "grad_norm": 5.539103654373248, "learning_rate": 5e-05, "loss": 0.1042, "num_input_tokens_seen": 260683344, "step": 2856 }, { "epoch": 11.9, "loss": 0.10079024732112885, "loss_ce": 0.007009732071310282, "loss_iou": 0.25390625, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 260683344, "step": 2856 }, { "epoch": 11.904166666666667, "grad_norm": 5.560794328132688, "learning_rate": 5e-05, "loss": 0.0863, "num_input_tokens_seen": 260774700, "step": 2857 }, { "epoch": 11.904166666666667, "loss": 0.09718882292509079, "loss_ce": 2.0858064090134576e-05, "loss_iou": 0.275390625, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 260774700, "step": 2857 }, { "epoch": 11.908333333333333, "grad_norm": 2.759221182078596, "learning_rate": 5e-05, "loss": 0.0829, "num_input_tokens_seen": 260866232, "step": 2858 }, { "epoch": 11.908333333333333, "loss": 0.06998462229967117, "loss_ce": 6.122285412857309e-05, "loss_iou": 0.26171875, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 260866232, "step": 2858 }, { "epoch": 11.9125, "grad_norm": 2.564275124770271, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 260957752, "step": 2859 }, { "epoch": 11.9125, "loss": 0.053347524255514145, "loss_ce": 3.3316449844278395e-05, "loss_iou": 0.26953125, "loss_num": 0.01068115234375, "loss_xval": 0.05322265625, "num_input_tokens_seen": 260957752, "step": 2859 }, { "epoch": 11.916666666666666, "grad_norm": 3.333069768156953, "learning_rate": 5e-05, "loss": 0.063, "num_input_tokens_seen": 261049304, "step": 2860 }, { "epoch": 11.916666666666666, "loss": 0.06215094029903412, "loss_ce": 1.7154019587906078e-05, "loss_iou": 0.365234375, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 261049304, "step": 2860 }, { "epoch": 11.920833333333333, "grad_norm": 3.0899776462038795, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 261140836, "step": 2861 }, { "epoch": 11.920833333333333, "loss": 0.07568001747131348, "loss_ce": 0.0009615468443371356, "loss_iou": 0.240234375, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 261140836, "step": 2861 }, { "epoch": 11.925, "grad_norm": 2.9081179239353236, "learning_rate": 5e-05, "loss": 0.0542, "num_input_tokens_seen": 261232416, "step": 2862 }, { "epoch": 11.925, "loss": 0.061053868383169174, "loss_ce": 0.0004535876796580851, "loss_iou": 0.208984375, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 261232416, "step": 2862 }, { "epoch": 11.929166666666667, "grad_norm": 3.6913574493836427, "learning_rate": 5e-05, "loss": 0.0845, "num_input_tokens_seen": 261323292, "step": 2863 }, { "epoch": 11.929166666666667, "loss": 0.07173296064138412, "loss_ce": 1.3924227459938265e-06, "loss_iou": 0.306640625, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 261323292, "step": 2863 }, { "epoch": 11.933333333333334, "grad_norm": 7.352666525893405, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 261413988, "step": 2864 }, { "epoch": 11.933333333333334, "loss": 0.07763297855854034, "loss_ce": 0.0035057791974395514, "loss_iou": 0.396484375, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 261413988, "step": 2864 }, { "epoch": 11.9375, "grad_norm": 3.8865203166046918, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 261504740, "step": 2865 }, { "epoch": 11.9375, "loss": 0.06813535839319229, "loss_ce": 4.8647452786099166e-06, "loss_iou": 0.34765625, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 261504740, "step": 2865 }, { "epoch": 11.941666666666666, "grad_norm": 36.39431622438075, "learning_rate": 5e-05, "loss": 0.1249, "num_input_tokens_seen": 261595296, "step": 2866 }, { "epoch": 11.941666666666666, "loss": 0.1817329227924347, "loss_ce": 7.592077508888906e-07, "loss_iou": 0.2373046875, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 261595296, "step": 2866 }, { "epoch": 11.945833333333333, "grad_norm": 3.2830388964775645, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 261686756, "step": 2867 }, { "epoch": 11.945833333333333, "loss": 0.03829586133360863, "loss_ce": 0.002208825433626771, "loss_iou": 0.185546875, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 261686756, "step": 2867 }, { "epoch": 11.95, "grad_norm": 1.8327089283590499, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 261777792, "step": 2868 }, { "epoch": 11.95, "loss": 0.0942518413066864, "loss_ce": 4.407918095239438e-05, "loss_iou": 0.259765625, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 261777792, "step": 2868 }, { "epoch": 11.954166666666667, "grad_norm": 1.5163096220501042, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 261869352, "step": 2869 }, { "epoch": 11.954166666666667, "loss": 0.05169472098350525, "loss_ce": 0.00010475327144376934, "loss_iou": 0.26171875, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 261869352, "step": 2869 }, { "epoch": 11.958333333333334, "grad_norm": 8.579480300836751, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 261960436, "step": 2870 }, { "epoch": 11.958333333333334, "loss": 0.053049832582473755, "loss_ce": 0.0016429764218628407, "loss_iou": 0.1884765625, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 261960436, "step": 2870 }, { "epoch": 11.9625, "grad_norm": 1.856468335604494, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 262051256, "step": 2871 }, { "epoch": 11.9625, "loss": 0.023780783638358116, "loss_ce": 0.00015254892059601843, "loss_iou": 0.2373046875, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 262051256, "step": 2871 }, { "epoch": 11.966666666666667, "grad_norm": 3.277751162934432, "learning_rate": 5e-05, "loss": 0.0761, "num_input_tokens_seen": 262142500, "step": 2872 }, { "epoch": 11.966666666666667, "loss": 0.04581213742494583, "loss_ce": 0.00021887400362174958, "loss_iou": 0.3515625, "loss_num": 0.00909423828125, "loss_xval": 0.045654296875, "num_input_tokens_seen": 262142500, "step": 2872 }, { "epoch": 11.970833333333333, "grad_norm": 34.78272795634807, "learning_rate": 5e-05, "loss": 0.0785, "num_input_tokens_seen": 262233244, "step": 2873 }, { "epoch": 11.970833333333333, "loss": 0.06828820705413818, "loss_ce": 0.0003255601041018963, "loss_iou": 0.24609375, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 262233244, "step": 2873 }, { "epoch": 11.975, "grad_norm": 12.613292522757435, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 262325024, "step": 2874 }, { "epoch": 11.975, "loss": 0.05874905735254288, "loss_ce": 0.00015530729433521628, "loss_iou": 0.17578125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 262325024, "step": 2874 }, { "epoch": 11.979166666666666, "grad_norm": 2.789133765269862, "learning_rate": 5e-05, "loss": 0.0698, "num_input_tokens_seen": 262416224, "step": 2875 }, { "epoch": 11.979166666666666, "loss": 0.06322193145751953, "loss_ce": 0.0025148349814116955, "loss_iou": 0.310546875, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 262416224, "step": 2875 }, { "epoch": 11.983333333333333, "grad_norm": 3.9294923350103423, "learning_rate": 5e-05, "loss": 0.0833, "num_input_tokens_seen": 262508332, "step": 2876 }, { "epoch": 11.983333333333333, "loss": 0.07195824384689331, "loss_ce": 0.0007607348961755633, "loss_iou": 0.275390625, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 262508332, "step": 2876 }, { "epoch": 11.9875, "grad_norm": 4.479058343755453, "learning_rate": 5e-05, "loss": 0.0827, "num_input_tokens_seen": 262600376, "step": 2877 }, { "epoch": 11.9875, "loss": 0.0894709974527359, "loss_ce": 8.714882824278902e-06, "loss_iou": 0.333984375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 262600376, "step": 2877 }, { "epoch": 11.991666666666667, "grad_norm": 5.508719950001302, "learning_rate": 5e-05, "loss": 0.0811, "num_input_tokens_seen": 262690988, "step": 2878 }, { "epoch": 11.991666666666667, "loss": 0.09753264486789703, "loss_ce": 0.00010526960977585986, "loss_iou": 0.2734375, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 262690988, "step": 2878 }, { "epoch": 11.995833333333334, "grad_norm": 2.7247469459373894, "learning_rate": 5e-05, "loss": 0.0737, "num_input_tokens_seen": 262782476, "step": 2879 }, { "epoch": 11.995833333333334, "loss": 0.03912658989429474, "loss_ce": 0.00030823066481389105, "loss_iou": 0.2578125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 262782476, "step": 2879 }, { "epoch": 12.0, "grad_norm": 3.3107134432842362, "learning_rate": 5e-05, "loss": 0.0822, "num_input_tokens_seen": 262874424, "step": 2880 }, { "epoch": 12.0, "loss": 0.09382101148366928, "loss_ce": 0.0004982562968507409, "loss_iou": 0.25390625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 262874424, "step": 2880 }, { "epoch": 12.004166666666666, "grad_norm": 2.058713282308722, "learning_rate": 5e-05, "loss": 0.0681, "num_input_tokens_seen": 262965544, "step": 2881 }, { "epoch": 12.004166666666666, "loss": 0.06187222898006439, "loss_ce": 8.939013787312433e-05, "loss_iou": 0.291015625, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 262965544, "step": 2881 }, { "epoch": 12.008333333333333, "grad_norm": 3.115740501206554, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 263056624, "step": 2882 }, { "epoch": 12.008333333333333, "loss": 0.08259187638759613, "loss_ce": 0.0008047710871323943, "loss_iou": 0.2578125, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 263056624, "step": 2882 }, { "epoch": 12.0125, "grad_norm": 4.965254203037331, "learning_rate": 5e-05, "loss": 0.1174, "num_input_tokens_seen": 263148000, "step": 2883 }, { "epoch": 12.0125, "loss": 0.1574704349040985, "loss_ce": 0.00012181226338725537, "loss_iou": 0.2333984375, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 263148000, "step": 2883 }, { "epoch": 12.016666666666667, "grad_norm": 3.6225222493450304, "learning_rate": 5e-05, "loss": 0.1778, "num_input_tokens_seen": 263239460, "step": 2884 }, { "epoch": 12.016666666666667, "loss": 0.195316344499588, "loss_ce": 0.003757503116503358, "loss_iou": 0.1640625, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 263239460, "step": 2884 }, { "epoch": 12.020833333333334, "grad_norm": 4.810819290420674, "learning_rate": 5e-05, "loss": 0.0793, "num_input_tokens_seen": 263330848, "step": 2885 }, { "epoch": 12.020833333333334, "loss": 0.04318102449178696, "loss_ce": 0.000799737055785954, "loss_iou": 0.357421875, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 263330848, "step": 2885 }, { "epoch": 12.025, "grad_norm": 5.366359423275253, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 263422472, "step": 2886 }, { "epoch": 12.025, "loss": 0.055778469890356064, "loss_ce": 0.0001449243864044547, "loss_iou": 0.375, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 263422472, "step": 2886 }, { "epoch": 12.029166666666667, "grad_norm": 3.0301501491243337, "learning_rate": 5e-05, "loss": 0.04, "num_input_tokens_seen": 263514088, "step": 2887 }, { "epoch": 12.029166666666667, "loss": 0.05425199121236801, "loss_ce": 0.00031217176001518965, "loss_iou": 0.263671875, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 263514088, "step": 2887 }, { "epoch": 12.033333333333333, "grad_norm": 3.9172536956407757, "learning_rate": 5e-05, "loss": 0.0812, "num_input_tokens_seen": 263605540, "step": 2888 }, { "epoch": 12.033333333333333, "loss": 0.092352494597435, "loss_ce": 0.0003648839774541557, "loss_iou": 0.275390625, "loss_num": 0.0184326171875, "loss_xval": 0.091796875, "num_input_tokens_seen": 263605540, "step": 2888 }, { "epoch": 12.0375, "grad_norm": 2.2012629252219744, "learning_rate": 5e-05, "loss": 0.0737, "num_input_tokens_seen": 263696612, "step": 2889 }, { "epoch": 12.0375, "loss": 0.07746168971061707, "loss_ce": 0.001137231825850904, "loss_iou": 0.201171875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 263696612, "step": 2889 }, { "epoch": 12.041666666666666, "grad_norm": 2.8205767907604056, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 263788176, "step": 2890 }, { "epoch": 12.041666666666666, "loss": 0.0641142725944519, "loss_ce": 0.001415909151546657, "loss_iou": 0.35546875, "loss_num": 0.0125732421875, "loss_xval": 0.0625, "num_input_tokens_seen": 263788176, "step": 2890 }, { "epoch": 12.045833333333333, "grad_norm": 1.141144522550956, "learning_rate": 5e-05, "loss": 0.105, "num_input_tokens_seen": 263879328, "step": 2891 }, { "epoch": 12.045833333333333, "loss": 0.11292260885238647, "loss_ce": 0.0005874069756828249, "loss_iou": 0.240234375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 263879328, "step": 2891 }, { "epoch": 12.05, "grad_norm": 2.183273900885889, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 263971180, "step": 2892 }, { "epoch": 12.05, "loss": 0.04804209619760513, "loss_ce": 0.0003126035735476762, "loss_iou": 0.259765625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 263971180, "step": 2892 }, { "epoch": 12.054166666666667, "grad_norm": 27.968695688765518, "learning_rate": 5e-05, "loss": 0.1055, "num_input_tokens_seen": 264062552, "step": 2893 }, { "epoch": 12.054166666666667, "loss": 0.09331085532903671, "loss_ce": 3.358149797350052e-06, "loss_iou": 0.302734375, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 264062552, "step": 2893 }, { "epoch": 12.058333333333334, "grad_norm": 1.479698420035943, "learning_rate": 5e-05, "loss": 0.0485, "num_input_tokens_seen": 264153656, "step": 2894 }, { "epoch": 12.058333333333334, "loss": 0.04069763422012329, "loss_ce": 0.00010925379319814965, "loss_iou": 0.291015625, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 264153656, "step": 2894 }, { "epoch": 12.0625, "grad_norm": 2.6670024246000383, "learning_rate": 5e-05, "loss": 0.0672, "num_input_tokens_seen": 264245196, "step": 2895 }, { "epoch": 12.0625, "loss": 0.06512384116649628, "loss_ce": 0.0004570932942442596, "loss_iou": 0.30859375, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 264245196, "step": 2895 }, { "epoch": 12.066666666666666, "grad_norm": 5.723331032317641, "learning_rate": 5e-05, "loss": 0.1045, "num_input_tokens_seen": 264336084, "step": 2896 }, { "epoch": 12.066666666666666, "loss": 0.15588708221912384, "loss_ce": 3.283528712927364e-06, "loss_iou": 0.0859375, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 264336084, "step": 2896 }, { "epoch": 12.070833333333333, "grad_norm": 6.767367961633356, "learning_rate": 5e-05, "loss": 0.1252, "num_input_tokens_seen": 264428068, "step": 2897 }, { "epoch": 12.070833333333333, "loss": 0.16708049178123474, "loss_ce": 0.0008207280770875514, "loss_iou": 0.0, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 264428068, "step": 2897 }, { "epoch": 12.075, "grad_norm": 9.613717735454351, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 264519880, "step": 2898 }, { "epoch": 12.075, "loss": 0.029908880591392517, "loss_ce": 1.6913822037167847e-05, "loss_iou": 0.171875, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 264519880, "step": 2898 }, { "epoch": 12.079166666666667, "grad_norm": 3.628596364029426, "learning_rate": 5e-05, "loss": 0.0788, "num_input_tokens_seen": 264611556, "step": 2899 }, { "epoch": 12.079166666666667, "loss": 0.11229430139064789, "loss_ce": 0.002400508848950267, "loss_iou": 0.353515625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 264611556, "step": 2899 }, { "epoch": 12.083333333333334, "grad_norm": 5.529244181375904, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 264702740, "step": 2900 }, { "epoch": 12.083333333333334, "loss": 0.04226759821176529, "loss_ce": 7.488557685064734e-07, "loss_iou": 0.3203125, "loss_num": 0.00848388671875, "loss_xval": 0.042236328125, "num_input_tokens_seen": 264702740, "step": 2900 }, { "epoch": 12.0875, "grad_norm": 2.703727858030741, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 264794332, "step": 2901 }, { "epoch": 12.0875, "loss": 0.04961564019322395, "loss_ce": 2.4576796931796707e-05, "loss_iou": 0.39453125, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 264794332, "step": 2901 }, { "epoch": 12.091666666666667, "grad_norm": 2.4838722579389616, "learning_rate": 5e-05, "loss": 0.0625, "num_input_tokens_seen": 264885436, "step": 2902 }, { "epoch": 12.091666666666667, "loss": 0.056573957204818726, "loss_ce": 0.00037583772791549563, "loss_iou": 0.263671875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 264885436, "step": 2902 }, { "epoch": 12.095833333333333, "grad_norm": 4.40229256652342, "learning_rate": 5e-05, "loss": 0.0916, "num_input_tokens_seen": 264976496, "step": 2903 }, { "epoch": 12.095833333333333, "loss": 0.09596607089042664, "loss_ce": 3.5482567000144627e-06, "loss_iou": 0.3203125, "loss_num": 0.0191650390625, "loss_xval": 0.09619140625, "num_input_tokens_seen": 264976496, "step": 2903 }, { "epoch": 12.1, "grad_norm": 3.0575238205030666, "learning_rate": 5e-05, "loss": 0.1059, "num_input_tokens_seen": 265066968, "step": 2904 }, { "epoch": 12.1, "loss": 0.16267964243888855, "loss_ce": 0.00014301914779935032, "loss_iou": 0.2373046875, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 265066968, "step": 2904 }, { "epoch": 12.104166666666666, "grad_norm": 2.931793056011788, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 265158324, "step": 2905 }, { "epoch": 12.104166666666666, "loss": 0.07005259394645691, "loss_ce": 0.00012156509910710156, "loss_iou": 0.19140625, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 265158324, "step": 2905 }, { "epoch": 12.108333333333333, "grad_norm": 1.6134630286599965, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 265250108, "step": 2906 }, { "epoch": 12.108333333333333, "loss": 0.05043035373091698, "loss_ce": 6.10871720709838e-05, "loss_iou": 0.09765625, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 265250108, "step": 2906 }, { "epoch": 12.1125, "grad_norm": 2.656497257584411, "learning_rate": 5e-05, "loss": 0.0672, "num_input_tokens_seen": 265342096, "step": 2907 }, { "epoch": 12.1125, "loss": 0.09211976826190948, "loss_ce": 2.462998509145109e-06, "loss_iou": 0.263671875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 265342096, "step": 2907 }, { "epoch": 12.116666666666667, "grad_norm": 2.44578022152003, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 265433364, "step": 2908 }, { "epoch": 12.116666666666667, "loss": 0.06962820887565613, "loss_ce": 3.287343861302361e-05, "loss_iou": 0.1875, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 265433364, "step": 2908 }, { "epoch": 12.120833333333334, "grad_norm": 2.5929382173081827, "learning_rate": 5e-05, "loss": 0.0539, "num_input_tokens_seen": 265524704, "step": 2909 }, { "epoch": 12.120833333333334, "loss": 0.0612100325524807, "loss_ce": 3.754855060833506e-05, "loss_iou": 0.29296875, "loss_num": 0.01220703125, "loss_xval": 0.061279296875, "num_input_tokens_seen": 265524704, "step": 2909 }, { "epoch": 12.125, "grad_norm": 2.952912122345602, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 265616064, "step": 2910 }, { "epoch": 12.125, "loss": 0.0755220502614975, "loss_ce": 0.0010286483447998762, "loss_iou": 0.275390625, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 265616064, "step": 2910 }, { "epoch": 12.129166666666666, "grad_norm": 2.8047268982104887, "learning_rate": 5e-05, "loss": 0.0575, "num_input_tokens_seen": 265707516, "step": 2911 }, { "epoch": 12.129166666666666, "loss": 0.03295915573835373, "loss_ce": 0.0007707421318627894, "loss_iou": 0.337890625, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 265707516, "step": 2911 }, { "epoch": 12.133333333333333, "grad_norm": 2.716707347455883, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 265798680, "step": 2912 }, { "epoch": 12.133333333333333, "loss": 0.05870115011930466, "loss_ce": 5.865215939593327e-07, "loss_iou": 0.3046875, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 265798680, "step": 2912 }, { "epoch": 12.1375, "grad_norm": 3.7884463613882646, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 265889936, "step": 2913 }, { "epoch": 12.1375, "loss": 0.06101018935441971, "loss_ce": 3.60674130206462e-05, "loss_iou": 0.32421875, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 265889936, "step": 2913 }, { "epoch": 12.141666666666667, "grad_norm": 4.705935262745655, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 265981024, "step": 2914 }, { "epoch": 12.141666666666667, "loss": 0.06493838876485825, "loss_ce": 0.00024112407118082047, "loss_iou": 0.15234375, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 265981024, "step": 2914 }, { "epoch": 12.145833333333334, "grad_norm": 4.364909969389509, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 266072952, "step": 2915 }, { "epoch": 12.145833333333334, "loss": 0.07338149845600128, "loss_ce": 0.00030715003958903253, "loss_iou": 0.34765625, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 266072952, "step": 2915 }, { "epoch": 12.15, "grad_norm": 2.72071504424907, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 266164872, "step": 2916 }, { "epoch": 12.15, "loss": 0.07031304389238358, "loss_ce": 5.453845233205357e-07, "loss_iou": 0.2314453125, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 266164872, "step": 2916 }, { "epoch": 12.154166666666667, "grad_norm": 3.4031048303871128, "learning_rate": 5e-05, "loss": 0.0628, "num_input_tokens_seen": 266255884, "step": 2917 }, { "epoch": 12.154166666666667, "loss": 0.04454587399959564, "loss_ce": 0.0002953895309474319, "loss_iou": 0.265625, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 266255884, "step": 2917 }, { "epoch": 12.158333333333333, "grad_norm": 3.6525245485243634, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 266347008, "step": 2918 }, { "epoch": 12.158333333333333, "loss": 0.05760706961154938, "loss_ce": 5.091585626360029e-05, "loss_iou": 0.33203125, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 266347008, "step": 2918 }, { "epoch": 12.1625, "grad_norm": 3.8083362004566594, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 266438312, "step": 2919 }, { "epoch": 12.1625, "loss": 0.05300240218639374, "loss_ce": 0.0001917332410812378, "loss_iou": 0.390625, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 266438312, "step": 2919 }, { "epoch": 12.166666666666666, "grad_norm": 3.220254727036035, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 266529904, "step": 2920 }, { "epoch": 12.166666666666666, "loss": 0.04786030575633049, "loss_ce": 0.00014607336197514087, "loss_iou": 0.3359375, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 266529904, "step": 2920 }, { "epoch": 12.170833333333333, "grad_norm": 6.3841299060006715, "learning_rate": 5e-05, "loss": 0.072, "num_input_tokens_seen": 266621108, "step": 2921 }, { "epoch": 12.170833333333333, "loss": 0.09495310485363007, "loss_ce": 4.343495675129816e-05, "loss_iou": 0.28515625, "loss_num": 0.01904296875, "loss_xval": 0.0947265625, "num_input_tokens_seen": 266621108, "step": 2921 }, { "epoch": 12.175, "grad_norm": 5.043845795133797, "learning_rate": 5e-05, "loss": 0.0733, "num_input_tokens_seen": 266712428, "step": 2922 }, { "epoch": 12.175, "loss": 0.07533925771713257, "loss_ce": 3.7128469557501376e-05, "loss_iou": 0.33984375, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 266712428, "step": 2922 }, { "epoch": 12.179166666666667, "grad_norm": 2.027261528087587, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 266803764, "step": 2923 }, { "epoch": 12.179166666666667, "loss": 0.03585366904735565, "loss_ce": 3.147234565403778e-06, "loss_iou": 0.2236328125, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 266803764, "step": 2923 }, { "epoch": 12.183333333333334, "grad_norm": 2.861881243241126, "learning_rate": 5e-05, "loss": 0.0876, "num_input_tokens_seen": 266895580, "step": 2924 }, { "epoch": 12.183333333333334, "loss": 0.03889714181423187, "loss_ce": 0.0020624231547117233, "loss_iou": 0.271484375, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 266895580, "step": 2924 }, { "epoch": 12.1875, "grad_norm": 3.7892169185950517, "learning_rate": 5e-05, "loss": 0.0686, "num_input_tokens_seen": 266986668, "step": 2925 }, { "epoch": 12.1875, "loss": 0.056877922266721725, "loss_ce": 8.416508535447065e-06, "loss_iou": 0.333984375, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 266986668, "step": 2925 }, { "epoch": 12.191666666666666, "grad_norm": 4.483164328656569, "learning_rate": 5e-05, "loss": 0.0961, "num_input_tokens_seen": 267078348, "step": 2926 }, { "epoch": 12.191666666666666, "loss": 0.061795562505722046, "loss_ce": 0.000928252877201885, "loss_iou": 0.271484375, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 267078348, "step": 2926 }, { "epoch": 12.195833333333333, "grad_norm": 2.6821650432495447, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 267169512, "step": 2927 }, { "epoch": 12.195833333333333, "loss": 0.029609516263008118, "loss_ce": 2.2723163056070916e-05, "loss_iou": 0.32421875, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 267169512, "step": 2927 }, { "epoch": 12.2, "grad_norm": 8.672822575584679, "learning_rate": 5e-05, "loss": 0.1132, "num_input_tokens_seen": 267260836, "step": 2928 }, { "epoch": 12.2, "loss": 0.11694012582302094, "loss_ce": 0.002804376883432269, "loss_iou": 0.279296875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 267260836, "step": 2928 }, { "epoch": 12.204166666666667, "grad_norm": 9.806434406786224, "learning_rate": 5e-05, "loss": 0.0587, "num_input_tokens_seen": 267352716, "step": 2929 }, { "epoch": 12.204166666666667, "loss": 0.04113160818815231, "loss_ce": 2.443217636027839e-05, "loss_iou": 0.30859375, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 267352716, "step": 2929 }, { "epoch": 12.208333333333334, "grad_norm": 2.175031456204424, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 267443920, "step": 2930 }, { "epoch": 12.208333333333334, "loss": 0.05550452694296837, "loss_ce": 8.460329991066828e-05, "loss_iou": 0.15625, "loss_num": 0.01104736328125, "loss_xval": 0.055419921875, "num_input_tokens_seen": 267443920, "step": 2930 }, { "epoch": 12.2125, "grad_norm": 1.605174398404864, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 267535708, "step": 2931 }, { "epoch": 12.2125, "loss": 0.05761126056313515, "loss_ce": 0.00031451130053028464, "loss_iou": 0.1552734375, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 267535708, "step": 2931 }, { "epoch": 12.216666666666667, "grad_norm": 1.9986457114321676, "learning_rate": 5e-05, "loss": 0.0639, "num_input_tokens_seen": 267626804, "step": 2932 }, { "epoch": 12.216666666666667, "loss": 0.0630793496966362, "loss_ce": 1.4777106116525829e-05, "loss_iou": 0.25390625, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 267626804, "step": 2932 }, { "epoch": 12.220833333333333, "grad_norm": 6.053276257068978, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 267718264, "step": 2933 }, { "epoch": 12.220833333333333, "loss": 0.06700801849365234, "loss_ce": 3.7198144127614796e-05, "loss_iou": 0.279296875, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 267718264, "step": 2933 }, { "epoch": 12.225, "grad_norm": 2.3243663096267064, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 267809600, "step": 2934 }, { "epoch": 12.225, "loss": 0.03875809535384178, "loss_ce": 7.706361793680117e-05, "loss_iou": 0.248046875, "loss_num": 0.00775146484375, "loss_xval": 0.03857421875, "num_input_tokens_seen": 267809600, "step": 2934 }, { "epoch": 12.229166666666666, "grad_norm": 11.416599220781654, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 267900996, "step": 2935 }, { "epoch": 12.229166666666666, "loss": 0.08636993169784546, "loss_ce": 0.0001272500230697915, "loss_iou": 0.30078125, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 267900996, "step": 2935 }, { "epoch": 12.233333333333333, "grad_norm": 10.747556296285435, "learning_rate": 5e-05, "loss": 0.0671, "num_input_tokens_seen": 267992640, "step": 2936 }, { "epoch": 12.233333333333333, "loss": 0.04982030764222145, "loss_ce": 4.613840428646654e-05, "loss_iou": 0.2265625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 267992640, "step": 2936 }, { "epoch": 12.2375, "grad_norm": 5.00122027159461, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 268084216, "step": 2937 }, { "epoch": 12.2375, "loss": 0.07573728263378143, "loss_ce": 0.0004656722885556519, "loss_iou": 0.353515625, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 268084216, "step": 2937 }, { "epoch": 12.241666666666667, "grad_norm": 1.8936543226280202, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 268175636, "step": 2938 }, { "epoch": 12.241666666666667, "loss": 0.039882972836494446, "loss_ce": 5.75307167309802e-05, "loss_iou": 0.25390625, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 268175636, "step": 2938 }, { "epoch": 12.245833333333334, "grad_norm": 3.126485576801853, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 268267340, "step": 2939 }, { "epoch": 12.245833333333334, "loss": 0.05813007429242134, "loss_ce": 0.00016193474584724754, "loss_iou": 0.33984375, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 268267340, "step": 2939 }, { "epoch": 12.25, "grad_norm": 2.753687602321184, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 268358356, "step": 2940 }, { "epoch": 12.25, "loss": 0.067852683365345, "loss_ce": 1.211215385410469e-05, "loss_iou": 0.271484375, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 268358356, "step": 2940 }, { "epoch": 12.254166666666666, "grad_norm": 2.306219959360317, "learning_rate": 5e-05, "loss": 0.0639, "num_input_tokens_seen": 268449056, "step": 2941 }, { "epoch": 12.254166666666666, "loss": 0.08572492748498917, "loss_ce": 1.0461585588927846e-06, "loss_iou": 0.1708984375, "loss_num": 0.01708984375, "loss_xval": 0.0859375, "num_input_tokens_seen": 268449056, "step": 2941 }, { "epoch": 12.258333333333333, "grad_norm": 2.663256282359737, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 268541016, "step": 2942 }, { "epoch": 12.258333333333333, "loss": 0.0969671756029129, "loss_ce": 0.00018067903874907643, "loss_iou": 0.34375, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 268541016, "step": 2942 }, { "epoch": 12.2625, "grad_norm": 2.1110070353785213, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 268632556, "step": 2943 }, { "epoch": 12.2625, "loss": 0.028460349887609482, "loss_ce": 0.00046810219646431506, "loss_iou": 0.2021484375, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 268632556, "step": 2943 }, { "epoch": 12.266666666666667, "grad_norm": 3.2150465390901717, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 268724012, "step": 2944 }, { "epoch": 12.266666666666667, "loss": 0.10927990078926086, "loss_ce": 4.2232659325236455e-05, "loss_iou": 0.251953125, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 268724012, "step": 2944 }, { "epoch": 12.270833333333334, "grad_norm": 3.0431108054614215, "learning_rate": 5e-05, "loss": 0.0708, "num_input_tokens_seen": 268815492, "step": 2945 }, { "epoch": 12.270833333333334, "loss": 0.07915940880775452, "loss_ce": 3.495713826850988e-05, "loss_iou": 0.373046875, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 268815492, "step": 2945 }, { "epoch": 12.275, "grad_norm": 2.4540010699603267, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 268906800, "step": 2946 }, { "epoch": 12.275, "loss": 0.08143861591815948, "loss_ce": 4.823937706532888e-05, "loss_iou": 0.2138671875, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 268906800, "step": 2946 }, { "epoch": 12.279166666666667, "grad_norm": 4.1755469226422655, "learning_rate": 5e-05, "loss": 0.078, "num_input_tokens_seen": 268997880, "step": 2947 }, { "epoch": 12.279166666666667, "loss": 0.06574500352144241, "loss_ce": 1.014260487863794e-05, "loss_iou": 0.234375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 268997880, "step": 2947 }, { "epoch": 12.283333333333333, "grad_norm": 0.9371808156525325, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 269088332, "step": 2948 }, { "epoch": 12.283333333333333, "loss": 0.11407633125782013, "loss_ce": 4.739892392535694e-05, "loss_iou": 0.23828125, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 269088332, "step": 2948 }, { "epoch": 12.2875, "grad_norm": 2.74225444069886, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 269179540, "step": 2949 }, { "epoch": 12.2875, "loss": 0.04844193905591965, "loss_ce": 8.683644409757107e-05, "loss_iou": 0.2734375, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 269179540, "step": 2949 }, { "epoch": 12.291666666666666, "grad_norm": 46.38768999122701, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 269270724, "step": 2950 }, { "epoch": 12.291666666666666, "loss": 0.06026214361190796, "loss_ce": 0.00011962662392761558, "loss_iou": 0.22265625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 269270724, "step": 2950 }, { "epoch": 12.295833333333333, "grad_norm": 2.267656912745027, "learning_rate": 5e-05, "loss": 0.0871, "num_input_tokens_seen": 269361640, "step": 2951 }, { "epoch": 12.295833333333333, "loss": 0.11035619676113129, "loss_ce": 4.627444923244184e-06, "loss_iou": 0.0859375, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 269361640, "step": 2951 }, { "epoch": 12.3, "grad_norm": 60.26301427305691, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 269452804, "step": 2952 }, { "epoch": 12.3, "loss": 0.06073518097400665, "loss_ce": 2.045981091214344e-05, "loss_iou": 0.322265625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 269452804, "step": 2952 }, { "epoch": 12.304166666666667, "grad_norm": 3.3411094226742084, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 269543492, "step": 2953 }, { "epoch": 12.304166666666667, "loss": 0.06969062983989716, "loss_ce": 0.00038520374801009893, "loss_iou": 0.28125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 269543492, "step": 2953 }, { "epoch": 12.308333333333334, "grad_norm": 2.694947426545346, "learning_rate": 5e-05, "loss": 0.0731, "num_input_tokens_seen": 269635008, "step": 2954 }, { "epoch": 12.308333333333334, "loss": 0.05683236941695213, "loss_ce": 0.0005884742713533342, "loss_iou": 0.384765625, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 269635008, "step": 2954 }, { "epoch": 12.3125, "grad_norm": 3.2220681695144227, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 269725540, "step": 2955 }, { "epoch": 12.3125, "loss": 0.05476832389831543, "loss_ce": 0.00044703579624183476, "loss_iou": 0.22265625, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 269725540, "step": 2955 }, { "epoch": 12.316666666666666, "grad_norm": 3.4982940830366407, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 269817180, "step": 2956 }, { "epoch": 12.316666666666666, "loss": 0.07881681621074677, "loss_ce": 5.0943544920301065e-05, "loss_iou": 0.306640625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 269817180, "step": 2956 }, { "epoch": 12.320833333333333, "grad_norm": 3.6375666812709584, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 269908468, "step": 2957 }, { "epoch": 12.320833333333333, "loss": 0.040140677243471146, "loss_ce": 0.0010667321039363742, "loss_iou": 0.271484375, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 269908468, "step": 2957 }, { "epoch": 12.325, "grad_norm": 4.173489294941111, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 269999436, "step": 2958 }, { "epoch": 12.325, "loss": 0.05630388855934143, "loss_ce": 0.005782034248113632, "loss_iou": 0.2255859375, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 269999436, "step": 2958 }, { "epoch": 12.329166666666667, "grad_norm": 1.9740436997100599, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 270090756, "step": 2959 }, { "epoch": 12.329166666666667, "loss": 0.09736660867929459, "loss_ce": 1.5532590623479337e-05, "loss_iou": 0.19140625, "loss_num": 0.01953125, "loss_xval": 0.09716796875, "num_input_tokens_seen": 270090756, "step": 2959 }, { "epoch": 12.333333333333334, "grad_norm": 2.525271542103449, "learning_rate": 5e-05, "loss": 0.1172, "num_input_tokens_seen": 270182140, "step": 2960 }, { "epoch": 12.333333333333334, "loss": 0.11587947607040405, "loss_ce": 6.527241930598393e-05, "loss_iou": 0.2275390625, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 270182140, "step": 2960 }, { "epoch": 12.3375, "grad_norm": 3.217067686924724, "learning_rate": 5e-05, "loss": 0.0715, "num_input_tokens_seen": 270273508, "step": 2961 }, { "epoch": 12.3375, "loss": 0.052564837038517, "loss_ce": 0.00234816106967628, "loss_iou": 0.2255859375, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 270273508, "step": 2961 }, { "epoch": 12.341666666666667, "grad_norm": 3.7700213068144244, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 270365420, "step": 2962 }, { "epoch": 12.341666666666667, "loss": 0.06520096957683563, "loss_ce": 0.0013276764657348394, "loss_iou": 0.298828125, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 270365420, "step": 2962 }, { "epoch": 12.345833333333333, "grad_norm": 3.888306227512155, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 270456560, "step": 2963 }, { "epoch": 12.345833333333333, "loss": 0.042089179158210754, "loss_ce": 5.439537744678091e-06, "loss_iou": 0.2734375, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 270456560, "step": 2963 }, { "epoch": 12.35, "grad_norm": 3.352267654498321, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 270547912, "step": 2964 }, { "epoch": 12.35, "loss": 0.061682380735874176, "loss_ce": 3.687536809593439e-05, "loss_iou": 0.326171875, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 270547912, "step": 2964 }, { "epoch": 12.354166666666666, "grad_norm": 3.0135508618533806, "learning_rate": 5e-05, "loss": 0.0795, "num_input_tokens_seen": 270637804, "step": 2965 }, { "epoch": 12.354166666666666, "loss": 0.10426987707614899, "loss_ce": 3.7082412745803595e-05, "loss_iou": 0.32421875, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 270637804, "step": 2965 }, { "epoch": 12.358333333333333, "grad_norm": 1.7291855053180956, "learning_rate": 5e-05, "loss": 0.0607, "num_input_tokens_seen": 270728876, "step": 2966 }, { "epoch": 12.358333333333333, "loss": 0.08189569413661957, "loss_ce": 1.7031868992489763e-05, "loss_iou": 0.11083984375, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 270728876, "step": 2966 }, { "epoch": 12.3625, "grad_norm": 2.4908079853099356, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 270820496, "step": 2967 }, { "epoch": 12.3625, "loss": 0.03716364502906799, "loss_ce": 0.00020685509662143886, "loss_iou": 0.26171875, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 270820496, "step": 2967 }, { "epoch": 12.366666666666667, "grad_norm": 2.97895839160627, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 270912076, "step": 2968 }, { "epoch": 12.366666666666667, "loss": 0.050762590020895004, "loss_ce": 7.28939994587563e-05, "loss_iou": 0.349609375, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 270912076, "step": 2968 }, { "epoch": 12.370833333333334, "grad_norm": 5.030995700810052, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 271003444, "step": 2969 }, { "epoch": 12.370833333333334, "loss": 0.08265762776136398, "loss_ce": 7.6745891419705e-07, "loss_iou": 0.24609375, "loss_num": 0.0166015625, "loss_xval": 0.08251953125, "num_input_tokens_seen": 271003444, "step": 2969 }, { "epoch": 12.375, "grad_norm": 3.6267303733690452, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 271094672, "step": 2970 }, { "epoch": 12.375, "loss": 0.053386226296424866, "loss_ce": 2.6241139494231902e-05, "loss_iou": 0.255859375, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 271094672, "step": 2970 }, { "epoch": 12.379166666666666, "grad_norm": 3.6698821259215753, "learning_rate": 5e-05, "loss": 0.074, "num_input_tokens_seen": 271186328, "step": 2971 }, { "epoch": 12.379166666666666, "loss": 0.06262575834989548, "loss_ce": 7.998640649020672e-05, "loss_iou": 0.35546875, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 271186328, "step": 2971 }, { "epoch": 12.383333333333333, "grad_norm": 3.066041322585827, "learning_rate": 5e-05, "loss": 0.0785, "num_input_tokens_seen": 271277608, "step": 2972 }, { "epoch": 12.383333333333333, "loss": 0.10933400690555573, "loss_ce": 0.0004778074217028916, "loss_iou": 0.3828125, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 271277608, "step": 2972 }, { "epoch": 12.3875, "grad_norm": 2.789272102325914, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 271369024, "step": 2973 }, { "epoch": 12.3875, "loss": 0.08655081689357758, "loss_ce": 0.0005217670113779604, "loss_iou": 0.302734375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 271369024, "step": 2973 }, { "epoch": 12.391666666666667, "grad_norm": 3.0763527820391436, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 271459964, "step": 2974 }, { "epoch": 12.391666666666667, "loss": 0.0638897716999054, "loss_ce": 0.0026562470011413097, "loss_iou": 0.224609375, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 271459964, "step": 2974 }, { "epoch": 12.395833333333334, "grad_norm": 2.5119394436453195, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 271551052, "step": 2975 }, { "epoch": 12.395833333333334, "loss": 0.03439211845397949, "loss_ce": 6.439298886107281e-06, "loss_iou": 0.251953125, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 271551052, "step": 2975 }, { "epoch": 12.4, "grad_norm": 6.359073936988227, "learning_rate": 5e-05, "loss": 0.0973, "num_input_tokens_seen": 271642408, "step": 2976 }, { "epoch": 12.4, "loss": 0.06211081147193909, "loss_ce": 1.5169309335760772e-05, "loss_iou": 0.23828125, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 271642408, "step": 2976 }, { "epoch": 12.404166666666667, "grad_norm": 5.83255234834698, "learning_rate": 5e-05, "loss": 0.0596, "num_input_tokens_seen": 271734232, "step": 2977 }, { "epoch": 12.404166666666667, "loss": 0.08257852494716644, "loss_ce": 1.3214259524829686e-05, "loss_iou": 0.33984375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 271734232, "step": 2977 }, { "epoch": 12.408333333333333, "grad_norm": 3.356095413500328, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 271825252, "step": 2978 }, { "epoch": 12.408333333333333, "loss": 0.05137810856103897, "loss_ce": 1.7683807982393773e-06, "loss_iou": 0.330078125, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 271825252, "step": 2978 }, { "epoch": 12.4125, "grad_norm": 5.3591045499391345, "learning_rate": 5e-05, "loss": 0.0945, "num_input_tokens_seen": 271916788, "step": 2979 }, { "epoch": 12.4125, "loss": 0.10306023806333542, "loss_ce": 2.3709637844149256e-06, "loss_iou": 0.390625, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 271916788, "step": 2979 }, { "epoch": 12.416666666666666, "grad_norm": 2.65928675424727, "learning_rate": 5e-05, "loss": 0.0834, "num_input_tokens_seen": 272008156, "step": 2980 }, { "epoch": 12.416666666666666, "loss": 0.076792873442173, "loss_ce": 8.6943109636195e-05, "loss_iou": 0.27734375, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 272008156, "step": 2980 }, { "epoch": 12.420833333333333, "grad_norm": 2.647439664415239, "learning_rate": 5e-05, "loss": 0.0976, "num_input_tokens_seen": 272099316, "step": 2981 }, { "epoch": 12.420833333333333, "loss": 0.08195499330759048, "loss_ce": 6.107529043219984e-05, "loss_iou": 0.4375, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 272099316, "step": 2981 }, { "epoch": 12.425, "grad_norm": 3.166738565215811, "learning_rate": 5e-05, "loss": 0.0869, "num_input_tokens_seen": 272189424, "step": 2982 }, { "epoch": 12.425, "loss": 0.08983665704727173, "loss_ce": 0.00017601408762857318, "loss_iou": 0.33203125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 272189424, "step": 2982 }, { "epoch": 12.429166666666667, "grad_norm": 1.7291364446089161, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 272280260, "step": 2983 }, { "epoch": 12.429166666666667, "loss": 0.07141336053609848, "loss_ce": 1.74842425622046e-05, "loss_iou": 0.31640625, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 272280260, "step": 2983 }, { "epoch": 12.433333333333334, "grad_norm": 3.7551882332604087, "learning_rate": 5e-05, "loss": 0.0577, "num_input_tokens_seen": 272371540, "step": 2984 }, { "epoch": 12.433333333333334, "loss": 0.06047248840332031, "loss_ce": 3.2421918149339035e-05, "loss_iou": 0.244140625, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 272371540, "step": 2984 }, { "epoch": 12.4375, "grad_norm": 2.7412211250098784, "learning_rate": 5e-05, "loss": 0.0754, "num_input_tokens_seen": 272463376, "step": 2985 }, { "epoch": 12.4375, "loss": 0.10339340567588806, "loss_ce": 0.000648344517685473, "loss_iou": 0.2490234375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 272463376, "step": 2985 }, { "epoch": 12.441666666666666, "grad_norm": 2.6385098374805307, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 272554700, "step": 2986 }, { "epoch": 12.441666666666666, "loss": 0.05318248271942139, "loss_ce": 0.0003260402590967715, "loss_iou": 0.2392578125, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 272554700, "step": 2986 }, { "epoch": 12.445833333333333, "grad_norm": 4.896829389393238, "learning_rate": 5e-05, "loss": 0.0922, "num_input_tokens_seen": 272646284, "step": 2987 }, { "epoch": 12.445833333333333, "loss": 0.09454986453056335, "loss_ce": 0.0002200333256041631, "loss_iou": 0.265625, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 272646284, "step": 2987 }, { "epoch": 12.45, "grad_norm": 3.040650468532159, "learning_rate": 5e-05, "loss": 0.0916, "num_input_tokens_seen": 272737320, "step": 2988 }, { "epoch": 12.45, "loss": 0.10238365083932877, "loss_ce": 4.2953739466611296e-05, "loss_iou": 0.294921875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 272737320, "step": 2988 }, { "epoch": 12.454166666666667, "grad_norm": 4.243087910124068, "learning_rate": 5e-05, "loss": 0.0942, "num_input_tokens_seen": 272828568, "step": 2989 }, { "epoch": 12.454166666666667, "loss": 0.06616440415382385, "loss_ce": 3.281386671005748e-05, "loss_iou": 0.2421875, "loss_num": 0.01324462890625, "loss_xval": 0.06591796875, "num_input_tokens_seen": 272828568, "step": 2989 }, { "epoch": 12.458333333333334, "grad_norm": 1.8136182206329305, "learning_rate": 5e-05, "loss": 0.0482, "num_input_tokens_seen": 272920056, "step": 2990 }, { "epoch": 12.458333333333334, "loss": 0.047113243490457535, "loss_ce": 1.7326314036836266e-06, "loss_iou": 0.248046875, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 272920056, "step": 2990 }, { "epoch": 12.4625, "grad_norm": 2.6392028828013903, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 273012108, "step": 2991 }, { "epoch": 12.4625, "loss": 0.03175271302461624, "loss_ce": 0.00047219384578056633, "loss_iou": 0.2109375, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 273012108, "step": 2991 }, { "epoch": 12.466666666666667, "grad_norm": 6.542435490375677, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 273103444, "step": 2992 }, { "epoch": 12.466666666666667, "loss": 0.06299017369747162, "loss_ce": 1.8880823517974932e-06, "loss_iou": 0.19921875, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 273103444, "step": 2992 }, { "epoch": 12.470833333333333, "grad_norm": 2.489483280029813, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 273195164, "step": 2993 }, { "epoch": 12.470833333333333, "loss": 0.038447022438049316, "loss_ce": 0.0008798825438134372, "loss_iou": 0.26953125, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 273195164, "step": 2993 }, { "epoch": 12.475, "grad_norm": 2.8719785161307123, "learning_rate": 5e-05, "loss": 0.0561, "num_input_tokens_seen": 273286944, "step": 2994 }, { "epoch": 12.475, "loss": 0.058075353503227234, "loss_ce": 6.143513019196689e-05, "loss_iou": 0.283203125, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 273286944, "step": 2994 }, { "epoch": 12.479166666666666, "grad_norm": 2.5710577096424925, "learning_rate": 5e-05, "loss": 0.0573, "num_input_tokens_seen": 273377916, "step": 2995 }, { "epoch": 12.479166666666666, "loss": 0.040670327842235565, "loss_ce": 5.657493147737114e-06, "loss_iou": 0.2275390625, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 273377916, "step": 2995 }, { "epoch": 12.483333333333333, "grad_norm": 2.2883425127558956, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 273468608, "step": 2996 }, { "epoch": 12.483333333333333, "loss": 0.03394392877817154, "loss_ce": 8.38409505377058e-06, "loss_iou": 0.25390625, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 273468608, "step": 2996 }, { "epoch": 12.4875, "grad_norm": 7.473944248059015, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 273560360, "step": 2997 }, { "epoch": 12.4875, "loss": 0.07293405383825302, "loss_ce": 0.00027170099201612175, "loss_iou": 0.265625, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 273560360, "step": 2997 }, { "epoch": 12.491666666666667, "grad_norm": 5.223530887356119, "learning_rate": 5e-05, "loss": 0.0906, "num_input_tokens_seen": 273651220, "step": 2998 }, { "epoch": 12.491666666666667, "loss": 0.06444063037633896, "loss_ce": 2.763768634395092e-06, "loss_iou": 0.400390625, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 273651220, "step": 2998 }, { "epoch": 12.495833333333334, "grad_norm": 3.77445855797572, "learning_rate": 5e-05, "loss": 0.092, "num_input_tokens_seen": 273742916, "step": 2999 }, { "epoch": 12.495833333333334, "loss": 0.1081899106502533, "loss_ce": 5.0875409215223044e-05, "loss_iou": 0.193359375, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 273742916, "step": 2999 }, { "epoch": 12.5, "grad_norm": 3.894907640330977, "learning_rate": 5e-05, "loss": 0.0959, "num_input_tokens_seen": 273833260, "step": 3000 }, { "epoch": 12.5, "eval_seeclick_CIoU": 0.2620522305369377, "eval_seeclick_GIoU": 0.261411115527153, "eval_seeclick_IoU": 0.3575377017259598, "eval_seeclick_MAE_all": 0.09368777647614479, "eval_seeclick_MAE_h": 0.08874733746051788, "eval_seeclick_MAE_w": 0.18410181254148483, "eval_seeclick_MAE_x_boxes": 0.1935308650135994, "eval_seeclick_MAE_y_boxes": 0.09129971638321877, "eval_seeclick_NUM_probability": 0.9999963939189911, "eval_seeclick_inside_bbox": 0.4786931872367859, "eval_seeclick_loss": 0.5895414352416992, "eval_seeclick_loss_ce": 0.1322433277964592, "eval_seeclick_loss_iou": 0.4326171875, "eval_seeclick_loss_num": 0.0874786376953125, "eval_seeclick_loss_xval": 0.4368896484375, "eval_seeclick_runtime": 76.9126, "eval_seeclick_samples_per_second": 0.559, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 273833260, "step": 3000 }, { "epoch": 12.5, "eval_icons_CIoU": 0.423148512840271, "eval_icons_GIoU": 0.43206779658794403, "eval_icons_IoU": 0.48607292771339417, "eval_icons_MAE_all": 0.05760672502219677, "eval_icons_MAE_h": 0.11967556178569794, "eval_icons_MAE_w": 0.084600280970335, "eval_icons_MAE_x_boxes": 0.08436497300863266, "eval_icons_MAE_y_boxes": 0.11780550330877304, "eval_icons_NUM_probability": 0.9999983906745911, "eval_icons_inside_bbox": 0.6493055522441864, "eval_icons_loss": 0.2809631824493408, "eval_icons_loss_ce": 4.4523385440697894e-05, "eval_icons_loss_iou": 0.2989501953125, "eval_icons_loss_num": 0.0606536865234375, "eval_icons_loss_xval": 0.303131103515625, "eval_icons_runtime": 88.2186, "eval_icons_samples_per_second": 0.567, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 273833260, "step": 3000 }, { "epoch": 12.5, "eval_screenspot_CIoU": 0.35235823194185895, "eval_screenspot_GIoU": 0.33617523312568665, "eval_screenspot_IoU": 0.44529513518015545, "eval_screenspot_MAE_all": 0.10489916304747264, "eval_screenspot_MAE_h": 0.09332895030577977, "eval_screenspot_MAE_w": 0.2365158200263977, "eval_screenspot_MAE_x_boxes": 0.22446357210477194, "eval_screenspot_MAE_y_boxes": 0.08861926694711049, "eval_screenspot_NUM_probability": 0.9974575042724609, "eval_screenspot_inside_bbox": 0.6670833428700765, "eval_screenspot_loss": 0.5274566411972046, "eval_screenspot_loss_ce": 0.0008574875680362007, "eval_screenspot_loss_iou": 0.3682861328125, "eval_screenspot_loss_num": 0.10426839192708333, "eval_screenspot_loss_xval": 0.52142333984375, "eval_screenspot_runtime": 163.935, "eval_screenspot_samples_per_second": 0.543, "eval_screenspot_steps_per_second": 0.018, "num_input_tokens_seen": 273833260, "step": 3000 }, { "epoch": 12.5, "eval_compot_CIoU": 0.5151858925819397, "eval_compot_GIoU": 0.5106890201568604, "eval_compot_IoU": 0.5789482891559601, "eval_compot_MAE_all": 0.0476725697517395, "eval_compot_MAE_h": 0.06707289069890976, "eval_compot_MAE_w": 0.11045684665441513, "eval_compot_MAE_x_boxes": 0.10997127369046211, "eval_compot_MAE_y_boxes": 0.06569756753742695, "eval_compot_NUM_probability": 0.9999961256980896, "eval_compot_inside_bbox": 0.7951388955116272, "eval_compot_loss": 0.26056361198425293, "eval_compot_loss_ce": 0.031769391149282455, "eval_compot_loss_iou": 0.33770751953125, "eval_compot_loss_num": 0.04244804382324219, "eval_compot_loss_xval": 0.21216583251953125, "eval_compot_runtime": 90.3101, "eval_compot_samples_per_second": 0.554, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 273833260, "step": 3000 }, { "epoch": 12.5, "loss": 0.23503346741199493, "loss_ce": 0.034380391240119934, "loss_iou": 0.33984375, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 273833260, "step": 3000 }, { "epoch": 12.504166666666666, "grad_norm": 1.6827014254027144, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 273924180, "step": 3001 }, { "epoch": 12.504166666666666, "loss": 0.03862186148762703, "loss_ce": 1.8641272845343337e-06, "loss_iou": 0.220703125, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 273924180, "step": 3001 }, { "epoch": 12.508333333333333, "grad_norm": 3.767378502480506, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 274015228, "step": 3002 }, { "epoch": 12.508333333333333, "loss": 0.08175476640462875, "loss_ce": 0.00024231600400526077, "loss_iou": 0.20703125, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 274015228, "step": 3002 }, { "epoch": 12.5125, "grad_norm": 3.628278411234878, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 274106380, "step": 3003 }, { "epoch": 12.5125, "loss": 0.06517404317855835, "loss_ce": 3.7579250147246057e-06, "loss_iou": 0.431640625, "loss_num": 0.0130615234375, "loss_xval": 0.06494140625, "num_input_tokens_seen": 274106380, "step": 3003 }, { "epoch": 12.516666666666667, "grad_norm": 6.21563970342266, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 274197700, "step": 3004 }, { "epoch": 12.516666666666667, "loss": 0.07570692151784897, "loss_ce": 2.332625081180595e-05, "loss_iou": 0.40625, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 274197700, "step": 3004 }, { "epoch": 12.520833333333334, "grad_norm": 1.6570068700315066, "learning_rate": 5e-05, "loss": 0.078, "num_input_tokens_seen": 274289044, "step": 3005 }, { "epoch": 12.520833333333334, "loss": 0.09042497724294662, "loss_ce": 0.0005049319006502628, "loss_iou": 0.34375, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 274289044, "step": 3005 }, { "epoch": 12.525, "grad_norm": 1.339285718282047, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 274380896, "step": 3006 }, { "epoch": 12.525, "loss": 0.08311203867197037, "loss_ce": 1.2673946912400424e-05, "loss_iou": 0.283203125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 274380896, "step": 3006 }, { "epoch": 12.529166666666667, "grad_norm": 3.3647496664677607, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 274472392, "step": 3007 }, { "epoch": 12.529166666666667, "loss": 0.0321391299366951, "loss_ce": 1.9378574506845325e-05, "loss_iou": 0.28125, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 274472392, "step": 3007 }, { "epoch": 12.533333333333333, "grad_norm": 3.150886227407765, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 274563964, "step": 3008 }, { "epoch": 12.533333333333333, "loss": 0.07331331819295883, "loss_ce": 0.0019632219336926937, "loss_iou": 0.341796875, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 274563964, "step": 3008 }, { "epoch": 12.5375, "grad_norm": 3.4248419194749027, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 274655244, "step": 3009 }, { "epoch": 12.5375, "loss": 0.054197344928979874, "loss_ce": 1.3388858860707842e-05, "loss_iou": 0.220703125, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 274655244, "step": 3009 }, { "epoch": 12.541666666666666, "grad_norm": 3.6137713418335062, "learning_rate": 5e-05, "loss": 0.124, "num_input_tokens_seen": 274746376, "step": 3010 }, { "epoch": 12.541666666666666, "loss": 0.07499793916940689, "loss_ce": 9.865603942671441e-07, "loss_iou": 0.220703125, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 274746376, "step": 3010 }, { "epoch": 12.545833333333333, "grad_norm": 2.725742441669566, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 274837960, "step": 3011 }, { "epoch": 12.545833333333333, "loss": 0.051561854779720306, "loss_ce": 0.0008187488419935107, "loss_iou": 0.1904296875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 274837960, "step": 3011 }, { "epoch": 12.55, "grad_norm": 10.900517824421192, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 274929360, "step": 3012 }, { "epoch": 12.55, "loss": 0.06274904310703278, "loss_ce": 0.0002795600739773363, "loss_iou": 0.234375, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 274929360, "step": 3012 }, { "epoch": 12.554166666666667, "grad_norm": 3.121712391262659, "learning_rate": 5e-05, "loss": 0.0482, "num_input_tokens_seen": 275020996, "step": 3013 }, { "epoch": 12.554166666666667, "loss": 0.0564560629427433, "loss_ce": 0.003065558848902583, "loss_iou": 0.228515625, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 275020996, "step": 3013 }, { "epoch": 12.558333333333334, "grad_norm": 2.184252079837103, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 275112132, "step": 3014 }, { "epoch": 12.558333333333334, "loss": 0.04341685771942139, "loss_ce": 5.605666046903934e-06, "loss_iou": 0.2177734375, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 275112132, "step": 3014 }, { "epoch": 12.5625, "grad_norm": 5.173157766739975, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 275203920, "step": 3015 }, { "epoch": 12.5625, "loss": 0.0419183224439621, "loss_ce": 0.001680898480117321, "loss_iou": 0.3046875, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 275203920, "step": 3015 }, { "epoch": 12.566666666666666, "grad_norm": 11.37834540555279, "learning_rate": 5e-05, "loss": 0.0618, "num_input_tokens_seen": 275295580, "step": 3016 }, { "epoch": 12.566666666666666, "loss": 0.04153522476553917, "loss_ce": 8.47255505505018e-05, "loss_iou": 0.220703125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 275295580, "step": 3016 }, { "epoch": 12.570833333333333, "grad_norm": 2.494361842243073, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 275387108, "step": 3017 }, { "epoch": 12.570833333333333, "loss": 0.04674965888261795, "loss_ce": 0.00011879783414769918, "loss_iou": 0.302734375, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 275387108, "step": 3017 }, { "epoch": 12.575, "grad_norm": 2.410403468350254, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 275477904, "step": 3018 }, { "epoch": 12.575, "loss": 0.03227172791957855, "loss_ce": 2.2275686205830425e-05, "loss_iou": 0.291015625, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 275477904, "step": 3018 }, { "epoch": 12.579166666666667, "grad_norm": 2.3486592784934888, "learning_rate": 5e-05, "loss": 0.0832, "num_input_tokens_seen": 275569364, "step": 3019 }, { "epoch": 12.579166666666667, "loss": 0.06415988504886627, "loss_ce": 0.001583587029017508, "loss_iou": 0.1591796875, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 275569364, "step": 3019 }, { "epoch": 12.583333333333334, "grad_norm": 9.91921123923421, "learning_rate": 5e-05, "loss": 0.0731, "num_input_tokens_seen": 275661176, "step": 3020 }, { "epoch": 12.583333333333334, "loss": 0.09567893296480179, "loss_ce": 0.001165991765446961, "loss_iou": 0.2890625, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 275661176, "step": 3020 }, { "epoch": 12.5875, "grad_norm": 5.046254567693273, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 275752156, "step": 3021 }, { "epoch": 12.5875, "loss": 0.07690000534057617, "loss_ce": 4.148645530221984e-05, "loss_iou": 0.2578125, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 275752156, "step": 3021 }, { "epoch": 12.591666666666667, "grad_norm": 1.924919864484072, "learning_rate": 5e-05, "loss": 0.0322, "num_input_tokens_seen": 275842928, "step": 3022 }, { "epoch": 12.591666666666667, "loss": 0.033310018479824066, "loss_ce": 2.296766069775913e-05, "loss_iou": 0.236328125, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 275842928, "step": 3022 }, { "epoch": 12.595833333333333, "grad_norm": 5.374815815843724, "learning_rate": 5e-05, "loss": 0.0844, "num_input_tokens_seen": 275933884, "step": 3023 }, { "epoch": 12.595833333333333, "loss": 0.07975783199071884, "loss_ce": 3.065694181714207e-05, "loss_iou": 0.20703125, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 275933884, "step": 3023 }, { "epoch": 12.6, "grad_norm": 4.761810635006499, "learning_rate": 5e-05, "loss": 0.088, "num_input_tokens_seen": 276025600, "step": 3024 }, { "epoch": 12.6, "loss": 0.10507048666477203, "loss_ce": 0.000395202892832458, "loss_iou": 0.2197265625, "loss_num": 0.02099609375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 276025600, "step": 3024 }, { "epoch": 12.604166666666666, "grad_norm": 2.680798297145748, "learning_rate": 5e-05, "loss": 0.0719, "num_input_tokens_seen": 276116744, "step": 3025 }, { "epoch": 12.604166666666666, "loss": 0.0392971932888031, "loss_ce": 5.810734819533536e-06, "loss_iou": 0.322265625, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 276116744, "step": 3025 }, { "epoch": 12.608333333333333, "grad_norm": 22.630748766733472, "learning_rate": 5e-05, "loss": 0.0925, "num_input_tokens_seen": 276207340, "step": 3026 }, { "epoch": 12.608333333333333, "loss": 0.10536612570285797, "loss_ce": 4.1947278077714145e-06, "loss_iou": 0.326171875, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 276207340, "step": 3026 }, { "epoch": 12.6125, "grad_norm": 2.357555610446469, "learning_rate": 5e-05, "loss": 0.0729, "num_input_tokens_seen": 276298156, "step": 3027 }, { "epoch": 12.6125, "loss": 0.07741272449493408, "loss_ce": 2.014567144215107e-05, "loss_iou": 0.38671875, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 276298156, "step": 3027 }, { "epoch": 12.616666666666667, "grad_norm": 2.3887754443380174, "learning_rate": 5e-05, "loss": 0.0844, "num_input_tokens_seen": 276389868, "step": 3028 }, { "epoch": 12.616666666666667, "loss": 0.08939746767282486, "loss_ce": 5.725852679461241e-05, "loss_iou": 0.32421875, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 276389868, "step": 3028 }, { "epoch": 12.620833333333334, "grad_norm": 3.238182219509643, "learning_rate": 5e-05, "loss": 0.0718, "num_input_tokens_seen": 276481088, "step": 3029 }, { "epoch": 12.620833333333334, "loss": 0.04466459900140762, "loss_ce": 1.7379155906382948e-05, "loss_iou": 0.201171875, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 276481088, "step": 3029 }, { "epoch": 12.625, "grad_norm": 1.8248130085422234, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 276572552, "step": 3030 }, { "epoch": 12.625, "loss": 0.04894650727510452, "loss_ce": 0.00019467764650471509, "loss_iou": 0.171875, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 276572552, "step": 3030 }, { "epoch": 12.629166666666666, "grad_norm": 14.546046331105883, "learning_rate": 5e-05, "loss": 0.1234, "num_input_tokens_seen": 276663760, "step": 3031 }, { "epoch": 12.629166666666666, "loss": 0.0462212935090065, "loss_ce": 2.4235514501924627e-06, "loss_iou": 0.310546875, "loss_num": 0.00927734375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 276663760, "step": 3031 }, { "epoch": 12.633333333333333, "grad_norm": 5.467846134806575, "learning_rate": 5e-05, "loss": 0.0864, "num_input_tokens_seen": 276755064, "step": 3032 }, { "epoch": 12.633333333333333, "loss": 0.0916154608130455, "loss_ce": 0.00029160885605961084, "loss_iou": 0.314453125, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 276755064, "step": 3032 }, { "epoch": 12.6375, "grad_norm": 3.176330171782519, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 276846344, "step": 3033 }, { "epoch": 12.6375, "loss": 0.05962742865085602, "loss_ce": 2.659808160387911e-05, "loss_iou": 0.1796875, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 276846344, "step": 3033 }, { "epoch": 12.641666666666667, "grad_norm": 2.796019991026777, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 276937896, "step": 3034 }, { "epoch": 12.641666666666667, "loss": 0.0450693741440773, "loss_ce": 0.0007502207299694419, "loss_iou": 0.271484375, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 276937896, "step": 3034 }, { "epoch": 12.645833333333334, "grad_norm": 5.4254994878266, "learning_rate": 5e-05, "loss": 0.0888, "num_input_tokens_seen": 277029492, "step": 3035 }, { "epoch": 12.645833333333334, "loss": 0.12581279873847961, "loss_ce": 0.00014141679275780916, "loss_iou": 0.32421875, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 277029492, "step": 3035 }, { "epoch": 12.65, "grad_norm": 8.83160209648461, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 277121068, "step": 3036 }, { "epoch": 12.65, "loss": 0.08744452893733978, "loss_ce": 1.929440622916445e-05, "loss_iou": 0.34375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 277121068, "step": 3036 }, { "epoch": 12.654166666666667, "grad_norm": 2.9922367136257133, "learning_rate": 5e-05, "loss": 0.0587, "num_input_tokens_seen": 277211956, "step": 3037 }, { "epoch": 12.654166666666667, "loss": 0.0431191548705101, "loss_ce": 1.3074773960397579e-05, "loss_iou": 0.2255859375, "loss_num": 0.00860595703125, "loss_xval": 0.043212890625, "num_input_tokens_seen": 277211956, "step": 3037 }, { "epoch": 12.658333333333333, "grad_norm": 7.312914555316495, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 277303404, "step": 3038 }, { "epoch": 12.658333333333333, "loss": 0.061124954372644424, "loss_ce": 0.00028816104168072343, "loss_iou": 0.298828125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 277303404, "step": 3038 }, { "epoch": 12.6625, "grad_norm": 2.5224699213733874, "learning_rate": 5e-05, "loss": 0.0917, "num_input_tokens_seen": 277394460, "step": 3039 }, { "epoch": 12.6625, "loss": 0.1085333526134491, "loss_ce": 3.5741508327191696e-05, "loss_iou": 0.3125, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 277394460, "step": 3039 }, { "epoch": 12.666666666666666, "grad_norm": 3.141807694935655, "learning_rate": 5e-05, "loss": 0.0563, "num_input_tokens_seen": 277485988, "step": 3040 }, { "epoch": 12.666666666666666, "loss": 0.057176895439624786, "loss_ce": 0.000284504727460444, "loss_iou": 0.1982421875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 277485988, "step": 3040 }, { "epoch": 12.670833333333333, "grad_norm": 3.1119452188475387, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 277577536, "step": 3041 }, { "epoch": 12.670833333333333, "loss": 0.04843373969197273, "loss_ce": 6.337818194879219e-05, "loss_iou": 0.302734375, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 277577536, "step": 3041 }, { "epoch": 12.675, "grad_norm": 3.381427897262613, "learning_rate": 5e-05, "loss": 0.0732, "num_input_tokens_seen": 277668420, "step": 3042 }, { "epoch": 12.675, "loss": 0.04654950648546219, "loss_ce": 0.00013227068120613694, "loss_iou": 0.375, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 277668420, "step": 3042 }, { "epoch": 12.679166666666667, "grad_norm": 3.076538192616019, "learning_rate": 5e-05, "loss": 0.1251, "num_input_tokens_seen": 277759760, "step": 3043 }, { "epoch": 12.679166666666667, "loss": 0.07401155680418015, "loss_ce": 0.00011323827493470162, "loss_iou": 0.2294921875, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 277759760, "step": 3043 }, { "epoch": 12.683333333333334, "grad_norm": 17.963846970582654, "learning_rate": 5e-05, "loss": 0.0708, "num_input_tokens_seen": 277851880, "step": 3044 }, { "epoch": 12.683333333333334, "loss": 0.08022314310073853, "loss_ce": 0.00014501073746941984, "loss_iou": 0.20703125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 277851880, "step": 3044 }, { "epoch": 12.6875, "grad_norm": 2.030278631882756, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 277943352, "step": 3045 }, { "epoch": 12.6875, "loss": 0.04376886039972305, "loss_ce": 0.0005483399145305157, "loss_iou": 0.1982421875, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 277943352, "step": 3045 }, { "epoch": 12.691666666666666, "grad_norm": 1.051735154214462, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 278034200, "step": 3046 }, { "epoch": 12.691666666666666, "loss": 0.06943688541650772, "loss_ce": 2.465544457663782e-05, "loss_iou": 0.251953125, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 278034200, "step": 3046 }, { "epoch": 12.695833333333333, "grad_norm": 14.104348655354823, "learning_rate": 5e-05, "loss": 0.0888, "num_input_tokens_seen": 278125860, "step": 3047 }, { "epoch": 12.695833333333333, "loss": 0.09014366567134857, "loss_ce": 0.002939691534265876, "loss_iou": 0.251953125, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 278125860, "step": 3047 }, { "epoch": 12.7, "grad_norm": 2.1425563049295406, "learning_rate": 5e-05, "loss": 0.0969, "num_input_tokens_seen": 278216780, "step": 3048 }, { "epoch": 12.7, "loss": 0.08864589035511017, "loss_ce": 5.336176400305703e-05, "loss_iou": 0.248046875, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 278216780, "step": 3048 }, { "epoch": 12.704166666666667, "grad_norm": 1.705437231966854, "learning_rate": 5e-05, "loss": 0.0515, "num_input_tokens_seen": 278307744, "step": 3049 }, { "epoch": 12.704166666666667, "loss": 0.0454210564494133, "loss_ce": 1.0902575013460591e-05, "loss_iou": 0.228515625, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 278307744, "step": 3049 }, { "epoch": 12.708333333333334, "grad_norm": 7.052905088085709, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 278398612, "step": 3050 }, { "epoch": 12.708333333333334, "loss": 0.058208584785461426, "loss_ce": 1.1565132808755152e-05, "loss_iou": 0.10400390625, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 278398612, "step": 3050 }, { "epoch": 12.7125, "grad_norm": 1.429215882691101, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 278489904, "step": 3051 }, { "epoch": 12.7125, "loss": 0.08760759234428406, "loss_ce": 0.00045702431816607714, "loss_iou": 0.197265625, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 278489904, "step": 3051 }, { "epoch": 12.716666666666667, "grad_norm": 2.157319883356644, "learning_rate": 5e-05, "loss": 0.065, "num_input_tokens_seen": 278581152, "step": 3052 }, { "epoch": 12.716666666666667, "loss": 0.046220067888498306, "loss_ce": 4.6972148993518203e-05, "loss_iou": 0.345703125, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 278581152, "step": 3052 }, { "epoch": 12.720833333333333, "grad_norm": 2.705812983876491, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 278672888, "step": 3053 }, { "epoch": 12.720833333333333, "loss": 0.031630102545022964, "loss_ce": 0.0005021725664846599, "loss_iou": 0.251953125, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 278672888, "step": 3053 }, { "epoch": 12.725, "grad_norm": 2.93868503933879, "learning_rate": 5e-05, "loss": 0.0698, "num_input_tokens_seen": 278764400, "step": 3054 }, { "epoch": 12.725, "loss": 0.10779900848865509, "loss_ce": 2.6188343326793984e-05, "loss_iou": 0.11669921875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 278764400, "step": 3054 }, { "epoch": 12.729166666666666, "grad_norm": 3.8858062498122563, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 278854972, "step": 3055 }, { "epoch": 12.729166666666666, "loss": 0.06658395379781723, "loss_ce": 9.854529707808979e-06, "loss_iou": 0.2734375, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 278854972, "step": 3055 }, { "epoch": 12.733333333333333, "grad_norm": 5.271084710186165, "learning_rate": 5e-05, "loss": 0.0878, "num_input_tokens_seen": 278946476, "step": 3056 }, { "epoch": 12.733333333333333, "loss": 0.10122112929821014, "loss_ce": 9.584147846908309e-06, "loss_iou": 0.390625, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 278946476, "step": 3056 }, { "epoch": 12.7375, "grad_norm": 2.6558501306865336, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 279038040, "step": 3057 }, { "epoch": 12.7375, "loss": 0.057488322257995605, "loss_ce": 8.464216080028564e-06, "loss_iou": 0.291015625, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 279038040, "step": 3057 }, { "epoch": 12.741666666666667, "grad_norm": 2.1151067539954984, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 279128956, "step": 3058 }, { "epoch": 12.741666666666667, "loss": 0.07311089336872101, "loss_ce": 2.1294054022291675e-05, "loss_iou": 0.189453125, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 279128956, "step": 3058 }, { "epoch": 12.745833333333334, "grad_norm": 1.830733179434178, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 279220376, "step": 3059 }, { "epoch": 12.745833333333334, "loss": 0.0318562351167202, "loss_ce": 4.1659819544292986e-05, "loss_iou": 0.173828125, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 279220376, "step": 3059 }, { "epoch": 12.75, "grad_norm": 3.7824054547708674, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 279311848, "step": 3060 }, { "epoch": 12.75, "loss": 0.05131109058856964, "loss_ce": 3.414619413888431e-06, "loss_iou": 0.23046875, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 279311848, "step": 3060 }, { "epoch": 12.754166666666666, "grad_norm": 8.165410655420041, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 279403576, "step": 3061 }, { "epoch": 12.754166666666666, "loss": 0.06295231729745865, "loss_ce": 0.00011662582255667076, "loss_iou": 0.328125, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 279403576, "step": 3061 }, { "epoch": 12.758333333333333, "grad_norm": 2.181439238726654, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 279494252, "step": 3062 }, { "epoch": 12.758333333333333, "loss": 0.06396616250276566, "loss_ce": 8.523969881935045e-05, "loss_iou": 0.166015625, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 279494252, "step": 3062 }, { "epoch": 12.7625, "grad_norm": 2.3387174785163856, "learning_rate": 5e-05, "loss": 0.0456, "num_input_tokens_seen": 279585728, "step": 3063 }, { "epoch": 12.7625, "loss": 0.035773493349552155, "loss_ce": 6.8942003963456955e-06, "loss_iou": 0.2275390625, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 279585728, "step": 3063 }, { "epoch": 12.766666666666667, "grad_norm": 4.427669922592235, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 279676584, "step": 3064 }, { "epoch": 12.766666666666667, "loss": 0.09679196774959564, "loss_ce": 2.07240318559343e-05, "loss_iou": 0.28125, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 279676584, "step": 3064 }, { "epoch": 12.770833333333334, "grad_norm": 5.098015977237453, "learning_rate": 5e-05, "loss": 0.1176, "num_input_tokens_seen": 279768452, "step": 3065 }, { "epoch": 12.770833333333334, "loss": 0.1344681829214096, "loss_ce": 9.928654617397115e-05, "loss_iou": 0.19921875, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 279768452, "step": 3065 }, { "epoch": 12.775, "grad_norm": 2.7691277990758274, "learning_rate": 5e-05, "loss": 0.0855, "num_input_tokens_seen": 279859876, "step": 3066 }, { "epoch": 12.775, "loss": 0.12224830687046051, "loss_ce": 0.0006967922090552747, "loss_iou": 0.361328125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 279859876, "step": 3066 }, { "epoch": 12.779166666666667, "grad_norm": 7.2095868531702605, "learning_rate": 5e-05, "loss": 0.0843, "num_input_tokens_seen": 279951356, "step": 3067 }, { "epoch": 12.779166666666667, "loss": 0.07838231325149536, "loss_ce": 0.0006692995084449649, "loss_iou": 0.3046875, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 279951356, "step": 3067 }, { "epoch": 12.783333333333333, "grad_norm": 1.5714320567596505, "learning_rate": 5e-05, "loss": 0.0847, "num_input_tokens_seen": 280042444, "step": 3068 }, { "epoch": 12.783333333333333, "loss": 0.06469504535198212, "loss_ce": 5.881530887563713e-05, "loss_iou": 0.232421875, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 280042444, "step": 3068 }, { "epoch": 12.7875, "grad_norm": 2.73049184603913, "learning_rate": 5e-05, "loss": 0.0748, "num_input_tokens_seen": 280132980, "step": 3069 }, { "epoch": 12.7875, "loss": 0.0646815299987793, "loss_ce": 0.00012159519974375144, "loss_iou": 0.32421875, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 280132980, "step": 3069 }, { "epoch": 12.791666666666666, "grad_norm": 2.6206030451709124, "learning_rate": 5e-05, "loss": 0.0677, "num_input_tokens_seen": 280224380, "step": 3070 }, { "epoch": 12.791666666666666, "loss": 0.048339590430259705, "loss_ce": 0.00016759365098550916, "loss_iou": 0.2373046875, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 280224380, "step": 3070 }, { "epoch": 12.795833333333333, "grad_norm": 1.9195245267601702, "learning_rate": 5e-05, "loss": 0.097, "num_input_tokens_seen": 280315536, "step": 3071 }, { "epoch": 12.795833333333333, "loss": 0.08559077978134155, "loss_ce": 0.00011104091390734538, "loss_iou": 0.1875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 280315536, "step": 3071 }, { "epoch": 12.8, "grad_norm": 6.07437903070192, "learning_rate": 5e-05, "loss": 0.1087, "num_input_tokens_seen": 280406956, "step": 3072 }, { "epoch": 12.8, "loss": 0.09862232953310013, "loss_ce": 4.2919116822304204e-05, "loss_iou": 0.146484375, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 280406956, "step": 3072 }, { "epoch": 12.804166666666667, "grad_norm": 2.9138495381433853, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 280498940, "step": 3073 }, { "epoch": 12.804166666666667, "loss": 0.10507670789957047, "loss_ce": 0.00023356490419246256, "loss_iou": 0.25390625, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 280498940, "step": 3073 }, { "epoch": 12.808333333333334, "grad_norm": 1.9506126651259772, "learning_rate": 5e-05, "loss": 0.1089, "num_input_tokens_seen": 280590392, "step": 3074 }, { "epoch": 12.808333333333334, "loss": 0.07741060853004456, "loss_ce": 0.0009640734060667455, "loss_iou": 0.0888671875, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 280590392, "step": 3074 }, { "epoch": 12.8125, "grad_norm": 4.158899079033557, "learning_rate": 5e-05, "loss": 0.0758, "num_input_tokens_seen": 280681300, "step": 3075 }, { "epoch": 12.8125, "loss": 0.03765298053622246, "loss_ce": 1.9179929040546995e-06, "loss_iou": 0.1865234375, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 280681300, "step": 3075 }, { "epoch": 12.816666666666666, "grad_norm": 3.1401496759220255, "learning_rate": 5e-05, "loss": 0.0898, "num_input_tokens_seen": 280772832, "step": 3076 }, { "epoch": 12.816666666666666, "loss": 0.0684804618358612, "loss_ce": 0.00010582937102299184, "loss_iou": 0.1640625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 280772832, "step": 3076 }, { "epoch": 12.820833333333333, "grad_norm": 1.9620936144898184, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 280863912, "step": 3077 }, { "epoch": 12.820833333333333, "loss": 0.028973519802093506, "loss_ce": 9.626205428503454e-05, "loss_iou": 0.1875, "loss_num": 0.005767822265625, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 280863912, "step": 3077 }, { "epoch": 12.825, "grad_norm": 6.244746819631481, "learning_rate": 5e-05, "loss": 0.112, "num_input_tokens_seen": 280955080, "step": 3078 }, { "epoch": 12.825, "loss": 0.15819403529167175, "loss_ce": 0.010996315628290176, "loss_iou": 0.29296875, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 280955080, "step": 3078 }, { "epoch": 12.829166666666667, "grad_norm": 5.111417722850773, "learning_rate": 5e-05, "loss": 0.0755, "num_input_tokens_seen": 281046240, "step": 3079 }, { "epoch": 12.829166666666667, "loss": 0.0795697346329689, "loss_ce": 0.0017842412926256657, "loss_iou": 0.173828125, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 281046240, "step": 3079 }, { "epoch": 12.833333333333334, "grad_norm": 6.658668734378153, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 281137868, "step": 3080 }, { "epoch": 12.833333333333334, "loss": 0.05171462893486023, "loss_ce": 1.0222116543445736e-05, "loss_iou": 0.21875, "loss_num": 0.01031494140625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 281137868, "step": 3080 }, { "epoch": 12.8375, "grad_norm": 3.073369365091355, "learning_rate": 5e-05, "loss": 0.0455, "num_input_tokens_seen": 281229028, "step": 3081 }, { "epoch": 12.8375, "loss": 0.03657699003815651, "loss_ce": 9.30035184865119e-06, "loss_iou": 0.2255859375, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 281229028, "step": 3081 }, { "epoch": 12.841666666666667, "grad_norm": 4.071459919065855, "learning_rate": 5e-05, "loss": 0.1149, "num_input_tokens_seen": 281320012, "step": 3082 }, { "epoch": 12.841666666666667, "loss": 0.10545842349529266, "loss_ce": 5.0711940275505185e-05, "loss_iou": 0.283203125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 281320012, "step": 3082 }, { "epoch": 12.845833333333333, "grad_norm": 8.508769096963789, "learning_rate": 5e-05, "loss": 0.1094, "num_input_tokens_seen": 281411004, "step": 3083 }, { "epoch": 12.845833333333333, "loss": 0.17188432812690735, "loss_ce": 0.00025346592883579433, "loss_iou": 0.27734375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 281411004, "step": 3083 }, { "epoch": 12.85, "grad_norm": 2.3736834116012693, "learning_rate": 5e-05, "loss": 0.0723, "num_input_tokens_seen": 281501632, "step": 3084 }, { "epoch": 12.85, "loss": 0.08856417238712311, "loss_ce": 0.0005514741060324013, "loss_iou": 0.2412109375, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 281501632, "step": 3084 }, { "epoch": 12.854166666666666, "grad_norm": 10.554135196933103, "learning_rate": 5e-05, "loss": 0.0802, "num_input_tokens_seen": 281592620, "step": 3085 }, { "epoch": 12.854166666666666, "loss": 0.055758289992809296, "loss_ce": 1.7931039110408165e-05, "loss_iou": 0.169921875, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 281592620, "step": 3085 }, { "epoch": 12.858333333333333, "grad_norm": 6.416996665406724, "learning_rate": 5e-05, "loss": 0.0492, "num_input_tokens_seen": 281684444, "step": 3086 }, { "epoch": 12.858333333333333, "loss": 0.053812433034181595, "loss_ce": 0.008203910663723946, "loss_iou": 0.390625, "loss_num": 0.00909423828125, "loss_xval": 0.045654296875, "num_input_tokens_seen": 281684444, "step": 3086 }, { "epoch": 12.8625, "grad_norm": 2.7378792324830474, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 281776004, "step": 3087 }, { "epoch": 12.8625, "loss": 0.1071212887763977, "loss_ce": 0.003010562853887677, "loss_iou": 0.1396484375, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 281776004, "step": 3087 }, { "epoch": 12.866666666666667, "grad_norm": 5.4016920389500935, "learning_rate": 5e-05, "loss": 0.0784, "num_input_tokens_seen": 281866916, "step": 3088 }, { "epoch": 12.866666666666667, "loss": 0.10034558922052383, "loss_ce": 3.7943809729767963e-06, "loss_iou": 0.29296875, "loss_num": 0.02001953125, "loss_xval": 0.1005859375, "num_input_tokens_seen": 281866916, "step": 3088 }, { "epoch": 12.870833333333334, "grad_norm": 4.971595148868411, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 281958500, "step": 3089 }, { "epoch": 12.870833333333334, "loss": 0.049116350710392, "loss_ce": 0.00262281927280128, "loss_iou": 0.369140625, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 281958500, "step": 3089 }, { "epoch": 12.875, "grad_norm": 3.903255524586416, "learning_rate": 5e-05, "loss": 0.1098, "num_input_tokens_seen": 282049928, "step": 3090 }, { "epoch": 12.875, "loss": 0.11985423415899277, "loss_ce": 0.00030162016628310084, "loss_iou": 0.3203125, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 282049928, "step": 3090 }, { "epoch": 12.879166666666666, "grad_norm": 4.243647579617985, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 282140984, "step": 3091 }, { "epoch": 12.879166666666666, "loss": 0.05852103605866432, "loss_ce": 0.00048423168482258916, "loss_iou": 0.291015625, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 282140984, "step": 3091 }, { "epoch": 12.883333333333333, "grad_norm": 2.6784587835319487, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 282232476, "step": 3092 }, { "epoch": 12.883333333333333, "loss": 0.05952540040016174, "loss_ce": 1.6124329704325646e-05, "loss_iou": 0.330078125, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 282232476, "step": 3092 }, { "epoch": 12.8875, "grad_norm": 12.729861278675463, "learning_rate": 5e-05, "loss": 0.0748, "num_input_tokens_seen": 282323936, "step": 3093 }, { "epoch": 12.8875, "loss": 0.10516448318958282, "loss_ce": 5.430977398646064e-05, "loss_iou": 0.314453125, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 282323936, "step": 3093 }, { "epoch": 12.891666666666667, "grad_norm": 2.1835375010426437, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 282415116, "step": 3094 }, { "epoch": 12.891666666666667, "loss": 0.053579214960336685, "loss_ce": 6.664089596597478e-05, "loss_iou": 0.2119140625, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 282415116, "step": 3094 }, { "epoch": 12.895833333333334, "grad_norm": 2.974429504500354, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 282505324, "step": 3095 }, { "epoch": 12.895833333333334, "loss": 0.06614936143159866, "loss_ce": 0.0024133960250765085, "loss_iou": 0.341796875, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 282505324, "step": 3095 }, { "epoch": 12.9, "grad_norm": 2.828988907059202, "learning_rate": 5e-05, "loss": 0.1236, "num_input_tokens_seen": 282596564, "step": 3096 }, { "epoch": 12.9, "loss": 0.1410875916481018, "loss_ce": 0.0005541453720070422, "loss_iou": 0.23046875, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 282596564, "step": 3096 }, { "epoch": 12.904166666666667, "grad_norm": 2.5107003046604746, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 282688016, "step": 3097 }, { "epoch": 12.904166666666667, "loss": 0.049967456609010696, "loss_ce": 0.00010936275066342205, "loss_iou": 0.234375, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 282688016, "step": 3097 }, { "epoch": 12.908333333333333, "grad_norm": 6.166785831775601, "learning_rate": 5e-05, "loss": 0.1325, "num_input_tokens_seen": 282778720, "step": 3098 }, { "epoch": 12.908333333333333, "loss": 0.08252835273742676, "loss_ce": 8.823913049127441e-06, "loss_iou": 0.232421875, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 282778720, "step": 3098 }, { "epoch": 12.9125, "grad_norm": 3.4858220348262754, "learning_rate": 5e-05, "loss": 0.1047, "num_input_tokens_seen": 282869576, "step": 3099 }, { "epoch": 12.9125, "loss": 0.14050546288490295, "loss_ce": 2.538679382269038e-06, "loss_iou": 0.330078125, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 282869576, "step": 3099 }, { "epoch": 12.916666666666666, "grad_norm": 2.5003365309894066, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 282961324, "step": 3100 }, { "epoch": 12.916666666666666, "loss": 0.057860180735588074, "loss_ce": 0.0019672391936182976, "loss_iou": 0.2080078125, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 282961324, "step": 3100 }, { "epoch": 12.920833333333333, "grad_norm": 10.557839645286178, "learning_rate": 5e-05, "loss": 0.0705, "num_input_tokens_seen": 283052668, "step": 3101 }, { "epoch": 12.920833333333333, "loss": 0.09789260476827621, "loss_ce": 4.561465175356716e-05, "loss_iou": 0.271484375, "loss_num": 0.0196533203125, "loss_xval": 0.09765625, "num_input_tokens_seen": 283052668, "step": 3101 }, { "epoch": 12.925, "grad_norm": 3.909885904693897, "learning_rate": 5e-05, "loss": 0.076, "num_input_tokens_seen": 283143300, "step": 3102 }, { "epoch": 12.925, "loss": 0.057817135006189346, "loss_ce": 1.5816697214177111e-06, "loss_iou": 0.294921875, "loss_num": 0.01153564453125, "loss_xval": 0.057861328125, "num_input_tokens_seen": 283143300, "step": 3102 }, { "epoch": 12.929166666666667, "grad_norm": 5.099507288933682, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 283234708, "step": 3103 }, { "epoch": 12.929166666666667, "loss": 0.06371396780014038, "loss_ce": 0.0029229512438178062, "loss_iou": 0.2578125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 283234708, "step": 3103 }, { "epoch": 12.933333333333334, "grad_norm": 2.574523047575215, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 283325848, "step": 3104 }, { "epoch": 12.933333333333334, "loss": 0.056650642305612564, "loss_ce": 6.342041160678491e-05, "loss_iou": 0.228515625, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 283325848, "step": 3104 }, { "epoch": 12.9375, "grad_norm": 3.7387324962710315, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 283417176, "step": 3105 }, { "epoch": 12.9375, "loss": 0.07551056891679764, "loss_ce": 0.00034577763290144503, "loss_iou": 0.326171875, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 283417176, "step": 3105 }, { "epoch": 12.941666666666666, "grad_norm": 2.4073879710544164, "learning_rate": 5e-05, "loss": 0.0853, "num_input_tokens_seen": 283507832, "step": 3106 }, { "epoch": 12.941666666666666, "loss": 0.08266405016183853, "loss_ce": 6.822718569310382e-05, "loss_iou": 0.1904296875, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 283507832, "step": 3106 }, { "epoch": 12.945833333333333, "grad_norm": 2.7592590095427254, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 283599920, "step": 3107 }, { "epoch": 12.945833333333333, "loss": 0.07048561424016953, "loss_ce": 6.630267307627946e-05, "loss_iou": 0.21484375, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 283599920, "step": 3107 }, { "epoch": 12.95, "grad_norm": 1.702638937834739, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 283691752, "step": 3108 }, { "epoch": 12.95, "loss": 0.04186766594648361, "loss_ce": 0.00014250561071094126, "loss_iou": 0.201171875, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 283691752, "step": 3108 }, { "epoch": 12.954166666666667, "grad_norm": 2.709329891336406, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 283782824, "step": 3109 }, { "epoch": 12.954166666666667, "loss": 0.09974834322929382, "loss_ce": 1.637887521610537e-06, "loss_iou": 0.224609375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 283782824, "step": 3109 }, { "epoch": 12.958333333333334, "grad_norm": 4.142908063588557, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 283873860, "step": 3110 }, { "epoch": 12.958333333333334, "loss": 0.05954314023256302, "loss_ce": 0.0007510251016356051, "loss_iou": 0.1845703125, "loss_num": 0.01171875, "loss_xval": 0.058837890625, "num_input_tokens_seen": 283873860, "step": 3110 }, { "epoch": 12.9625, "grad_norm": 1.8766834715963754, "learning_rate": 5e-05, "loss": 0.0688, "num_input_tokens_seen": 283964804, "step": 3111 }, { "epoch": 12.9625, "loss": 0.06928491592407227, "loss_ce": 1.0013500286731869e-05, "loss_iou": 0.0908203125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 283964804, "step": 3111 }, { "epoch": 12.966666666666667, "grad_norm": 3.0770804610216795, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 284056028, "step": 3112 }, { "epoch": 12.966666666666667, "loss": 0.05841745436191559, "loss_ce": 6.809648766648024e-06, "loss_iou": 0.3359375, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 284056028, "step": 3112 }, { "epoch": 12.970833333333333, "grad_norm": 3.370660539882287, "learning_rate": 5e-05, "loss": 0.0783, "num_input_tokens_seen": 284147612, "step": 3113 }, { "epoch": 12.970833333333333, "loss": 0.05954941734671593, "loss_ce": 0.00043686505523510277, "loss_iou": 0.275390625, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 284147612, "step": 3113 }, { "epoch": 12.975, "grad_norm": 2.706977893222399, "learning_rate": 5e-05, "loss": 0.0685, "num_input_tokens_seen": 284239144, "step": 3114 }, { "epoch": 12.975, "loss": 0.09260821342468262, "loss_ce": 0.00010943684901576489, "loss_iou": 0.32421875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 284239144, "step": 3114 }, { "epoch": 12.979166666666666, "grad_norm": 2.558267317672637, "learning_rate": 5e-05, "loss": 0.095, "num_input_tokens_seen": 284330800, "step": 3115 }, { "epoch": 12.979166666666666, "loss": 0.09775135666131973, "loss_ce": 0.00044605947914533317, "loss_iou": 0.283203125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 284330800, "step": 3115 }, { "epoch": 12.983333333333333, "grad_norm": 1.704154512433284, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 284421776, "step": 3116 }, { "epoch": 12.983333333333333, "loss": 0.030959153547883034, "loss_ce": 6.699737241433468e-06, "loss_iou": 0.17578125, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 284421776, "step": 3116 }, { "epoch": 12.9875, "grad_norm": 1.6481778234420617, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 284512804, "step": 3117 }, { "epoch": 12.9875, "loss": 0.04519006237387657, "loss_ce": 3.9303664379986e-05, "loss_iou": 0.240234375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 284512804, "step": 3117 }, { "epoch": 12.991666666666667, "grad_norm": 3.9810323306664572, "learning_rate": 5e-05, "loss": 0.0784, "num_input_tokens_seen": 284604228, "step": 3118 }, { "epoch": 12.991666666666667, "loss": 0.07797044515609741, "loss_ce": 1.3288912668940611e-05, "loss_iou": 0.296875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 284604228, "step": 3118 }, { "epoch": 12.995833333333334, "grad_norm": 9.399576967826073, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 284695532, "step": 3119 }, { "epoch": 12.995833333333334, "loss": 0.0511610209941864, "loss_ce": 4.407762753544375e-05, "loss_iou": 0.2001953125, "loss_num": 0.01025390625, "loss_xval": 0.051025390625, "num_input_tokens_seen": 284695532, "step": 3119 }, { "epoch": 13.0, "grad_norm": 4.164992602967978, "learning_rate": 5e-05, "loss": 0.1244, "num_input_tokens_seen": 284786576, "step": 3120 }, { "epoch": 13.0, "loss": 0.18091799318790436, "loss_ce": 1.7431500964448787e-05, "loss_iou": 0.12890625, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 284786576, "step": 3120 }, { "epoch": 13.004166666666666, "grad_norm": 2.9079685810778533, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 284877304, "step": 3121 }, { "epoch": 13.004166666666666, "loss": 0.055656641721725464, "loss_ce": 7.834884854673874e-06, "loss_iou": 0.1826171875, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 284877304, "step": 3121 }, { "epoch": 13.008333333333333, "grad_norm": 2.787839217725438, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 284969176, "step": 3122 }, { "epoch": 13.008333333333333, "loss": 0.04686301201581955, "loss_ce": 4.904704837827012e-05, "loss_iou": 0.302734375, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 284969176, "step": 3122 }, { "epoch": 13.0125, "grad_norm": 2.7453722467302253, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 285060356, "step": 3123 }, { "epoch": 13.0125, "loss": 0.05110064521431923, "loss_ce": 1.4217559510143474e-05, "loss_iou": 0.3125, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 285060356, "step": 3123 }, { "epoch": 13.016666666666667, "grad_norm": 3.3803002591461024, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 285151684, "step": 3124 }, { "epoch": 13.016666666666667, "loss": 0.03558432683348656, "loss_ce": 0.0019844716880470514, "loss_iou": 0.306640625, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 285151684, "step": 3124 }, { "epoch": 13.020833333333334, "grad_norm": 2.220903338732122, "learning_rate": 5e-05, "loss": 0.1168, "num_input_tokens_seen": 285243324, "step": 3125 }, { "epoch": 13.020833333333334, "loss": 0.12469391524791718, "loss_ce": 2.9611199352075346e-05, "loss_iou": 0.023193359375, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 285243324, "step": 3125 }, { "epoch": 13.025, "grad_norm": 2.51031468120179, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 285335080, "step": 3126 }, { "epoch": 13.025, "loss": 0.07845841348171234, "loss_ce": 0.00031052250415086746, "loss_iou": 0.19140625, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 285335080, "step": 3126 }, { "epoch": 13.029166666666667, "grad_norm": 5.397176496704968, "learning_rate": 5e-05, "loss": 0.0767, "num_input_tokens_seen": 285426028, "step": 3127 }, { "epoch": 13.029166666666667, "loss": 0.036863990128040314, "loss_ce": 2.9271199309732765e-05, "loss_iou": 0.26953125, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 285426028, "step": 3127 }, { "epoch": 13.033333333333333, "grad_norm": 4.800806798698757, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 285517452, "step": 3128 }, { "epoch": 13.033333333333333, "loss": 0.044313944876194, "loss_ce": 2.530910205678083e-05, "loss_iou": 0.2138671875, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 285517452, "step": 3128 }, { "epoch": 13.0375, "grad_norm": 4.8157125922109945, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 285609188, "step": 3129 }, { "epoch": 13.0375, "loss": 0.09021648019552231, "loss_ce": 6.522158855659654e-06, "loss_iou": 0.232421875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 285609188, "step": 3129 }, { "epoch": 13.041666666666666, "grad_norm": 2.759377528077706, "learning_rate": 5e-05, "loss": 0.0869, "num_input_tokens_seen": 285699832, "step": 3130 }, { "epoch": 13.041666666666666, "loss": 0.08318091928958893, "loss_ce": 5.263358161755605e-06, "loss_iou": 0.28125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 285699832, "step": 3130 }, { "epoch": 13.045833333333333, "grad_norm": 2.551485384721823, "learning_rate": 5e-05, "loss": 0.0842, "num_input_tokens_seen": 285790920, "step": 3131 }, { "epoch": 13.045833333333333, "loss": 0.1195254698395729, "loss_ce": 3.3810711101978086e-06, "loss_iou": 0.3046875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 285790920, "step": 3131 }, { "epoch": 13.05, "grad_norm": 3.0899997537226866, "learning_rate": 5e-05, "loss": 0.0705, "num_input_tokens_seen": 285882040, "step": 3132 }, { "epoch": 13.05, "loss": 0.07134020328521729, "loss_ce": 5.3618177844327874e-06, "loss_iou": 0.1953125, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 285882040, "step": 3132 }, { "epoch": 13.054166666666667, "grad_norm": 8.38869458745163, "learning_rate": 5e-05, "loss": 0.0849, "num_input_tokens_seen": 285973412, "step": 3133 }, { "epoch": 13.054166666666667, "loss": 0.05942576751112938, "loss_ce": 8.045359209063463e-06, "loss_iou": 0.2373046875, "loss_num": 0.01190185546875, "loss_xval": 0.059326171875, "num_input_tokens_seen": 285973412, "step": 3133 }, { "epoch": 13.058333333333334, "grad_norm": 3.321566870982853, "learning_rate": 5e-05, "loss": 0.077, "num_input_tokens_seen": 286064536, "step": 3134 }, { "epoch": 13.058333333333334, "loss": 0.10176359862089157, "loss_ce": 0.00020109850447624922, "loss_iou": 0.32421875, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 286064536, "step": 3134 }, { "epoch": 13.0625, "grad_norm": 1.2412773253705964, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 286155540, "step": 3135 }, { "epoch": 13.0625, "loss": 0.03508618474006653, "loss_ce": 2.1485979232238606e-05, "loss_iou": 0.232421875, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 286155540, "step": 3135 }, { "epoch": 13.066666666666666, "grad_norm": 2.831490400001111, "learning_rate": 5e-05, "loss": 0.0646, "num_input_tokens_seen": 286246804, "step": 3136 }, { "epoch": 13.066666666666666, "loss": 0.04610330983996391, "loss_ce": 2.1765297788078897e-05, "loss_iou": 0.154296875, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 286246804, "step": 3136 }, { "epoch": 13.070833333333333, "grad_norm": 2.5292858582678353, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 286338372, "step": 3137 }, { "epoch": 13.070833333333333, "loss": 0.06301337480545044, "loss_ce": 2.8901637051603757e-05, "loss_iou": 0.1962890625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 286338372, "step": 3137 }, { "epoch": 13.075, "grad_norm": 1.956081203255567, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 286429816, "step": 3138 }, { "epoch": 13.075, "loss": 0.048599861562252045, "loss_ce": 3.113657294306904e-05, "loss_iou": 0.134765625, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 286429816, "step": 3138 }, { "epoch": 13.079166666666667, "grad_norm": 1.758795319298648, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 286521076, "step": 3139 }, { "epoch": 13.079166666666667, "loss": 0.04789041727781296, "loss_ce": 8.336599421454594e-06, "loss_iou": 0.296875, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 286521076, "step": 3139 }, { "epoch": 13.083333333333334, "grad_norm": 4.482802864674869, "learning_rate": 5e-05, "loss": 0.0946, "num_input_tokens_seen": 286611880, "step": 3140 }, { "epoch": 13.083333333333334, "loss": 0.07295797020196915, "loss_ce": 0.03647420182824135, "loss_iou": 0.244140625, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 286611880, "step": 3140 }, { "epoch": 13.0875, "grad_norm": 2.6958998324483803, "learning_rate": 5e-05, "loss": 0.088, "num_input_tokens_seen": 286703356, "step": 3141 }, { "epoch": 13.0875, "loss": 0.10715167224407196, "loss_ce": 0.00023333393619395792, "loss_iou": 0.1787109375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 286703356, "step": 3141 }, { "epoch": 13.091666666666667, "grad_norm": 2.4808391143690827, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 286794744, "step": 3142 }, { "epoch": 13.091666666666667, "loss": 0.04689784348011017, "loss_ce": 0.0003585351223591715, "loss_iou": 0.169921875, "loss_num": 0.00927734375, "loss_xval": 0.046630859375, "num_input_tokens_seen": 286794744, "step": 3142 }, { "epoch": 13.095833333333333, "grad_norm": 3.273393608824858, "learning_rate": 5e-05, "loss": 0.126, "num_input_tokens_seen": 286886376, "step": 3143 }, { "epoch": 13.095833333333333, "loss": 0.07159381359815598, "loss_ce": 4.535461630439386e-05, "loss_iou": 0.34375, "loss_num": 0.0142822265625, "loss_xval": 0.07177734375, "num_input_tokens_seen": 286886376, "step": 3143 }, { "epoch": 13.1, "grad_norm": 2.014824322296352, "learning_rate": 5e-05, "loss": 0.0648, "num_input_tokens_seen": 286977692, "step": 3144 }, { "epoch": 13.1, "loss": 0.05477256327867508, "loss_ce": 0.000542825844604522, "loss_iou": 0.2041015625, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 286977692, "step": 3144 }, { "epoch": 13.104166666666666, "grad_norm": 1.9183730508376748, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 287068988, "step": 3145 }, { "epoch": 13.104166666666666, "loss": 0.03723179176449776, "loss_ce": 1.560458986205049e-05, "loss_iou": 0.203125, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 287068988, "step": 3145 }, { "epoch": 13.108333333333333, "grad_norm": 12.41752476804688, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 287161128, "step": 3146 }, { "epoch": 13.108333333333333, "loss": 0.07558012008666992, "loss_ce": 0.0022005955688655376, "loss_iou": 0.2119140625, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 287161128, "step": 3146 }, { "epoch": 13.1125, "grad_norm": 2.3556336220738054, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 287252576, "step": 3147 }, { "epoch": 13.1125, "loss": 0.03619590029120445, "loss_ce": 0.0005971458158455789, "loss_iou": 0.28515625, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 287252576, "step": 3147 }, { "epoch": 13.116666666666667, "grad_norm": 3.9532078740492214, "learning_rate": 5e-05, "loss": 0.1146, "num_input_tokens_seen": 287343876, "step": 3148 }, { "epoch": 13.116666666666667, "loss": 0.048301875591278076, "loss_ce": 7.809003363945521e-06, "loss_iou": 0.388671875, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 287343876, "step": 3148 }, { "epoch": 13.120833333333334, "grad_norm": 2.2679126909560647, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 287435524, "step": 3149 }, { "epoch": 13.120833333333334, "loss": 0.05186320096254349, "loss_ce": 2.1469897546921857e-05, "loss_iou": 0.1796875, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 287435524, "step": 3149 }, { "epoch": 13.125, "grad_norm": 2.889271472199033, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 287526712, "step": 3150 }, { "epoch": 13.125, "loss": 0.07298816740512848, "loss_ce": 5.382002200349234e-06, "loss_iou": 0.2177734375, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 287526712, "step": 3150 }, { "epoch": 13.129166666666666, "grad_norm": 3.3133748528039897, "learning_rate": 5e-05, "loss": 0.0697, "num_input_tokens_seen": 287617580, "step": 3151 }, { "epoch": 13.129166666666666, "loss": 0.06128918379545212, "loss_ce": 0.0002845459384843707, "loss_iou": 0.322265625, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 287617580, "step": 3151 }, { "epoch": 13.133333333333333, "grad_norm": 1.5366254448525556, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 287709012, "step": 3152 }, { "epoch": 13.133333333333333, "loss": 0.06348831951618195, "loss_ce": 1.1753787475754507e-05, "loss_iou": 0.025634765625, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 287709012, "step": 3152 }, { "epoch": 13.1375, "grad_norm": 5.812825209981419, "learning_rate": 5e-05, "loss": 0.0578, "num_input_tokens_seen": 287800116, "step": 3153 }, { "epoch": 13.1375, "loss": 0.06690312922000885, "loss_ce": 7.726660260232165e-05, "loss_iou": 0.1591796875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 287800116, "step": 3153 }, { "epoch": 13.141666666666667, "grad_norm": 3.549976425566346, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 287891084, "step": 3154 }, { "epoch": 13.141666666666667, "loss": 0.04346313700079918, "loss_ce": 6.105668035161216e-06, "loss_iou": 0.205078125, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 287891084, "step": 3154 }, { "epoch": 13.145833333333334, "grad_norm": 3.045076072600189, "learning_rate": 5e-05, "loss": 0.1268, "num_input_tokens_seen": 287982756, "step": 3155 }, { "epoch": 13.145833333333334, "loss": 0.19146740436553955, "loss_ce": 7.640861440449953e-05, "loss_iou": 0.33203125, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 287982756, "step": 3155 }, { "epoch": 13.15, "grad_norm": 3.803486531579513, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 288073972, "step": 3156 }, { "epoch": 13.15, "loss": 0.0343029648065567, "loss_ce": 1.2068505839124555e-06, "loss_iou": 0.3515625, "loss_num": 0.006866455078125, "loss_xval": 0.0341796875, "num_input_tokens_seen": 288073972, "step": 3156 }, { "epoch": 13.154166666666667, "grad_norm": 2.7662928320027382, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 288165052, "step": 3157 }, { "epoch": 13.154166666666667, "loss": 0.0952390804886818, "loss_ce": 8.984496162156574e-06, "loss_iou": 0.1875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 288165052, "step": 3157 }, { "epoch": 13.158333333333333, "grad_norm": 4.308966729779597, "learning_rate": 5e-05, "loss": 0.0702, "num_input_tokens_seen": 288256128, "step": 3158 }, { "epoch": 13.158333333333333, "loss": 0.06495509296655655, "loss_ce": 0.0002730850537773222, "loss_iou": 0.32421875, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 288256128, "step": 3158 }, { "epoch": 13.1625, "grad_norm": 3.8397928699877433, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 288347464, "step": 3159 }, { "epoch": 13.1625, "loss": 0.04620542749762535, "loss_ce": 1.816176222746435e-06, "loss_iou": 0.3671875, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 288347464, "step": 3159 }, { "epoch": 13.166666666666666, "grad_norm": 2.3495969026979284, "learning_rate": 5e-05, "loss": 0.0437, "num_input_tokens_seen": 288438828, "step": 3160 }, { "epoch": 13.166666666666666, "loss": 0.04184609279036522, "loss_ce": 2.1753499822807498e-05, "loss_iou": 0.146484375, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 288438828, "step": 3160 }, { "epoch": 13.170833333333333, "grad_norm": 2.1779618590482057, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 288529740, "step": 3161 }, { "epoch": 13.170833333333333, "loss": 0.10389817506074905, "loss_ce": 1.0718932799136383e-06, "loss_iou": 0.1123046875, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 288529740, "step": 3161 }, { "epoch": 13.175, "grad_norm": 4.719728899260996, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 288620912, "step": 3162 }, { "epoch": 13.175, "loss": 0.0952850729227066, "loss_ce": 1.5630433836122393e-06, "loss_iou": 0.36328125, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 288620912, "step": 3162 }, { "epoch": 13.179166666666667, "grad_norm": 3.034087198442263, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 288712524, "step": 3163 }, { "epoch": 13.179166666666667, "loss": 0.051329098641872406, "loss_ce": 0.0002426707505946979, "loss_iou": 0.19140625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 288712524, "step": 3163 }, { "epoch": 13.183333333333334, "grad_norm": 2.4655614107995922, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 288803184, "step": 3164 }, { "epoch": 13.183333333333334, "loss": 0.055702660232782364, "loss_ce": 8.08061577117769e-06, "loss_iou": 0.248046875, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 288803184, "step": 3164 }, { "epoch": 13.1875, "grad_norm": 3.0002036304689423, "learning_rate": 5e-05, "loss": 0.0751, "num_input_tokens_seen": 288893656, "step": 3165 }, { "epoch": 13.1875, "loss": 0.07198071479797363, "loss_ce": 5.004450486012502e-06, "loss_iou": 0.3515625, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 288893656, "step": 3165 }, { "epoch": 13.191666666666666, "grad_norm": 3.3823548533561465, "learning_rate": 5e-05, "loss": 0.0526, "num_input_tokens_seen": 288985708, "step": 3166 }, { "epoch": 13.191666666666666, "loss": 0.061142697930336, "loss_ce": 7.702488801442087e-05, "loss_iou": 0.275390625, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 288985708, "step": 3166 }, { "epoch": 13.195833333333333, "grad_norm": 3.1353485248169104, "learning_rate": 5e-05, "loss": 0.0708, "num_input_tokens_seen": 289076480, "step": 3167 }, { "epoch": 13.195833333333333, "loss": 0.06192321702837944, "loss_ce": 3.05069329442631e-06, "loss_iou": 0.298828125, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 289076480, "step": 3167 }, { "epoch": 13.2, "grad_norm": 2.0384665273080653, "learning_rate": 5e-05, "loss": 0.0371, "num_input_tokens_seen": 289167716, "step": 3168 }, { "epoch": 13.2, "loss": 0.05005306378006935, "loss_ce": 4.234663720126264e-06, "loss_iou": 0.08447265625, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 289167716, "step": 3168 }, { "epoch": 13.204166666666667, "grad_norm": 3.8817508516141057, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 289258356, "step": 3169 }, { "epoch": 13.204166666666667, "loss": 0.05380536615848541, "loss_ce": 2.8745339477609377e-06, "loss_iou": 0.291015625, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 289258356, "step": 3169 }, { "epoch": 13.208333333333334, "grad_norm": 3.1083953606747827, "learning_rate": 5e-05, "loss": 0.0864, "num_input_tokens_seen": 289349840, "step": 3170 }, { "epoch": 13.208333333333334, "loss": 0.09058225899934769, "loss_ce": 3.6607481888495386e-05, "loss_iou": 0.2109375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 289349840, "step": 3170 }, { "epoch": 13.2125, "grad_norm": 2.1639030749255412, "learning_rate": 5e-05, "loss": 0.0332, "num_input_tokens_seen": 289440916, "step": 3171 }, { "epoch": 13.2125, "loss": 0.027187222614884377, "loss_ce": 0.00014864768309053034, "loss_iou": 0.1923828125, "loss_num": 0.005401611328125, "loss_xval": 0.027099609375, "num_input_tokens_seen": 289440916, "step": 3171 }, { "epoch": 13.216666666666667, "grad_norm": 2.6789458317545987, "learning_rate": 5e-05, "loss": 0.0413, "num_input_tokens_seen": 289532536, "step": 3172 }, { "epoch": 13.216666666666667, "loss": 0.037016674876213074, "loss_ce": 0.00027350973687134683, "loss_iou": 0.234375, "loss_num": 0.007354736328125, "loss_xval": 0.03662109375, "num_input_tokens_seen": 289532536, "step": 3172 }, { "epoch": 13.220833333333333, "grad_norm": 3.155049670169387, "learning_rate": 5e-05, "loss": 0.048, "num_input_tokens_seen": 289624016, "step": 3173 }, { "epoch": 13.220833333333333, "loss": 0.04516705870628357, "loss_ce": 1.629926191526465e-05, "loss_iou": 0.322265625, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 289624016, "step": 3173 }, { "epoch": 13.225, "grad_norm": 2.230629433782654, "learning_rate": 5e-05, "loss": 0.069, "num_input_tokens_seen": 289715732, "step": 3174 }, { "epoch": 13.225, "loss": 0.08780436962842941, "loss_ce": 0.0012717776698991656, "loss_iou": 0.265625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 289715732, "step": 3174 }, { "epoch": 13.229166666666666, "grad_norm": 3.4239700489282483, "learning_rate": 5e-05, "loss": 0.0747, "num_input_tokens_seen": 289807220, "step": 3175 }, { "epoch": 13.229166666666666, "loss": 0.08312824368476868, "loss_ce": 4.4145624997327104e-05, "loss_iou": 0.224609375, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 289807220, "step": 3175 }, { "epoch": 13.233333333333333, "grad_norm": 3.053632576851467, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 289898104, "step": 3176 }, { "epoch": 13.233333333333333, "loss": 0.043274927884340286, "loss_ce": 1.626130506338086e-05, "loss_iou": 0.2734375, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 289898104, "step": 3176 }, { "epoch": 13.2375, "grad_norm": 2.529505553985378, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 289989496, "step": 3177 }, { "epoch": 13.2375, "loss": 0.05597273260354996, "loss_ce": 0.0005680684698745608, "loss_iou": 0.29296875, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 289989496, "step": 3177 }, { "epoch": 13.241666666666667, "grad_norm": 3.0168185065630957, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 290081328, "step": 3178 }, { "epoch": 13.241666666666667, "loss": 0.04695458710193634, "loss_ce": 3.290790800747345e-06, "loss_iou": 0.353515625, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 290081328, "step": 3178 }, { "epoch": 13.245833333333334, "grad_norm": 10.427004976348053, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 290172672, "step": 3179 }, { "epoch": 13.245833333333334, "loss": 0.04928769916296005, "loss_ce": 7.810293027432635e-05, "loss_iou": 0.3515625, "loss_num": 0.00982666015625, "loss_xval": 0.04931640625, "num_input_tokens_seen": 290172672, "step": 3179 }, { "epoch": 13.25, "grad_norm": 3.649719781616416, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 290264228, "step": 3180 }, { "epoch": 13.25, "loss": 0.056522026658058167, "loss_ce": 1.1101107702415902e-05, "loss_iou": 0.32421875, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 290264228, "step": 3180 }, { "epoch": 13.254166666666666, "grad_norm": 2.619291730296451, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 290355480, "step": 3181 }, { "epoch": 13.254166666666666, "loss": 0.054982878267765045, "loss_ce": 0.0004708552733063698, "loss_iou": 0.28515625, "loss_num": 0.01092529296875, "loss_xval": 0.054443359375, "num_input_tokens_seen": 290355480, "step": 3181 }, { "epoch": 13.258333333333333, "grad_norm": 2.2875459548458377, "learning_rate": 5e-05, "loss": 0.0497, "num_input_tokens_seen": 290447160, "step": 3182 }, { "epoch": 13.258333333333333, "loss": 0.05870981141924858, "loss_ce": 7.028212712612003e-05, "loss_iou": 0.41015625, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 290447160, "step": 3182 }, { "epoch": 13.2625, "grad_norm": 12.306772143262872, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 290538864, "step": 3183 }, { "epoch": 13.2625, "loss": 0.02843681536614895, "loss_ce": 2.494886939530261e-05, "loss_iou": 0.2431640625, "loss_num": 0.00567626953125, "loss_xval": 0.0284423828125, "num_input_tokens_seen": 290538864, "step": 3183 }, { "epoch": 13.266666666666667, "grad_norm": 2.1338759100200586, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 290630028, "step": 3184 }, { "epoch": 13.266666666666667, "loss": 0.059931568801403046, "loss_ce": 2.67685436483589e-06, "loss_iou": 0.291015625, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 290630028, "step": 3184 }, { "epoch": 13.270833333333334, "grad_norm": 1.8325673657695876, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 290719740, "step": 3185 }, { "epoch": 13.270833333333334, "loss": 0.04650639742612839, "loss_ce": 0.0071082036010921, "loss_iou": 0.181640625, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 290719740, "step": 3185 }, { "epoch": 13.275, "grad_norm": 1.2132126586571146, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 290811232, "step": 3186 }, { "epoch": 13.275, "loss": 0.03850376605987549, "loss_ce": 5.842209247930441e-06, "loss_iou": 0.19140625, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 290811232, "step": 3186 }, { "epoch": 13.279166666666667, "grad_norm": 2.764621298389163, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 290902284, "step": 3187 }, { "epoch": 13.279166666666667, "loss": 0.04973310977220535, "loss_ce": 4.718270247394685e-06, "loss_iou": 0.251953125, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 290902284, "step": 3187 }, { "epoch": 13.283333333333333, "grad_norm": 2.4934463767616686, "learning_rate": 5e-05, "loss": 0.0329, "num_input_tokens_seen": 290993196, "step": 3188 }, { "epoch": 13.283333333333333, "loss": 0.04270722717046738, "loss_ce": 1.3137170753907412e-05, "loss_iou": 0.26171875, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 290993196, "step": 3188 }, { "epoch": 13.2875, "grad_norm": 3.3667243336877553, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 291084408, "step": 3189 }, { "epoch": 13.2875, "loss": 0.06418552249670029, "loss_ce": 0.0002435675705783069, "loss_iou": 0.291015625, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 291084408, "step": 3189 }, { "epoch": 13.291666666666666, "grad_norm": 2.904027696337672, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 291174800, "step": 3190 }, { "epoch": 13.291666666666666, "loss": 0.05541330575942993, "loss_ce": 2.3902084649307653e-05, "loss_iou": 0.296875, "loss_num": 0.01104736328125, "loss_xval": 0.055419921875, "num_input_tokens_seen": 291174800, "step": 3190 }, { "epoch": 13.295833333333333, "grad_norm": 5.006072671935732, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 291266916, "step": 3191 }, { "epoch": 13.295833333333333, "loss": 0.046564217656850815, "loss_ce": 0.00014698227460030466, "loss_iou": 0.302734375, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 291266916, "step": 3191 }, { "epoch": 13.3, "grad_norm": 2.719090840073287, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 291358592, "step": 3192 }, { "epoch": 13.3, "loss": 0.050211891531944275, "loss_ce": 0.001048073172569275, "loss_iou": 0.296875, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 291358592, "step": 3192 }, { "epoch": 13.304166666666667, "grad_norm": 2.189135791353442, "learning_rate": 5e-05, "loss": 0.0834, "num_input_tokens_seen": 291449948, "step": 3193 }, { "epoch": 13.304166666666667, "loss": 0.10209139436483383, "loss_ce": 0.00016268000763375312, "loss_iou": 0.419921875, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 291449948, "step": 3193 }, { "epoch": 13.308333333333334, "grad_norm": 5.16939944493548, "learning_rate": 5e-05, "loss": 0.0639, "num_input_tokens_seen": 291540952, "step": 3194 }, { "epoch": 13.308333333333334, "loss": 0.0835447683930397, "loss_ce": 2.898941602325067e-06, "loss_iou": 0.349609375, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 291540952, "step": 3194 }, { "epoch": 13.3125, "grad_norm": 27.4742236946222, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 291632664, "step": 3195 }, { "epoch": 13.3125, "loss": 0.05430099368095398, "loss_ce": 1.0216759619652294e-05, "loss_iou": 0.2734375, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 291632664, "step": 3195 }, { "epoch": 13.316666666666666, "grad_norm": 1.853929865425854, "learning_rate": 5e-05, "loss": 0.0951, "num_input_tokens_seen": 291723896, "step": 3196 }, { "epoch": 13.316666666666666, "loss": 0.058908045291900635, "loss_ce": 1.4912147889845073e-06, "loss_iou": 0.1943359375, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 291723896, "step": 3196 }, { "epoch": 13.320833333333333, "grad_norm": 1.4714996835976843, "learning_rate": 5e-05, "loss": 0.0501, "num_input_tokens_seen": 291814032, "step": 3197 }, { "epoch": 13.320833333333333, "loss": 0.0393851101398468, "loss_ce": 4.032349170302041e-05, "loss_iou": 0.2158203125, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 291814032, "step": 3197 }, { "epoch": 13.325, "grad_norm": 2.0295531105932603, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 291905260, "step": 3198 }, { "epoch": 13.325, "loss": 0.09402615576982498, "loss_ce": 1.5004483202574193e-06, "loss_iou": 0.2373046875, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 291905260, "step": 3198 }, { "epoch": 13.329166666666667, "grad_norm": 6.217075773976911, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 291996148, "step": 3199 }, { "epoch": 13.329166666666667, "loss": 0.06208435446023941, "loss_ce": 4.211819759802893e-05, "loss_iou": 0.28515625, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 291996148, "step": 3199 }, { "epoch": 13.333333333333334, "grad_norm": 7.558257323309482, "learning_rate": 5e-05, "loss": 0.0741, "num_input_tokens_seen": 292087368, "step": 3200 }, { "epoch": 13.333333333333334, "loss": 0.06037037819623947, "loss_ce": 6.764857971575111e-05, "loss_iou": 0.287109375, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 292087368, "step": 3200 }, { "epoch": 13.3375, "grad_norm": 3.1211902818135626, "learning_rate": 5e-05, "loss": 0.0708, "num_input_tokens_seen": 292178960, "step": 3201 }, { "epoch": 13.3375, "loss": 0.05840154364705086, "loss_ce": 0.00011296742013655603, "loss_iou": 0.287109375, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 292178960, "step": 3201 }, { "epoch": 13.341666666666667, "grad_norm": 4.746488694193927, "learning_rate": 5e-05, "loss": 0.0896, "num_input_tokens_seen": 292270872, "step": 3202 }, { "epoch": 13.341666666666667, "loss": 0.12660613656044006, "loss_ce": 0.003002327401190996, "loss_iou": 0.388671875, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 292270872, "step": 3202 }, { "epoch": 13.345833333333333, "grad_norm": 5.126919814558636, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 292362420, "step": 3203 }, { "epoch": 13.345833333333333, "loss": 0.0694979578256607, "loss_ce": 0.00045193356345407665, "loss_iou": 0.380859375, "loss_num": 0.01385498046875, "loss_xval": 0.06884765625, "num_input_tokens_seen": 292362420, "step": 3203 }, { "epoch": 13.35, "grad_norm": 4.9269241324619735, "learning_rate": 5e-05, "loss": 0.0797, "num_input_tokens_seen": 292453896, "step": 3204 }, { "epoch": 13.35, "loss": 0.07445742189884186, "loss_ce": 8.608803909737617e-05, "loss_iou": 0.306640625, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 292453896, "step": 3204 }, { "epoch": 13.354166666666666, "grad_norm": 4.441509840417962, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 292545344, "step": 3205 }, { "epoch": 13.354166666666666, "loss": 0.05894090607762337, "loss_ce": 4.198206079308875e-05, "loss_iou": 0.23828125, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 292545344, "step": 3205 }, { "epoch": 13.358333333333333, "grad_norm": 10.734619290972743, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 292636268, "step": 3206 }, { "epoch": 13.358333333333333, "loss": 0.06319648027420044, "loss_ce": 9.830087947193533e-06, "loss_iou": 0.228515625, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 292636268, "step": 3206 }, { "epoch": 13.3625, "grad_norm": 3.1316433085948847, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 292727492, "step": 3207 }, { "epoch": 13.3625, "loss": 0.05296047031879425, "loss_ce": 7.350678788498044e-05, "loss_iou": 0.20703125, "loss_num": 0.01055908203125, "loss_xval": 0.052978515625, "num_input_tokens_seen": 292727492, "step": 3207 }, { "epoch": 13.366666666666667, "grad_norm": 3.5349685046344845, "learning_rate": 5e-05, "loss": 0.0942, "num_input_tokens_seen": 292819156, "step": 3208 }, { "epoch": 13.366666666666667, "loss": 0.13701561093330383, "loss_ce": 0.00022056905436329544, "loss_iou": 0.2138671875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 292819156, "step": 3208 }, { "epoch": 13.370833333333334, "grad_norm": 3.446596785343221, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 292909460, "step": 3209 }, { "epoch": 13.370833333333334, "loss": 0.08869334310293198, "loss_ce": 0.0007569487206637859, "loss_iou": 0.3203125, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 292909460, "step": 3209 }, { "epoch": 13.375, "grad_norm": 3.1388333940340516, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 293001040, "step": 3210 }, { "epoch": 13.375, "loss": 0.07420412451028824, "loss_ce": 0.00012270617298781872, "loss_iou": 0.328125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 293001040, "step": 3210 }, { "epoch": 13.379166666666666, "grad_norm": 3.0297964653931015, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 293091976, "step": 3211 }, { "epoch": 13.379166666666666, "loss": 0.06871441006660461, "loss_ce": 4.083505245944252e-06, "loss_iou": 0.23828125, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 293091976, "step": 3211 }, { "epoch": 13.383333333333333, "grad_norm": 2.8523748146602013, "learning_rate": 5e-05, "loss": 0.0787, "num_input_tokens_seen": 293183272, "step": 3212 }, { "epoch": 13.383333333333333, "loss": 0.08873356878757477, "loss_ce": 3.7158408758841688e-06, "loss_iou": 0.3671875, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 293183272, "step": 3212 }, { "epoch": 13.3875, "grad_norm": 3.764933436661599, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 293274228, "step": 3213 }, { "epoch": 13.3875, "loss": 0.0799584686756134, "loss_ce": 2.4127339202095754e-06, "loss_iou": 0.28515625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 293274228, "step": 3213 }, { "epoch": 13.391666666666667, "grad_norm": 1.6610604273488636, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 293365980, "step": 3214 }, { "epoch": 13.391666666666667, "loss": 0.043038297444581985, "loss_ce": 8.513585271430202e-06, "loss_iou": 0.28515625, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 293365980, "step": 3214 }, { "epoch": 13.395833333333334, "grad_norm": 3.524622724240209, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 293457464, "step": 3215 }, { "epoch": 13.395833333333334, "loss": 0.06250010430812836, "loss_ce": 0.00016794718976598233, "loss_iou": 0.3671875, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 293457464, "step": 3215 }, { "epoch": 13.4, "grad_norm": 2.8888911715197008, "learning_rate": 5e-05, "loss": 0.0707, "num_input_tokens_seen": 293549552, "step": 3216 }, { "epoch": 13.4, "loss": 0.07175292819738388, "loss_ce": 0.00015869125491008162, "loss_iou": 0.349609375, "loss_num": 0.0142822265625, "loss_xval": 0.07177734375, "num_input_tokens_seen": 293549552, "step": 3216 }, { "epoch": 13.404166666666667, "grad_norm": 3.3778301357320637, "learning_rate": 5e-05, "loss": 0.0598, "num_input_tokens_seen": 293641200, "step": 3217 }, { "epoch": 13.404166666666667, "loss": 0.05942009389400482, "loss_ce": 4.814373096451163e-05, "loss_iou": 0.259765625, "loss_num": 0.01190185546875, "loss_xval": 0.059326171875, "num_input_tokens_seen": 293641200, "step": 3217 }, { "epoch": 13.408333333333333, "grad_norm": 2.7493530635289845, "learning_rate": 5e-05, "loss": 0.0614, "num_input_tokens_seen": 293732296, "step": 3218 }, { "epoch": 13.408333333333333, "loss": 0.05776010453701019, "loss_ce": 5.587176019616891e-06, "loss_iou": 0.2119140625, "loss_num": 0.01153564453125, "loss_xval": 0.057861328125, "num_input_tokens_seen": 293732296, "step": 3218 }, { "epoch": 13.4125, "grad_norm": 2.4829311814395063, "learning_rate": 5e-05, "loss": 0.0345, "num_input_tokens_seen": 293821980, "step": 3219 }, { "epoch": 13.4125, "loss": 0.03791956976056099, "loss_ce": 1.4795160723224399e-06, "loss_iou": 0.31640625, "loss_num": 0.007598876953125, "loss_xval": 0.037841796875, "num_input_tokens_seen": 293821980, "step": 3219 }, { "epoch": 13.416666666666666, "grad_norm": 3.703491461010344, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 293913188, "step": 3220 }, { "epoch": 13.416666666666666, "loss": 0.0662919133901596, "loss_ce": 0.0001298037386732176, "loss_iou": 0.2412109375, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 293913188, "step": 3220 }, { "epoch": 13.420833333333333, "grad_norm": 3.084202386080775, "learning_rate": 5e-05, "loss": 0.0756, "num_input_tokens_seen": 294004852, "step": 3221 }, { "epoch": 13.420833333333333, "loss": 0.1051362007856369, "loss_ce": 0.0005028643645346165, "loss_iou": 0.24609375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 294004852, "step": 3221 }, { "epoch": 13.425, "grad_norm": 3.1228620062025017, "learning_rate": 5e-05, "loss": 0.1263, "num_input_tokens_seen": 294096136, "step": 3222 }, { "epoch": 13.425, "loss": 0.17248567938804626, "loss_ce": 0.00010714141535572708, "loss_iou": 0.345703125, "loss_num": 0.034423828125, "loss_xval": 0.1728515625, "num_input_tokens_seen": 294096136, "step": 3222 }, { "epoch": 13.429166666666667, "grad_norm": 1.37170971948216, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 294187544, "step": 3223 }, { "epoch": 13.429166666666667, "loss": 0.03299761191010475, "loss_ce": 6.914298137417063e-05, "loss_iou": 0.20703125, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 294187544, "step": 3223 }, { "epoch": 13.433333333333334, "grad_norm": 2.073077056193961, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 294279420, "step": 3224 }, { "epoch": 13.433333333333334, "loss": 0.04905780777335167, "loss_ce": 8.432472895947285e-06, "loss_iou": 0.314453125, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 294279420, "step": 3224 }, { "epoch": 13.4375, "grad_norm": 3.519264702945926, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 294370768, "step": 3225 }, { "epoch": 13.4375, "loss": 0.035311006009578705, "loss_ce": 0.00018527149222791195, "loss_iou": 0.33984375, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 294370768, "step": 3225 }, { "epoch": 13.441666666666666, "grad_norm": 2.2554218223026634, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 294462096, "step": 3226 }, { "epoch": 13.441666666666666, "loss": 0.05369444936513901, "loss_ce": 0.006071767769753933, "loss_iou": 0.287109375, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 294462096, "step": 3226 }, { "epoch": 13.445833333333333, "grad_norm": 2.390943543831352, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 294553516, "step": 3227 }, { "epoch": 13.445833333333333, "loss": 0.05189789831638336, "loss_ce": 2.753644821495982e-06, "loss_iou": 0.220703125, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 294553516, "step": 3227 }, { "epoch": 13.45, "grad_norm": 2.6968114523630544, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 294645088, "step": 3228 }, { "epoch": 13.45, "loss": 0.05043869838118553, "loss_ce": 5.417674037744291e-05, "loss_iou": 0.2119140625, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 294645088, "step": 3228 }, { "epoch": 13.454166666666667, "grad_norm": 4.856076039293107, "learning_rate": 5e-05, "loss": 0.0846, "num_input_tokens_seen": 294736588, "step": 3229 }, { "epoch": 13.454166666666667, "loss": 0.09605845808982849, "loss_ce": 0.0003095621941611171, "loss_iou": 0.2734375, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 294736588, "step": 3229 }, { "epoch": 13.458333333333334, "grad_norm": 2.5069662318370267, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 294828220, "step": 3230 }, { "epoch": 13.458333333333334, "loss": 0.047693684697151184, "loss_ce": 0.0015358488308265805, "loss_iou": 0.314453125, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 294828220, "step": 3230 }, { "epoch": 13.4625, "grad_norm": 8.298429445162126, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 294919444, "step": 3231 }, { "epoch": 13.4625, "loss": 0.062046781182289124, "loss_ce": 7.320824079215527e-05, "loss_iou": 0.1416015625, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 294919444, "step": 3231 }, { "epoch": 13.466666666666667, "grad_norm": 1.5209402952815354, "learning_rate": 5e-05, "loss": 0.0973, "num_input_tokens_seen": 295010332, "step": 3232 }, { "epoch": 13.466666666666667, "loss": 0.07522740960121155, "loss_ce": 0.0013748712372034788, "loss_iou": 0.158203125, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 295010332, "step": 3232 }, { "epoch": 13.470833333333333, "grad_norm": 1.5515329397715614, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 295101164, "step": 3233 }, { "epoch": 13.470833333333333, "loss": 0.031293466687202454, "loss_ce": 4.3465348426252604e-05, "loss_iou": 0.169921875, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 295101164, "step": 3233 }, { "epoch": 13.475, "grad_norm": 2.279928448141374, "learning_rate": 5e-05, "loss": 0.0578, "num_input_tokens_seen": 295192356, "step": 3234 }, { "epoch": 13.475, "loss": 0.044148027896881104, "loss_ce": 4.352897576609394e-06, "loss_iou": 0.2431640625, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 295192356, "step": 3234 }, { "epoch": 13.479166666666666, "grad_norm": 6.026628542014703, "learning_rate": 5e-05, "loss": 0.0724, "num_input_tokens_seen": 295283856, "step": 3235 }, { "epoch": 13.479166666666666, "loss": 0.10446874797344208, "loss_ce": 0.00020544748986139894, "loss_iou": 0.361328125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 295283856, "step": 3235 }, { "epoch": 13.483333333333333, "grad_norm": 3.62820253390924, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 295375088, "step": 3236 }, { "epoch": 13.483333333333333, "loss": 0.07601499557495117, "loss_ce": 0.00011778472980950028, "loss_iou": 0.41796875, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 295375088, "step": 3236 }, { "epoch": 13.4875, "grad_norm": 3.6475192112020154, "learning_rate": 5e-05, "loss": 0.0743, "num_input_tokens_seen": 295466100, "step": 3237 }, { "epoch": 13.4875, "loss": 0.0731085017323494, "loss_ce": 3.415943865547888e-05, "loss_iou": 0.330078125, "loss_num": 0.01458740234375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 295466100, "step": 3237 }, { "epoch": 13.491666666666667, "grad_norm": 2.6123224492326496, "learning_rate": 5e-05, "loss": 0.0923, "num_input_tokens_seen": 295557220, "step": 3238 }, { "epoch": 13.491666666666667, "loss": 0.05012810602784157, "loss_ce": 6.401949940482154e-05, "loss_iou": 0.388671875, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 295557220, "step": 3238 }, { "epoch": 13.495833333333334, "grad_norm": 3.359588517198497, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 295648748, "step": 3239 }, { "epoch": 13.495833333333334, "loss": 0.036655161529779434, "loss_ce": 3.5483176361594815e-06, "loss_iou": 0.2890625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 295648748, "step": 3239 }, { "epoch": 13.5, "grad_norm": 3.604821762237533, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 295740244, "step": 3240 }, { "epoch": 13.5, "loss": 0.062007561326026917, "loss_ce": 0.001887934748083353, "loss_iou": 0.19140625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 295740244, "step": 3240 }, { "epoch": 13.504166666666666, "grad_norm": 4.055241534775984, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 295830984, "step": 3241 }, { "epoch": 13.504166666666666, "loss": 0.07002242654561996, "loss_ce": 0.0029600486159324646, "loss_iou": 0.203125, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 295830984, "step": 3241 }, { "epoch": 13.508333333333333, "grad_norm": 6.029464768209386, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 295922340, "step": 3242 }, { "epoch": 13.508333333333333, "loss": 0.06743155419826508, "loss_ce": 0.00014029696467332542, "loss_iou": 0.27734375, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 295922340, "step": 3242 }, { "epoch": 13.5125, "grad_norm": 2.6982344595986687, "learning_rate": 5e-05, "loss": 0.0921, "num_input_tokens_seen": 296013456, "step": 3243 }, { "epoch": 13.5125, "loss": 0.05534626543521881, "loss_ce": 0.0016963636735454202, "loss_iou": 0.26953125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 296013456, "step": 3243 }, { "epoch": 13.516666666666667, "grad_norm": 2.8279473447502537, "learning_rate": 5e-05, "loss": 0.0931, "num_input_tokens_seen": 296105064, "step": 3244 }, { "epoch": 13.516666666666667, "loss": 0.05431250110268593, "loss_ce": 0.0002506112796254456, "loss_iou": 0.17578125, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 296105064, "step": 3244 }, { "epoch": 13.520833333333334, "grad_norm": 3.0632875241857533, "learning_rate": 5e-05, "loss": 0.0859, "num_input_tokens_seen": 296196896, "step": 3245 }, { "epoch": 13.520833333333334, "loss": 0.08690355718135834, "loss_ce": 0.0030870346818119287, "loss_iou": 0.2177734375, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 296196896, "step": 3245 }, { "epoch": 13.525, "grad_norm": 47.683143876117725, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 296287372, "step": 3246 }, { "epoch": 13.525, "loss": 0.052827537059783936, "loss_ce": 1.6864692952367477e-05, "loss_iou": 0.212890625, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 296287372, "step": 3246 }, { "epoch": 13.529166666666667, "grad_norm": 5.317836818171935, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 296379124, "step": 3247 }, { "epoch": 13.529166666666667, "loss": 0.04468844458460808, "loss_ce": 1.0708215995691717e-05, "loss_iou": 0.1865234375, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 296379124, "step": 3247 }, { "epoch": 13.533333333333333, "grad_norm": 1.592828681904474, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 296470452, "step": 3248 }, { "epoch": 13.533333333333333, "loss": 0.057369768619537354, "loss_ce": 0.00043922552140429616, "loss_iou": 0.1015625, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 296470452, "step": 3248 }, { "epoch": 13.5375, "grad_norm": 2.19081889373599, "learning_rate": 5e-05, "loss": 0.0815, "num_input_tokens_seen": 296561668, "step": 3249 }, { "epoch": 13.5375, "loss": 0.09439752995967865, "loss_ce": 0.00018976579303853214, "loss_iou": 0.27734375, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 296561668, "step": 3249 }, { "epoch": 13.541666666666666, "grad_norm": 2.3321345177616406, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 296653248, "step": 3250 }, { "epoch": 13.541666666666666, "eval_seeclick_CIoU": 0.22977716475725174, "eval_seeclick_GIoU": 0.2202046513557434, "eval_seeclick_IoU": 0.33641111850738525, "eval_seeclick_MAE_all": 0.10021472722291946, "eval_seeclick_MAE_h": 0.09250224754214287, "eval_seeclick_MAE_w": 0.20677661150693893, "eval_seeclick_MAE_x_boxes": 0.22251524031162262, "eval_seeclick_MAE_y_boxes": 0.09527213498950005, "eval_seeclick_NUM_probability": 0.9999966621398926, "eval_seeclick_inside_bbox": 0.4786931872367859, "eval_seeclick_loss": 0.5649774670600891, "eval_seeclick_loss_ce": 0.12786505371332169, "eval_seeclick_loss_iou": 0.4483642578125, "eval_seeclick_loss_num": 0.0825653076171875, "eval_seeclick_loss_xval": 0.41302490234375, "eval_seeclick_runtime": 76.5699, "eval_seeclick_samples_per_second": 0.562, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 296653248, "step": 3250 }, { "epoch": 13.541666666666666, "eval_icons_CIoU": 0.36827825009822845, "eval_icons_GIoU": 0.3672151267528534, "eval_icons_IoU": 0.43234655261039734, "eval_icons_MAE_all": 0.05891683138906956, "eval_icons_MAE_h": 0.11804331094026566, "eval_icons_MAE_w": 0.0930902399122715, "eval_icons_MAE_x_boxes": 0.08903488144278526, "eval_icons_MAE_y_boxes": 0.11908634006977081, "eval_icons_NUM_probability": 0.9999977350234985, "eval_icons_inside_bbox": 0.6145833432674408, "eval_icons_loss": 0.2957323491573334, "eval_icons_loss_ce": 0.0025786529295146465, "eval_icons_loss_iou": 0.2705078125, "eval_icons_loss_num": 0.06157684326171875, "eval_icons_loss_xval": 0.3079833984375, "eval_icons_runtime": 88.3295, "eval_icons_samples_per_second": 0.566, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 296653248, "step": 3250 }, { "epoch": 13.541666666666666, "eval_screenspot_CIoU": 0.36341987053553265, "eval_screenspot_GIoU": 0.33735422293345135, "eval_screenspot_IoU": 0.44230784972508747, "eval_screenspot_MAE_all": 0.1005148043235143, "eval_screenspot_MAE_h": 0.11051860203345616, "eval_screenspot_MAE_w": 0.2034025490283966, "eval_screenspot_MAE_x_boxes": 0.18765543401241302, "eval_screenspot_MAE_y_boxes": 0.10739666223526001, "eval_screenspot_NUM_probability": 0.9987173080444336, "eval_screenspot_inside_bbox": 0.662500003973643, "eval_screenspot_loss": 0.5043066740036011, "eval_screenspot_loss_ce": 0.0002492249436727434, "eval_screenspot_loss_iou": 0.3599853515625, "eval_screenspot_loss_num": 0.10186767578125, "eval_screenspot_loss_xval": 0.5091959635416666, "eval_screenspot_runtime": 158.9939, "eval_screenspot_samples_per_second": 0.56, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 296653248, "step": 3250 }, { "epoch": 13.541666666666666, "eval_compot_CIoU": 0.5643254518508911, "eval_compot_GIoU": 0.559347540140152, "eval_compot_IoU": 0.62269726395607, "eval_compot_MAE_all": 0.04530065879225731, "eval_compot_MAE_h": 0.06042328476905823, "eval_compot_MAE_w": 0.10373424738645554, "eval_compot_MAE_x_boxes": 0.10278752446174622, "eval_compot_MAE_y_boxes": 0.05855695717036724, "eval_compot_NUM_probability": 0.9999950230121613, "eval_compot_inside_bbox": 0.7986111044883728, "eval_compot_loss": 0.24213729798793793, "eval_compot_loss_ce": 0.029723092913627625, "eval_compot_loss_iou": 0.40264892578125, "eval_compot_loss_num": 0.036865234375, "eval_compot_loss_xval": 0.18415069580078125, "eval_compot_runtime": 91.1287, "eval_compot_samples_per_second": 0.549, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 296653248, "step": 3250 }, { "epoch": 13.541666666666666, "loss": 0.2028052806854248, "loss_ce": 0.026993528008461, "loss_iou": 0.404296875, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 296653248, "step": 3250 }, { "epoch": 13.545833333333333, "grad_norm": 2.8028758060177674, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 296744932, "step": 3251 }, { "epoch": 13.545833333333333, "loss": 0.046471647918224335, "loss_ce": 0.00013070247950963676, "loss_iou": 0.30859375, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 296744932, "step": 3251 }, { "epoch": 13.55, "grad_norm": 2.103715226100908, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 296836068, "step": 3252 }, { "epoch": 13.55, "loss": 0.07032459229230881, "loss_ce": 1.2091385542589705e-05, "loss_iou": 0.3125, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 296836068, "step": 3252 }, { "epoch": 13.554166666666667, "grad_norm": 22.408969808322134, "learning_rate": 5e-05, "loss": 0.069, "num_input_tokens_seen": 296927288, "step": 3253 }, { "epoch": 13.554166666666667, "loss": 0.09005076438188553, "loss_ce": 0.0006037460989318788, "loss_iou": 0.3125, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 296927288, "step": 3253 }, { "epoch": 13.558333333333334, "grad_norm": 1.2666054672318465, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 297018372, "step": 3254 }, { "epoch": 13.558333333333334, "loss": 0.031558021903038025, "loss_ce": 2.8491049306467175e-06, "loss_iou": 0.2109375, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 297018372, "step": 3254 }, { "epoch": 13.5625, "grad_norm": 2.562090417454454, "learning_rate": 5e-05, "loss": 0.0901, "num_input_tokens_seen": 297109632, "step": 3255 }, { "epoch": 13.5625, "loss": 0.12382586300373077, "loss_ce": 4.657105091609992e-05, "loss_iou": 0.244140625, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 297109632, "step": 3255 }, { "epoch": 13.566666666666666, "grad_norm": 2.697239892910222, "learning_rate": 5e-05, "loss": 0.0882, "num_input_tokens_seen": 297200668, "step": 3256 }, { "epoch": 13.566666666666666, "loss": 0.058978091925382614, "loss_ce": 2.871132892323658e-06, "loss_iou": 0.25390625, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 297200668, "step": 3256 }, { "epoch": 13.570833333333333, "grad_norm": 2.1794785374967636, "learning_rate": 5e-05, "loss": 0.0445, "num_input_tokens_seen": 297292116, "step": 3257 }, { "epoch": 13.570833333333333, "loss": 0.04801057279109955, "loss_ce": 6.423933882615529e-06, "loss_iou": 0.1943359375, "loss_num": 0.00958251953125, "loss_xval": 0.048095703125, "num_input_tokens_seen": 297292116, "step": 3257 }, { "epoch": 13.575, "grad_norm": 4.592907359980534, "learning_rate": 5e-05, "loss": 0.0705, "num_input_tokens_seen": 297383600, "step": 3258 }, { "epoch": 13.575, "loss": 0.0719158798456192, "loss_ce": 0.002900381339713931, "loss_iou": 0.2734375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 297383600, "step": 3258 }, { "epoch": 13.579166666666667, "grad_norm": 2.7038816836170354, "learning_rate": 5e-05, "loss": 0.0849, "num_input_tokens_seen": 297474900, "step": 3259 }, { "epoch": 13.579166666666667, "loss": 0.053036607801914215, "loss_ce": 0.00027171536930836737, "loss_iou": 0.23828125, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 297474900, "step": 3259 }, { "epoch": 13.583333333333334, "grad_norm": 4.639443056226232, "learning_rate": 5e-05, "loss": 0.0883, "num_input_tokens_seen": 297565816, "step": 3260 }, { "epoch": 13.583333333333334, "loss": 0.10763812065124512, "loss_ce": 0.00015521238674409688, "loss_iou": 0.216796875, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 297565816, "step": 3260 }, { "epoch": 13.5875, "grad_norm": 2.9872991114469727, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 297656984, "step": 3261 }, { "epoch": 13.5875, "loss": 0.07486598193645477, "loss_ce": 1.3987812053528614e-05, "loss_iou": 0.21484375, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 297656984, "step": 3261 }, { "epoch": 13.591666666666667, "grad_norm": 5.086790925707244, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 297748888, "step": 3262 }, { "epoch": 13.591666666666667, "loss": 0.07709628343582153, "loss_ce": 2.413909896858968e-05, "loss_iou": 0.34375, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 297748888, "step": 3262 }, { "epoch": 13.595833333333333, "grad_norm": 3.0673587592493154, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 297840056, "step": 3263 }, { "epoch": 13.595833333333333, "loss": 0.031825195997953415, "loss_ce": 0.00043023889884352684, "loss_iou": 0.2578125, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 297840056, "step": 3263 }, { "epoch": 13.6, "grad_norm": 2.8440067449826523, "learning_rate": 5e-05, "loss": 0.0653, "num_input_tokens_seen": 297931472, "step": 3264 }, { "epoch": 13.6, "loss": 0.07889088243246078, "loss_ce": 3.346401354065165e-05, "loss_iou": 0.23046875, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 297931472, "step": 3264 }, { "epoch": 13.604166666666666, "grad_norm": 2.300014104251759, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 298022684, "step": 3265 }, { "epoch": 13.604166666666666, "loss": 0.05194888263940811, "loss_ce": 6.899907748447731e-05, "loss_iou": 0.265625, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 298022684, "step": 3265 }, { "epoch": 13.608333333333333, "grad_norm": 3.4597564597891304, "learning_rate": 5e-05, "loss": 0.0596, "num_input_tokens_seen": 298114044, "step": 3266 }, { "epoch": 13.608333333333333, "loss": 0.05340898036956787, "loss_ce": 9.47702574194409e-05, "loss_iou": 0.3671875, "loss_num": 0.01068115234375, "loss_xval": 0.05322265625, "num_input_tokens_seen": 298114044, "step": 3266 }, { "epoch": 13.6125, "grad_norm": 2.4288926560854542, "learning_rate": 5e-05, "loss": 0.0417, "num_input_tokens_seen": 298205696, "step": 3267 }, { "epoch": 13.6125, "loss": 0.04282256215810776, "loss_ce": 0.0009066681959666312, "loss_iou": 0.2890625, "loss_num": 0.00836181640625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 298205696, "step": 3267 }, { "epoch": 13.616666666666667, "grad_norm": 1.5850870816034652, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 298297312, "step": 3268 }, { "epoch": 13.616666666666667, "loss": 0.05654668062925339, "loss_ce": 1.2864399650425185e-05, "loss_iou": 0.2060546875, "loss_num": 0.01129150390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 298297312, "step": 3268 }, { "epoch": 13.620833333333334, "grad_norm": 1.3325825115255354, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 298388808, "step": 3269 }, { "epoch": 13.620833333333334, "loss": 0.05588510259985924, "loss_ce": 2.2675443688058294e-05, "loss_iou": 0.33203125, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 298388808, "step": 3269 }, { "epoch": 13.625, "grad_norm": 2.703315621670545, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 298479920, "step": 3270 }, { "epoch": 13.625, "loss": 0.10270601511001587, "loss_ce": 0.00010591827594907954, "loss_iou": 0.2060546875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 298479920, "step": 3270 }, { "epoch": 13.629166666666666, "grad_norm": 2.310328603995657, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 298572016, "step": 3271 }, { "epoch": 13.629166666666666, "loss": 0.06343436986207962, "loss_ce": 0.00038505299016833305, "loss_iou": 0.2294921875, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 298572016, "step": 3271 }, { "epoch": 13.633333333333333, "grad_norm": 3.9734558947742147, "learning_rate": 5e-05, "loss": 0.1338, "num_input_tokens_seen": 298661968, "step": 3272 }, { "epoch": 13.633333333333333, "loss": 0.1281973421573639, "loss_ce": 6.176564966153819e-07, "loss_iou": 0.18359375, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 298661968, "step": 3272 }, { "epoch": 13.6375, "grad_norm": 3.734083755989058, "learning_rate": 5e-05, "loss": 0.0862, "num_input_tokens_seen": 298753168, "step": 3273 }, { "epoch": 13.6375, "loss": 0.07938088476657867, "loss_ce": 4.663808340410469e-06, "loss_iou": 0.23828125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 298753168, "step": 3273 }, { "epoch": 13.641666666666667, "grad_norm": 2.7377364329741494, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 298844796, "step": 3274 }, { "epoch": 13.641666666666667, "loss": 0.06879688799381256, "loss_ce": 2.637311581565882e-06, "loss_iou": 0.25390625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 298844796, "step": 3274 }, { "epoch": 13.645833333333334, "grad_norm": 5.587498360937527, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 298936064, "step": 3275 }, { "epoch": 13.645833333333334, "loss": 0.07425408810377121, "loss_ce": 4.820844878850039e-06, "loss_iou": 0.173828125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 298936064, "step": 3275 }, { "epoch": 13.65, "grad_norm": 1.4913922652249463, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 299027768, "step": 3276 }, { "epoch": 13.65, "loss": 0.05714738368988037, "loss_ce": 0.002337811980396509, "loss_iou": 0.265625, "loss_num": 0.010986328125, "loss_xval": 0.0546875, "num_input_tokens_seen": 299027768, "step": 3276 }, { "epoch": 13.654166666666667, "grad_norm": 2.540078846532057, "learning_rate": 5e-05, "loss": 0.0678, "num_input_tokens_seen": 299119264, "step": 3277 }, { "epoch": 13.654166666666667, "loss": 0.04711538180708885, "loss_ce": 0.00022511978750117123, "loss_iou": 0.189453125, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 299119264, "step": 3277 }, { "epoch": 13.658333333333333, "grad_norm": 4.590505300981462, "learning_rate": 5e-05, "loss": 0.0823, "num_input_tokens_seen": 299211500, "step": 3278 }, { "epoch": 13.658333333333333, "loss": 0.0764964297413826, "loss_ce": 0.0002635241544339806, "loss_iou": 0.2470703125, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 299211500, "step": 3278 }, { "epoch": 13.6625, "grad_norm": 3.1231933101305698, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 299302380, "step": 3279 }, { "epoch": 13.6625, "loss": 0.05796603858470917, "loss_ce": 1.3158914043742698e-05, "loss_iou": 0.283203125, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 299302380, "step": 3279 }, { "epoch": 13.666666666666666, "grad_norm": 4.598121246096437, "learning_rate": 5e-05, "loss": 0.1037, "num_input_tokens_seen": 299393528, "step": 3280 }, { "epoch": 13.666666666666666, "loss": 0.13770559430122375, "loss_ce": 0.0002544168964959681, "loss_iou": 0.376953125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 299393528, "step": 3280 }, { "epoch": 13.670833333333333, "grad_norm": 14.41610190677706, "learning_rate": 5e-05, "loss": 0.0893, "num_input_tokens_seen": 299484840, "step": 3281 }, { "epoch": 13.670833333333333, "loss": 0.0995384082198143, "loss_ce": 3.584696241887286e-05, "loss_iou": 0.34375, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 299484840, "step": 3281 }, { "epoch": 13.675, "grad_norm": 3.5831341085790216, "learning_rate": 5e-05, "loss": 0.0765, "num_input_tokens_seen": 299576196, "step": 3282 }, { "epoch": 13.675, "loss": 0.08482582122087479, "loss_ce": 2.2115034425951308e-06, "loss_iou": 0.08984375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 299576196, "step": 3282 }, { "epoch": 13.679166666666667, "grad_norm": 3.421202748259547, "learning_rate": 5e-05, "loss": 0.0703, "num_input_tokens_seen": 299667204, "step": 3283 }, { "epoch": 13.679166666666667, "loss": 0.08301764726638794, "loss_ce": 9.841729479376227e-06, "loss_iou": 0.416015625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 299667204, "step": 3283 }, { "epoch": 13.683333333333334, "grad_norm": 2.5523787944062843, "learning_rate": 5e-05, "loss": 0.1216, "num_input_tokens_seen": 299758168, "step": 3284 }, { "epoch": 13.683333333333334, "loss": 0.05732997506856918, "loss_ce": 2.708056399569614e-06, "loss_iou": 0.2578125, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 299758168, "step": 3284 }, { "epoch": 13.6875, "grad_norm": 2.867892743118164, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 299848980, "step": 3285 }, { "epoch": 13.6875, "loss": 0.05687332898378372, "loss_ce": 3.821900463663042e-06, "loss_iou": 0.318359375, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 299848980, "step": 3285 }, { "epoch": 13.691666666666666, "grad_norm": 3.3272804987740137, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 299939920, "step": 3286 }, { "epoch": 13.691666666666666, "loss": 0.04706061631441116, "loss_ce": 2.51245137405931e-06, "loss_iou": 0.310546875, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 299939920, "step": 3286 }, { "epoch": 13.695833333333333, "grad_norm": 6.618524967812087, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 300031380, "step": 3287 }, { "epoch": 13.695833333333333, "loss": 0.07023078203201294, "loss_ce": 0.00046760475379414856, "loss_iou": 0.2197265625, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 300031380, "step": 3287 }, { "epoch": 13.7, "grad_norm": 1.8579559681332511, "learning_rate": 5e-05, "loss": 0.0728, "num_input_tokens_seen": 300123220, "step": 3288 }, { "epoch": 13.7, "loss": 0.04622560739517212, "loss_ce": 0.00044924189569428563, "loss_iou": 0.3828125, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 300123220, "step": 3288 }, { "epoch": 13.704166666666667, "grad_norm": 3.685118217162781, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 300214848, "step": 3289 }, { "epoch": 13.704166666666667, "loss": 0.04516689479351044, "loss_ce": 8.769609962655522e-07, "loss_iou": 0.302734375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 300214848, "step": 3289 }, { "epoch": 13.708333333333334, "grad_norm": 4.049860614055312, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 300306604, "step": 3290 }, { "epoch": 13.708333333333334, "loss": 0.05506318807601929, "loss_ce": 7.051033026073128e-05, "loss_iou": 0.177734375, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 300306604, "step": 3290 }, { "epoch": 13.7125, "grad_norm": 2.6490315457322073, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 300397640, "step": 3291 }, { "epoch": 13.7125, "loss": 0.05209658667445183, "loss_ce": 3.081056775044999e-06, "loss_iou": 0.31640625, "loss_num": 0.01043701171875, "loss_xval": 0.052001953125, "num_input_tokens_seen": 300397640, "step": 3291 }, { "epoch": 13.716666666666667, "grad_norm": 2.240454278054644, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 300489368, "step": 3292 }, { "epoch": 13.716666666666667, "loss": 0.07966948300600052, "loss_ce": 0.0011782721849158406, "loss_iou": 0.197265625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 300489368, "step": 3292 }, { "epoch": 13.720833333333333, "grad_norm": 4.747013091701193, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 300580500, "step": 3293 }, { "epoch": 13.720833333333333, "loss": 0.04834999889135361, "loss_ce": 1.0152914001082536e-05, "loss_iou": 0.298828125, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 300580500, "step": 3293 }, { "epoch": 13.725, "grad_norm": 2.851445875282756, "learning_rate": 5e-05, "loss": 0.049, "num_input_tokens_seen": 300671940, "step": 3294 }, { "epoch": 13.725, "loss": 0.032225240021944046, "loss_ce": 0.0005861401441507041, "loss_iou": 0.205078125, "loss_num": 0.006317138671875, "loss_xval": 0.03173828125, "num_input_tokens_seen": 300671940, "step": 3294 }, { "epoch": 13.729166666666666, "grad_norm": 3.034120458590585, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 300762992, "step": 3295 }, { "epoch": 13.729166666666666, "loss": 0.05423610657453537, "loss_ce": 9.792399941943586e-05, "loss_iou": 0.298828125, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 300762992, "step": 3295 }, { "epoch": 13.733333333333333, "grad_norm": 84.9857919333762, "learning_rate": 5e-05, "loss": 0.0516, "num_input_tokens_seen": 300854340, "step": 3296 }, { "epoch": 13.733333333333333, "loss": 0.05023489147424698, "loss_ce": 2.9535742669395404e-06, "loss_iou": 0.2275390625, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 300854340, "step": 3296 }, { "epoch": 13.7375, "grad_norm": 2.8617475247265927, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 300945760, "step": 3297 }, { "epoch": 13.7375, "loss": 0.04916559159755707, "loss_ce": 0.0013216584920883179, "loss_iou": 0.275390625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 300945760, "step": 3297 }, { "epoch": 13.741666666666667, "grad_norm": 2.957150236721741, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 301037156, "step": 3298 }, { "epoch": 13.741666666666667, "loss": 0.07779006659984589, "loss_ce": 1.6021556803025305e-05, "loss_iou": 0.302734375, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 301037156, "step": 3298 }, { "epoch": 13.745833333333334, "grad_norm": 3.7764971279076978, "learning_rate": 5e-05, "loss": 0.0997, "num_input_tokens_seen": 301128548, "step": 3299 }, { "epoch": 13.745833333333334, "loss": 0.11629924178123474, "loss_ce": 1.2012060324195772e-05, "loss_iou": 0.2890625, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 301128548, "step": 3299 }, { "epoch": 13.75, "grad_norm": 9.032833363759718, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 301219812, "step": 3300 }, { "epoch": 13.75, "loss": 0.07080667465925217, "loss_ce": 5.8882683333649766e-06, "loss_iou": 0.2197265625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 301219812, "step": 3300 }, { "epoch": 13.754166666666666, "grad_norm": 2.5179286794266242, "learning_rate": 5e-05, "loss": 0.0367, "num_input_tokens_seen": 301311568, "step": 3301 }, { "epoch": 13.754166666666666, "loss": 0.038580723106861115, "loss_ce": 6.501464213215513e-06, "loss_iou": 0.19921875, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 301311568, "step": 3301 }, { "epoch": 13.758333333333333, "grad_norm": 2.915091842981452, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 301402176, "step": 3302 }, { "epoch": 13.758333333333333, "loss": 0.0477927103638649, "loss_ce": 9.814760232984554e-06, "loss_iou": 0.31640625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 301402176, "step": 3302 }, { "epoch": 13.7625, "grad_norm": 1.8902026133477452, "learning_rate": 5e-05, "loss": 0.0335, "num_input_tokens_seen": 301493596, "step": 3303 }, { "epoch": 13.7625, "loss": 0.04254970699548721, "loss_ce": 0.0001684220042079687, "loss_iou": 0.17578125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 301493596, "step": 3303 }, { "epoch": 13.766666666666667, "grad_norm": 4.509340256046256, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 301585296, "step": 3304 }, { "epoch": 13.766666666666667, "loss": 0.03628820180892944, "loss_ce": 0.0005215998971834779, "loss_iou": 0.294921875, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 301585296, "step": 3304 }, { "epoch": 13.770833333333334, "grad_norm": 5.033491555102182, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 301676888, "step": 3305 }, { "epoch": 13.770833333333334, "loss": 0.034825798124074936, "loss_ce": 5.241945927991765e-06, "loss_iou": 0.310546875, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 301676888, "step": 3305 }, { "epoch": 13.775, "grad_norm": 7.191350621732554, "learning_rate": 5e-05, "loss": 0.0957, "num_input_tokens_seen": 301767656, "step": 3306 }, { "epoch": 13.775, "loss": 0.11079069972038269, "loss_ce": 1.1897653166670352e-05, "loss_iou": 0.228515625, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 301767656, "step": 3306 }, { "epoch": 13.779166666666667, "grad_norm": 3.804325948813366, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 301858904, "step": 3307 }, { "epoch": 13.779166666666667, "loss": 0.05447077751159668, "loss_ce": 0.0006377688841894269, "loss_iou": 0.328125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 301858904, "step": 3307 }, { "epoch": 13.783333333333333, "grad_norm": 2.261119446992845, "learning_rate": 5e-05, "loss": 0.0718, "num_input_tokens_seen": 301949480, "step": 3308 }, { "epoch": 13.783333333333333, "loss": 0.0620134212076664, "loss_ce": 1.7031369452524814e-06, "loss_iou": 0.2109375, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 301949480, "step": 3308 }, { "epoch": 13.7875, "grad_norm": 3.9577403010481476, "learning_rate": 5e-05, "loss": 0.06, "num_input_tokens_seen": 302040868, "step": 3309 }, { "epoch": 13.7875, "loss": 0.04542113095521927, "loss_ce": 4.14906426158268e-05, "loss_iou": 0.29296875, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 302040868, "step": 3309 }, { "epoch": 13.791666666666666, "grad_norm": 2.609996874344853, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 302132496, "step": 3310 }, { "epoch": 13.791666666666666, "loss": 0.05624785274267197, "loss_ce": 1.1587387234612834e-05, "loss_iou": 0.3359375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 302132496, "step": 3310 }, { "epoch": 13.795833333333333, "grad_norm": 3.227300689239271, "learning_rate": 5e-05, "loss": 0.0832, "num_input_tokens_seen": 302223328, "step": 3311 }, { "epoch": 13.795833333333333, "loss": 0.11890000104904175, "loss_ce": 3.5156972444383428e-06, "loss_iou": 0.263671875, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 302223328, "step": 3311 }, { "epoch": 13.8, "grad_norm": 5.317931108982696, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 302315076, "step": 3312 }, { "epoch": 13.8, "loss": 0.061526887118816376, "loss_ce": 1.870663254521787e-05, "loss_iou": 0.306640625, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 302315076, "step": 3312 }, { "epoch": 13.804166666666667, "grad_norm": 2.548530665417501, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 302406864, "step": 3313 }, { "epoch": 13.804166666666667, "loss": 0.07590900361537933, "loss_ce": 1.1779991837101988e-05, "loss_iou": 0.3046875, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 302406864, "step": 3313 }, { "epoch": 13.808333333333334, "grad_norm": 2.7120286980691404, "learning_rate": 5e-05, "loss": 0.112, "num_input_tokens_seen": 302497700, "step": 3314 }, { "epoch": 13.808333333333334, "loss": 0.04778246209025383, "loss_ce": 0.0002513352083042264, "loss_iou": 0.390625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 302497700, "step": 3314 }, { "epoch": 13.8125, "grad_norm": 12.520246038924078, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 302587720, "step": 3315 }, { "epoch": 13.8125, "loss": 0.04641222953796387, "loss_ce": 0.0002391317393630743, "loss_iou": 0.3203125, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 302587720, "step": 3315 }, { "epoch": 13.816666666666666, "grad_norm": 3.094948368543814, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 302678844, "step": 3316 }, { "epoch": 13.816666666666666, "loss": 0.06758721172809601, "loss_ce": 0.0005095761734992266, "loss_iou": 0.25, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 302678844, "step": 3316 }, { "epoch": 13.820833333333333, "grad_norm": 5.153459037298692, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 302769940, "step": 3317 }, { "epoch": 13.820833333333333, "loss": 0.05050954967737198, "loss_ce": 3.34742981067393e-05, "loss_iou": 0.33984375, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 302769940, "step": 3317 }, { "epoch": 13.825, "grad_norm": 4.53011008560501, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 302861468, "step": 3318 }, { "epoch": 13.825, "loss": 0.026505716145038605, "loss_ce": 1.2008333669655258e-06, "loss_iou": 0.220703125, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 302861468, "step": 3318 }, { "epoch": 13.829166666666667, "grad_norm": 4.308616516778707, "learning_rate": 5e-05, "loss": 0.0747, "num_input_tokens_seen": 302951936, "step": 3319 }, { "epoch": 13.829166666666667, "loss": 0.057527441531419754, "loss_ce": 0.0002459501556586474, "loss_iou": 0.201171875, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 302951936, "step": 3319 }, { "epoch": 13.833333333333334, "grad_norm": 3.117831840990339, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 303042944, "step": 3320 }, { "epoch": 13.833333333333334, "loss": 0.0689389780163765, "loss_ce": 1.5027325389382895e-05, "loss_iou": 0.26953125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 303042944, "step": 3320 }, { "epoch": 13.8375, "grad_norm": 3.3334205918882374, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 303134920, "step": 3321 }, { "epoch": 13.8375, "loss": 0.05091170221567154, "loss_ce": 0.0007865775842219591, "loss_iou": 0.3046875, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 303134920, "step": 3321 }, { "epoch": 13.841666666666667, "grad_norm": 3.6615777665525617, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 303226032, "step": 3322 }, { "epoch": 13.841666666666667, "loss": 0.04552161693572998, "loss_ce": 0.00040137613541446626, "loss_iou": 0.2470703125, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 303226032, "step": 3322 }, { "epoch": 13.845833333333333, "grad_norm": 3.2236865662362146, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 303317104, "step": 3323 }, { "epoch": 13.845833333333333, "loss": 0.06049469858407974, "loss_ce": 8.860646630637348e-06, "loss_iou": 0.310546875, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 303317104, "step": 3323 }, { "epoch": 13.85, "grad_norm": 3.551217326920821, "learning_rate": 5e-05, "loss": 0.0761, "num_input_tokens_seen": 303408324, "step": 3324 }, { "epoch": 13.85, "loss": 0.055788613855838776, "loss_ce": 6.351471529342234e-05, "loss_iou": 0.166015625, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 303408324, "step": 3324 }, { "epoch": 13.854166666666666, "grad_norm": 2.225093520187707, "learning_rate": 5e-05, "loss": 0.0725, "num_input_tokens_seen": 303499640, "step": 3325 }, { "epoch": 13.854166666666666, "loss": 0.07631266117095947, "loss_ce": 0.006007795687764883, "loss_iou": 0.2421875, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 303499640, "step": 3325 }, { "epoch": 13.858333333333333, "grad_norm": 2.7554504416171737, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 303590800, "step": 3326 }, { "epoch": 13.858333333333333, "loss": 0.08511213213205338, "loss_ce": 0.0009370205807499588, "loss_iou": 0.26171875, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 303590800, "step": 3326 }, { "epoch": 13.8625, "grad_norm": 14.147968514520535, "learning_rate": 5e-05, "loss": 0.0971, "num_input_tokens_seen": 303681040, "step": 3327 }, { "epoch": 13.8625, "loss": 0.10966304689645767, "loss_ce": 1.3385659258347005e-05, "loss_iou": 0.22265625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 303681040, "step": 3327 }, { "epoch": 13.866666666666667, "grad_norm": 12.554145044744343, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 303771648, "step": 3328 }, { "epoch": 13.866666666666667, "loss": 0.05274789035320282, "loss_ce": 0.0025312108919024467, "loss_iou": 0.328125, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 303771648, "step": 3328 }, { "epoch": 13.870833333333334, "grad_norm": 2.5352712739982453, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 303863712, "step": 3329 }, { "epoch": 13.870833333333334, "loss": 0.03622327744960785, "loss_ce": 0.00017438957002013922, "loss_iou": 0.259765625, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 303863712, "step": 3329 }, { "epoch": 13.875, "grad_norm": 3.100654221670784, "learning_rate": 5e-05, "loss": 0.0736, "num_input_tokens_seen": 303954564, "step": 3330 }, { "epoch": 13.875, "loss": 0.08480900526046753, "loss_ce": 3.116882362519391e-05, "loss_iou": 0.1455078125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 303954564, "step": 3330 }, { "epoch": 13.879166666666666, "grad_norm": 3.217399946775899, "learning_rate": 5e-05, "loss": 0.0858, "num_input_tokens_seen": 304046424, "step": 3331 }, { "epoch": 13.879166666666666, "loss": 0.1203436478972435, "loss_ce": 4.335532139521092e-05, "loss_iou": 0.275390625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 304046424, "step": 3331 }, { "epoch": 13.883333333333333, "grad_norm": 6.4596229589144425, "learning_rate": 5e-05, "loss": 0.1057, "num_input_tokens_seen": 304138408, "step": 3332 }, { "epoch": 13.883333333333333, "loss": 0.11160202324390411, "loss_ce": 6.864986062282696e-06, "loss_iou": 0.22265625, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 304138408, "step": 3332 }, { "epoch": 13.8875, "grad_norm": 2.7269878636408387, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 304229500, "step": 3333 }, { "epoch": 13.8875, "loss": 0.032473981380462646, "loss_ce": 1.0906089300988242e-05, "loss_iou": 0.154296875, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 304229500, "step": 3333 }, { "epoch": 13.891666666666667, "grad_norm": 5.250857290393431, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 304320936, "step": 3334 }, { "epoch": 13.891666666666667, "loss": 0.06586028635501862, "loss_ce": 0.0006747441948391497, "loss_iou": 0.2177734375, "loss_num": 0.01300048828125, "loss_xval": 0.0654296875, "num_input_tokens_seen": 304320936, "step": 3334 }, { "epoch": 13.895833333333334, "grad_norm": 1.8983321006579221, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 304412016, "step": 3335 }, { "epoch": 13.895833333333334, "loss": 0.061099447309970856, "loss_ce": 0.00040761884883977473, "loss_iou": 0.11181640625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 304412016, "step": 3335 }, { "epoch": 13.9, "grad_norm": 4.511945486895753, "learning_rate": 5e-05, "loss": 0.084, "num_input_tokens_seen": 304503732, "step": 3336 }, { "epoch": 13.9, "loss": 0.07431946694850922, "loss_ce": 9.166088602796663e-06, "loss_iou": 0.380859375, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 304503732, "step": 3336 }, { "epoch": 13.904166666666667, "grad_norm": 2.205920050046423, "learning_rate": 5e-05, "loss": 0.0657, "num_input_tokens_seen": 304594832, "step": 3337 }, { "epoch": 13.904166666666667, "loss": 0.05776306986808777, "loss_ce": 3.9070018829079345e-05, "loss_iou": 0.1962890625, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 304594832, "step": 3337 }, { "epoch": 13.908333333333333, "grad_norm": 2.3042991811889095, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 304685720, "step": 3338 }, { "epoch": 13.908333333333333, "loss": 0.046074278652668, "loss_ce": 3.851512883557007e-05, "loss_iou": 0.150390625, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 304685720, "step": 3338 }, { "epoch": 13.9125, "grad_norm": 2.630684878369044, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 304776520, "step": 3339 }, { "epoch": 13.9125, "loss": 0.10420745611190796, "loss_ce": 3.569770706235431e-05, "loss_iou": 0.2265625, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 304776520, "step": 3339 }, { "epoch": 13.916666666666666, "grad_norm": 3.1437627546639684, "learning_rate": 5e-05, "loss": 0.104, "num_input_tokens_seen": 304868184, "step": 3340 }, { "epoch": 13.916666666666666, "loss": 0.05712635815143585, "loss_ce": 0.0039037007372826338, "loss_iou": 0.2890625, "loss_num": 0.0106201171875, "loss_xval": 0.05322265625, "num_input_tokens_seen": 304868184, "step": 3340 }, { "epoch": 13.920833333333333, "grad_norm": 2.5147699603318334, "learning_rate": 5e-05, "loss": 0.0612, "num_input_tokens_seen": 304959688, "step": 3341 }, { "epoch": 13.920833333333333, "loss": 0.06732615828514099, "loss_ce": 0.006901353597640991, "loss_iou": 0.275390625, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 304959688, "step": 3341 }, { "epoch": 13.925, "grad_norm": 3.285856756914253, "learning_rate": 5e-05, "loss": 0.0878, "num_input_tokens_seen": 305049076, "step": 3342 }, { "epoch": 13.925, "loss": 0.11383511871099472, "loss_ce": 1.9817061911453493e-05, "loss_iou": 0.349609375, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 305049076, "step": 3342 }, { "epoch": 13.929166666666667, "grad_norm": 22.50870734198593, "learning_rate": 5e-05, "loss": 0.0925, "num_input_tokens_seen": 305140444, "step": 3343 }, { "epoch": 13.929166666666667, "loss": 0.0981772392988205, "loss_ce": 6.322274566628039e-05, "loss_iou": 0.1103515625, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 305140444, "step": 3343 }, { "epoch": 13.933333333333334, "grad_norm": 8.855170280331524, "learning_rate": 5e-05, "loss": 0.0663, "num_input_tokens_seen": 305231820, "step": 3344 }, { "epoch": 13.933333333333334, "loss": 0.09573826193809509, "loss_ce": 4.617154445440974e-06, "loss_iou": 0.248046875, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 305231820, "step": 3344 }, { "epoch": 13.9375, "grad_norm": 2.690396848874294, "learning_rate": 5e-05, "loss": 0.1034, "num_input_tokens_seen": 305323096, "step": 3345 }, { "epoch": 13.9375, "loss": 0.1162140816450119, "loss_ce": 3.1447380024474114e-06, "loss_iou": 0.1796875, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 305323096, "step": 3345 }, { "epoch": 13.941666666666666, "grad_norm": 3.1007746118976574, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 305414204, "step": 3346 }, { "epoch": 13.941666666666666, "loss": 0.07584530860185623, "loss_ce": 0.00011593498493311927, "loss_iou": 0.1181640625, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 305414204, "step": 3346 }, { "epoch": 13.945833333333333, "grad_norm": 1.568536077812119, "learning_rate": 5e-05, "loss": 0.0516, "num_input_tokens_seen": 305505092, "step": 3347 }, { "epoch": 13.945833333333333, "loss": 0.07008616626262665, "loss_ce": 2.5482622731942683e-06, "loss_iou": 0.2236328125, "loss_num": 0.01397705078125, "loss_xval": 0.0703125, "num_input_tokens_seen": 305505092, "step": 3347 }, { "epoch": 13.95, "grad_norm": 3.939095608261935, "learning_rate": 5e-05, "loss": 0.0927, "num_input_tokens_seen": 305596448, "step": 3348 }, { "epoch": 13.95, "loss": 0.06267900764942169, "loss_ce": 0.00027056410908699036, "loss_iou": 0.294921875, "loss_num": 0.012451171875, "loss_xval": 0.0625, "num_input_tokens_seen": 305596448, "step": 3348 }, { "epoch": 13.954166666666667, "grad_norm": 5.10613752536137, "learning_rate": 5e-05, "loss": 0.0819, "num_input_tokens_seen": 305687404, "step": 3349 }, { "epoch": 13.954166666666667, "loss": 0.1333942413330078, "loss_ce": 0.00033760571386665106, "loss_iou": 0.244140625, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 305687404, "step": 3349 }, { "epoch": 13.958333333333334, "grad_norm": 2.8926341934022926, "learning_rate": 5e-05, "loss": 0.0852, "num_input_tokens_seen": 305779152, "step": 3350 }, { "epoch": 13.958333333333334, "loss": 0.07041196525096893, "loss_ce": 0.0004351534298621118, "loss_iou": 0.322265625, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 305779152, "step": 3350 }, { "epoch": 13.9625, "grad_norm": 30.49982092068426, "learning_rate": 5e-05, "loss": 0.0744, "num_input_tokens_seen": 305870624, "step": 3351 }, { "epoch": 13.9625, "loss": 0.09339140355587006, "loss_ce": 0.00034330939524807036, "loss_iou": 0.1884765625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 305870624, "step": 3351 }, { "epoch": 13.966666666666667, "grad_norm": 7.2361907832037575, "learning_rate": 5e-05, "loss": 0.0706, "num_input_tokens_seen": 305962124, "step": 3352 }, { "epoch": 13.966666666666667, "loss": 0.0672103762626648, "loss_ce": 2.592532109702006e-05, "loss_iou": 0.2734375, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 305962124, "step": 3352 }, { "epoch": 13.970833333333333, "grad_norm": 2.3789658089565124, "learning_rate": 5e-05, "loss": 0.0329, "num_input_tokens_seen": 306052748, "step": 3353 }, { "epoch": 13.970833333333333, "loss": 0.033177584409713745, "loss_ce": 4.97952350997366e-06, "loss_iou": 0.1953125, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 306052748, "step": 3353 }, { "epoch": 13.975, "grad_norm": 2.603933218904834, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 306143928, "step": 3354 }, { "epoch": 13.975, "loss": 0.05014060065150261, "loss_ce": 7.84904113970697e-06, "loss_iou": 0.1953125, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 306143928, "step": 3354 }, { "epoch": 13.979166666666666, "grad_norm": 5.750381590450532, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 306235504, "step": 3355 }, { "epoch": 13.979166666666666, "loss": 0.057033687829971313, "loss_ce": 0.0032922320533543825, "loss_iou": 0.310546875, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 306235504, "step": 3355 }, { "epoch": 13.983333333333333, "grad_norm": 3.811378525309582, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 306327264, "step": 3356 }, { "epoch": 13.983333333333333, "loss": 0.06342743337154388, "loss_ce": 0.000423891848186031, "loss_iou": 0.197265625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 306327264, "step": 3356 }, { "epoch": 13.9875, "grad_norm": 5.83970757945946, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 306419288, "step": 3357 }, { "epoch": 13.9875, "loss": 0.05281481146812439, "loss_ce": 0.002430290449410677, "loss_iou": 0.1787109375, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 306419288, "step": 3357 }, { "epoch": 13.991666666666667, "grad_norm": 3.286523795520112, "learning_rate": 5e-05, "loss": 0.084, "num_input_tokens_seen": 306510708, "step": 3358 }, { "epoch": 13.991666666666667, "loss": 0.12988848984241486, "loss_ce": 0.003622006392106414, "loss_iou": 0.31640625, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 306510708, "step": 3358 }, { "epoch": 13.995833333333334, "grad_norm": 3.2398357988179542, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 306602088, "step": 3359 }, { "epoch": 13.995833333333334, "loss": 0.05087633430957794, "loss_ce": 0.0009114285348914564, "loss_iou": 0.36328125, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 306602088, "step": 3359 }, { "epoch": 14.0, "grad_norm": 5.617467287281432, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 306692936, "step": 3360 }, { "epoch": 14.0, "loss": 0.07821273058652878, "loss_ce": 3.8057094116084045e-06, "loss_iou": 0.4140625, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 306692936, "step": 3360 }, { "epoch": 14.004166666666666, "grad_norm": 4.188985861313355, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 306784476, "step": 3361 }, { "epoch": 14.004166666666666, "loss": 0.03913436457514763, "loss_ce": 1.0830644896486774e-05, "loss_iou": 0.27734375, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 306784476, "step": 3361 }, { "epoch": 14.008333333333333, "grad_norm": 3.77837181129567, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 306875068, "step": 3362 }, { "epoch": 14.008333333333333, "loss": 0.040547363460063934, "loss_ce": 2.0018822397105396e-05, "loss_iou": 0.353515625, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 306875068, "step": 3362 }, { "epoch": 14.0125, "grad_norm": 2.5090734897570366, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 306966612, "step": 3363 }, { "epoch": 14.0125, "loss": 0.08640769869089127, "loss_ce": 0.00011924252612516284, "loss_iou": 0.25390625, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 306966612, "step": 3363 }, { "epoch": 14.016666666666667, "grad_norm": 2.772188082718569, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 307058192, "step": 3364 }, { "epoch": 14.016666666666667, "loss": 0.05798184871673584, "loss_ce": 1.3712133295484819e-05, "loss_iou": 0.2001953125, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 307058192, "step": 3364 }, { "epoch": 14.020833333333334, "grad_norm": 2.3346947679030703, "learning_rate": 5e-05, "loss": 0.0469, "num_input_tokens_seen": 307149620, "step": 3365 }, { "epoch": 14.020833333333334, "loss": 0.04485933482646942, "loss_ce": 4.4271389924688265e-05, "loss_iou": 0.166015625, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 307149620, "step": 3365 }, { "epoch": 14.025, "grad_norm": 1.825324947239382, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 307240716, "step": 3366 }, { "epoch": 14.025, "loss": 0.0676366314291954, "loss_ce": 9.680274160928093e-06, "loss_iou": 0.298828125, "loss_num": 0.0135498046875, "loss_xval": 0.0673828125, "num_input_tokens_seen": 307240716, "step": 3366 }, { "epoch": 14.029166666666667, "grad_norm": 3.571026495378173, "learning_rate": 5e-05, "loss": 0.0773, "num_input_tokens_seen": 307332192, "step": 3367 }, { "epoch": 14.029166666666667, "loss": 0.029795819893479347, "loss_ce": 3.355112858116627e-05, "loss_iou": 0.201171875, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 307332192, "step": 3367 }, { "epoch": 14.033333333333333, "grad_norm": 2.6230559325630014, "learning_rate": 5e-05, "loss": 0.0332, "num_input_tokens_seen": 307423484, "step": 3368 }, { "epoch": 14.033333333333333, "loss": 0.03575886785984039, "loss_ce": 2.2783957319916226e-05, "loss_iou": 0.265625, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 307423484, "step": 3368 }, { "epoch": 14.0375, "grad_norm": 12.282876672065623, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 307513036, "step": 3369 }, { "epoch": 14.0375, "loss": 0.05255713686347008, "loss_ce": 3.638456837506965e-05, "loss_iou": 0.15234375, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 307513036, "step": 3369 }, { "epoch": 14.041666666666666, "grad_norm": 6.657644439684569, "learning_rate": 5e-05, "loss": 0.062, "num_input_tokens_seen": 307603948, "step": 3370 }, { "epoch": 14.041666666666666, "loss": 0.07731955498456955, "loss_ce": 3.269945864303736e-06, "loss_iou": 0.26953125, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 307603948, "step": 3370 }, { "epoch": 14.045833333333333, "grad_norm": 4.046932156015916, "learning_rate": 5e-05, "loss": 0.0886, "num_input_tokens_seen": 307696304, "step": 3371 }, { "epoch": 14.045833333333333, "loss": 0.05779094621539116, "loss_ce": 4.787262878380716e-05, "loss_iou": 0.33984375, "loss_num": 0.01153564453125, "loss_xval": 0.057861328125, "num_input_tokens_seen": 307696304, "step": 3371 }, { "epoch": 14.05, "grad_norm": 2.9804179259547467, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 307787224, "step": 3372 }, { "epoch": 14.05, "loss": 0.0840468555688858, "loss_ce": 4.7224642912624404e-05, "loss_iou": 0.142578125, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 307787224, "step": 3372 }, { "epoch": 14.054166666666667, "grad_norm": 2.744260667240401, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 307879208, "step": 3373 }, { "epoch": 14.054166666666667, "loss": 0.03362205624580383, "loss_ce": 0.00018241905490867794, "loss_iou": 0.2421875, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 307879208, "step": 3373 }, { "epoch": 14.058333333333334, "grad_norm": 9.798140760602497, "learning_rate": 5e-05, "loss": 0.0871, "num_input_tokens_seen": 307970324, "step": 3374 }, { "epoch": 14.058333333333334, "loss": 0.05981835722923279, "loss_ce": 3.90674313166528e-06, "loss_iou": 0.3984375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 307970324, "step": 3374 }, { "epoch": 14.0625, "grad_norm": 4.46546384352056, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 308061856, "step": 3375 }, { "epoch": 14.0625, "loss": 0.06678728759288788, "loss_ce": 4.534380423137918e-05, "loss_iou": 0.275390625, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 308061856, "step": 3375 }, { "epoch": 14.066666666666666, "grad_norm": 2.906845444184048, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 308153420, "step": 3376 }, { "epoch": 14.066666666666666, "loss": 0.028707150369882584, "loss_ce": 0.0005699438625015318, "loss_iou": 0.1669921875, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 308153420, "step": 3376 }, { "epoch": 14.070833333333333, "grad_norm": 3.116604490032572, "learning_rate": 5e-05, "loss": 0.0639, "num_input_tokens_seen": 308245176, "step": 3377 }, { "epoch": 14.070833333333333, "loss": 0.06883566081523895, "loss_ce": 0.00015584740322083235, "loss_iou": 0.349609375, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 308245176, "step": 3377 }, { "epoch": 14.075, "grad_norm": 1.9318580460017465, "learning_rate": 5e-05, "loss": 0.0492, "num_input_tokens_seen": 308336652, "step": 3378 }, { "epoch": 14.075, "loss": 0.04875883460044861, "loss_ce": 2.226128890470136e-05, "loss_iou": 0.1875, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 308336652, "step": 3378 }, { "epoch": 14.079166666666667, "grad_norm": 4.22545329602741, "learning_rate": 5e-05, "loss": 0.0801, "num_input_tokens_seen": 308428316, "step": 3379 }, { "epoch": 14.079166666666667, "loss": 0.10865432024002075, "loss_ce": 0.002613367047160864, "loss_iou": 0.2275390625, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 308428316, "step": 3379 }, { "epoch": 14.083333333333334, "grad_norm": 1.2427563657952412, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 308519912, "step": 3380 }, { "epoch": 14.083333333333334, "loss": 0.049828533083200455, "loss_ce": 8.586703188484535e-06, "loss_iou": 0.28125, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 308519912, "step": 3380 }, { "epoch": 14.0875, "grad_norm": 1.1341950938205148, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 308610988, "step": 3381 }, { "epoch": 14.0875, "loss": 0.04335072636604309, "loss_ce": 8.138382327160798e-06, "loss_iou": 0.240234375, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 308610988, "step": 3381 }, { "epoch": 14.091666666666667, "grad_norm": 2.1222583710275025, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 308702240, "step": 3382 }, { "epoch": 14.091666666666667, "loss": 0.0367155522108078, "loss_ce": 2.908373062382452e-06, "loss_iou": 0.220703125, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 308702240, "step": 3382 }, { "epoch": 14.095833333333333, "grad_norm": 3.906211488436715, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 308793188, "step": 3383 }, { "epoch": 14.095833333333333, "loss": 0.055466026067733765, "loss_ce": 1.5587416783091612e-05, "loss_iou": 0.2314453125, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 308793188, "step": 3383 }, { "epoch": 14.1, "grad_norm": 3.340470863630639, "learning_rate": 5e-05, "loss": 0.0756, "num_input_tokens_seen": 308883792, "step": 3384 }, { "epoch": 14.1, "loss": 0.10700327903032303, "loss_ce": 3.9162659959401935e-05, "loss_iou": 0.31640625, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 308883792, "step": 3384 }, { "epoch": 14.104166666666666, "grad_norm": 3.95383471125247, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 308975832, "step": 3385 }, { "epoch": 14.104166666666666, "loss": 0.03994490206241608, "loss_ce": 1.2653750673052855e-05, "loss_iou": 0.37109375, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 308975832, "step": 3385 }, { "epoch": 14.108333333333333, "grad_norm": 3.0562950230924226, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 309066836, "step": 3386 }, { "epoch": 14.108333333333333, "loss": 0.07869721204042435, "loss_ce": 0.0010452393908053637, "loss_iou": 0.2578125, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 309066836, "step": 3386 }, { "epoch": 14.1125, "grad_norm": 3.4501582032949134, "learning_rate": 5e-05, "loss": 0.0633, "num_input_tokens_seen": 309158444, "step": 3387 }, { "epoch": 14.1125, "loss": 0.030149977654218674, "loss_ce": 4.438710311660543e-05, "loss_iou": 0.3125, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 309158444, "step": 3387 }, { "epoch": 14.116666666666667, "grad_norm": 2.3305611167856184, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 309249628, "step": 3388 }, { "epoch": 14.116666666666667, "loss": 0.0578351654112339, "loss_ce": 4.353590156824794e-06, "loss_iou": 0.349609375, "loss_num": 0.01153564453125, "loss_xval": 0.057861328125, "num_input_tokens_seen": 309249628, "step": 3388 }, { "epoch": 14.120833333333334, "grad_norm": 2.9320244192284792, "learning_rate": 5e-05, "loss": 0.0659, "num_input_tokens_seen": 309340384, "step": 3389 }, { "epoch": 14.120833333333334, "loss": 0.07648331671953201, "loss_ce": 6.266511263675056e-06, "loss_iou": 0.138671875, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 309340384, "step": 3389 }, { "epoch": 14.125, "grad_norm": 2.5052687026659686, "learning_rate": 5e-05, "loss": 0.0485, "num_input_tokens_seen": 309431800, "step": 3390 }, { "epoch": 14.125, "loss": 0.0563613623380661, "loss_ce": 2.5910518161254004e-05, "loss_iou": 0.255859375, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 309431800, "step": 3390 }, { "epoch": 14.129166666666666, "grad_norm": 3.1261621170052343, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 309522736, "step": 3391 }, { "epoch": 14.129166666666666, "loss": 0.046834796667099, "loss_ce": 5.573211637965869e-06, "loss_iou": 0.29296875, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 309522736, "step": 3391 }, { "epoch": 14.133333333333333, "grad_norm": 2.719234641985888, "learning_rate": 5e-05, "loss": 0.0778, "num_input_tokens_seen": 309613952, "step": 3392 }, { "epoch": 14.133333333333333, "loss": 0.11014129221439362, "loss_ce": 0.0001406825758749619, "loss_iou": 0.349609375, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 309613952, "step": 3392 }, { "epoch": 14.1375, "grad_norm": 2.6314723677992595, "learning_rate": 5e-05, "loss": 0.0278, "num_input_tokens_seen": 309705516, "step": 3393 }, { "epoch": 14.1375, "loss": 0.024617359042167664, "loss_ce": 0.0006610597483813763, "loss_iou": 0.13671875, "loss_num": 0.004791259765625, "loss_xval": 0.02392578125, "num_input_tokens_seen": 309705516, "step": 3393 }, { "epoch": 14.141666666666667, "grad_norm": 15.164866486052677, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 309797308, "step": 3394 }, { "epoch": 14.141666666666667, "loss": 0.0349428728222847, "loss_ce": 3.076143184443936e-05, "loss_iou": 0.236328125, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 309797308, "step": 3394 }, { "epoch": 14.145833333333334, "grad_norm": 3.320160941361757, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 309888808, "step": 3395 }, { "epoch": 14.145833333333334, "loss": 0.07468635588884354, "loss_ce": 2.5103210646193475e-05, "loss_iou": 0.283203125, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 309888808, "step": 3395 }, { "epoch": 14.15, "grad_norm": 7.294682697484828, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 309980220, "step": 3396 }, { "epoch": 14.15, "loss": 0.04793470725417137, "loss_ce": 0.00011366332182660699, "loss_iou": 0.28515625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 309980220, "step": 3396 }, { "epoch": 14.154166666666667, "grad_norm": 3.0591828989824954, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 310071624, "step": 3397 }, { "epoch": 14.154166666666667, "loss": 0.06359957903623581, "loss_ce": 9.428997600480216e-07, "loss_iou": 0.1865234375, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 310071624, "step": 3397 }, { "epoch": 14.158333333333333, "grad_norm": 0.7538136953677265, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 310163344, "step": 3398 }, { "epoch": 14.158333333333333, "loss": 0.07692218571901321, "loss_ce": 2.6306820473109838e-06, "loss_iou": 0.2578125, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 310163344, "step": 3398 }, { "epoch": 14.1625, "grad_norm": 1.6353708868238535, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 310255072, "step": 3399 }, { "epoch": 14.1625, "loss": 0.09011051058769226, "loss_ce": 1.4989738701842725e-05, "loss_iou": 0.248046875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 310255072, "step": 3399 }, { "epoch": 14.166666666666666, "grad_norm": 4.702226228150257, "learning_rate": 5e-05, "loss": 0.0382, "num_input_tokens_seen": 310346900, "step": 3400 }, { "epoch": 14.166666666666666, "loss": 0.05216794088482857, "loss_ce": 0.0003185725654475391, "loss_iou": 0.27734375, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 310346900, "step": 3400 }, { "epoch": 14.170833333333333, "grad_norm": 2.424212958036511, "learning_rate": 5e-05, "loss": 0.1025, "num_input_tokens_seen": 310438064, "step": 3401 }, { "epoch": 14.170833333333333, "loss": 0.16618862748146057, "loss_ce": 5.094004882266745e-05, "loss_iou": 0.2392578125, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 310438064, "step": 3401 }, { "epoch": 14.175, "grad_norm": 2.365340500812245, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 310529152, "step": 3402 }, { "epoch": 14.175, "loss": 0.05468135327100754, "loss_ce": 5.488721944857389e-05, "loss_iou": 0.1982421875, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 310529152, "step": 3402 }, { "epoch": 14.179166666666667, "grad_norm": 5.275771030939526, "learning_rate": 5e-05, "loss": 0.053, "num_input_tokens_seen": 310620380, "step": 3403 }, { "epoch": 14.179166666666667, "loss": 0.04898332804441452, "loss_ce": 2.6164125301875174e-06, "loss_iou": 0.3046875, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 310620380, "step": 3403 }, { "epoch": 14.183333333333334, "grad_norm": 4.5906342531945405, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 310711712, "step": 3404 }, { "epoch": 14.183333333333334, "loss": 0.044072914868593216, "loss_ce": 5.13083505211398e-05, "loss_iou": 0.255859375, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 310711712, "step": 3404 }, { "epoch": 14.1875, "grad_norm": 3.312375728688097, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 310803616, "step": 3405 }, { "epoch": 14.1875, "loss": 0.07145528495311737, "loss_ce": 5.941041308688e-05, "loss_iou": 0.283203125, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 310803616, "step": 3405 }, { "epoch": 14.191666666666666, "grad_norm": 11.85380634188713, "learning_rate": 5e-05, "loss": 0.1209, "num_input_tokens_seen": 310894768, "step": 3406 }, { "epoch": 14.191666666666666, "loss": 0.15935643017292023, "loss_ce": 0.0001004475197987631, "loss_iou": 0.22265625, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 310894768, "step": 3406 }, { "epoch": 14.195833333333333, "grad_norm": 3.7060806106659654, "learning_rate": 5e-05, "loss": 0.078, "num_input_tokens_seen": 310985924, "step": 3407 }, { "epoch": 14.195833333333333, "loss": 0.08576707541942596, "loss_ce": 0.0003026033518835902, "loss_iou": 0.279296875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 310985924, "step": 3407 }, { "epoch": 14.2, "grad_norm": 2.983513817753317, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 311077484, "step": 3408 }, { "epoch": 14.2, "loss": 0.04799432307481766, "loss_ce": 0.00013513251906260848, "loss_iou": 0.287109375, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 311077484, "step": 3408 }, { "epoch": 14.204166666666667, "grad_norm": 2.1804492896885983, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 311169032, "step": 3409 }, { "epoch": 14.204166666666667, "loss": 0.050522398203611374, "loss_ce": 7.684266165597364e-05, "loss_iou": 0.2392578125, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 311169032, "step": 3409 }, { "epoch": 14.208333333333334, "grad_norm": 2.7746453176567187, "learning_rate": 5e-05, "loss": 0.0874, "num_input_tokens_seen": 311259696, "step": 3410 }, { "epoch": 14.208333333333334, "loss": 0.10649485886096954, "loss_ce": 3.762988853850402e-06, "loss_iou": 0.2119140625, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 311259696, "step": 3410 }, { "epoch": 14.2125, "grad_norm": 3.558439839591837, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 311350888, "step": 3411 }, { "epoch": 14.2125, "loss": 0.051032066345214844, "loss_ce": 5.24527276866138e-05, "loss_iou": 0.310546875, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 311350888, "step": 3411 }, { "epoch": 14.216666666666667, "grad_norm": 3.6374062420646713, "learning_rate": 5e-05, "loss": 0.0612, "num_input_tokens_seen": 311442084, "step": 3412 }, { "epoch": 14.216666666666667, "loss": 0.036105334758758545, "loss_ce": 3.0388860068342183e-06, "loss_iou": 0.380859375, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 311442084, "step": 3412 }, { "epoch": 14.220833333333333, "grad_norm": 3.0367560816333987, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 311533748, "step": 3413 }, { "epoch": 14.220833333333333, "loss": 0.04989338293671608, "loss_ce": 3.529056630213745e-05, "loss_iou": 0.171875, "loss_num": 0.010009765625, "loss_xval": 0.0498046875, "num_input_tokens_seen": 311533748, "step": 3413 }, { "epoch": 14.225, "grad_norm": 2.3895487962927464, "learning_rate": 5e-05, "loss": 0.0595, "num_input_tokens_seen": 311625040, "step": 3414 }, { "epoch": 14.225, "loss": 0.07540172338485718, "loss_ce": 8.049310054047965e-06, "loss_iou": 0.287109375, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 311625040, "step": 3414 }, { "epoch": 14.229166666666666, "grad_norm": 3.3165335421466415, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 311716492, "step": 3415 }, { "epoch": 14.229166666666666, "loss": 0.045228682458400726, "loss_ce": 0.004213057924062014, "loss_iou": 0.26953125, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 311716492, "step": 3415 }, { "epoch": 14.233333333333333, "grad_norm": 2.8110109784901676, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 311807624, "step": 3416 }, { "epoch": 14.233333333333333, "loss": 0.028724106028676033, "loss_ce": 7.06592118149274e-06, "loss_iou": 0.271484375, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 311807624, "step": 3416 }, { "epoch": 14.2375, "grad_norm": 7.586267983628135, "learning_rate": 5e-05, "loss": 0.0575, "num_input_tokens_seen": 311899152, "step": 3417 }, { "epoch": 14.2375, "loss": 0.04901839420199394, "loss_ce": 8.345822425326332e-05, "loss_iou": 0.3984375, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 311899152, "step": 3417 }, { "epoch": 14.241666666666667, "grad_norm": 2.708253309616207, "learning_rate": 5e-05, "loss": 0.111, "num_input_tokens_seen": 311989696, "step": 3418 }, { "epoch": 14.241666666666667, "loss": 0.12908238172531128, "loss_ce": 0.0003134589351247996, "loss_iou": 0.2216796875, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 311989696, "step": 3418 }, { "epoch": 14.245833333333334, "grad_norm": 1.306529737096536, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 312080840, "step": 3419 }, { "epoch": 14.245833333333334, "loss": 0.060873642563819885, "loss_ce": 0.0001436614984413609, "loss_iou": 0.2578125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 312080840, "step": 3419 }, { "epoch": 14.25, "grad_norm": 3.000357538609261, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 312172280, "step": 3420 }, { "epoch": 14.25, "loss": 0.05053142458200455, "loss_ce": 0.00010112797463079914, "loss_iou": 0.2109375, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 312172280, "step": 3420 }, { "epoch": 14.254166666666666, "grad_norm": 2.3051491048317834, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 312264132, "step": 3421 }, { "epoch": 14.254166666666666, "loss": 0.03641936928033829, "loss_ce": 0.004787897691130638, "loss_iou": 0.154296875, "loss_num": 0.006317138671875, "loss_xval": 0.03173828125, "num_input_tokens_seen": 312264132, "step": 3421 }, { "epoch": 14.258333333333333, "grad_norm": 2.3064057360485966, "learning_rate": 5e-05, "loss": 0.084, "num_input_tokens_seen": 312355324, "step": 3422 }, { "epoch": 14.258333333333333, "loss": 0.03896855190396309, "loss_ce": 0.000268447125563398, "loss_iou": 0.1953125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 312355324, "step": 3422 }, { "epoch": 14.2625, "grad_norm": 2.441656794874996, "learning_rate": 5e-05, "loss": 0.0842, "num_input_tokens_seen": 312446936, "step": 3423 }, { "epoch": 14.2625, "loss": 0.07822424173355103, "loss_ce": 0.0009155810112133622, "loss_iou": 0.2373046875, "loss_num": 0.0155029296875, "loss_xval": 0.0771484375, "num_input_tokens_seen": 312446936, "step": 3423 }, { "epoch": 14.266666666666667, "grad_norm": 1.7532222834884197, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 312537936, "step": 3424 }, { "epoch": 14.266666666666667, "loss": 0.09173586964607239, "loss_ce": 6.106894579716027e-05, "loss_iou": 0.328125, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 312537936, "step": 3424 }, { "epoch": 14.270833333333334, "grad_norm": 6.687486262938646, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 312629212, "step": 3425 }, { "epoch": 14.270833333333334, "loss": 0.06332320719957352, "loss_ce": 0.00022811289818491787, "loss_iou": 0.271484375, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 312629212, "step": 3425 }, { "epoch": 14.275, "grad_norm": 2.8460177705290843, "learning_rate": 5e-05, "loss": 0.043, "num_input_tokens_seen": 312719904, "step": 3426 }, { "epoch": 14.275, "loss": 0.05278380960226059, "loss_ce": 3.6613828342524357e-06, "loss_iou": 0.2734375, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 312719904, "step": 3426 }, { "epoch": 14.279166666666667, "grad_norm": 2.9263352700930794, "learning_rate": 5e-05, "loss": 0.1052, "num_input_tokens_seen": 312810740, "step": 3427 }, { "epoch": 14.279166666666667, "loss": 0.10622579604387283, "loss_ce": 0.00016195495845749974, "loss_iou": 0.341796875, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 312810740, "step": 3427 }, { "epoch": 14.283333333333333, "grad_norm": 3.215534074928557, "learning_rate": 5e-05, "loss": 0.0646, "num_input_tokens_seen": 312902320, "step": 3428 }, { "epoch": 14.283333333333333, "loss": 0.059037499129772186, "loss_ce": 6.227454287e-05, "loss_iou": 0.1650390625, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 312902320, "step": 3428 }, { "epoch": 14.2875, "grad_norm": 4.164203787467469, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 312994128, "step": 3429 }, { "epoch": 14.2875, "loss": 0.053357671946287155, "loss_ce": 0.0009589894907549024, "loss_iou": 0.3046875, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 312994128, "step": 3429 }, { "epoch": 14.291666666666666, "grad_norm": 3.3424798425486393, "learning_rate": 5e-05, "loss": 0.0696, "num_input_tokens_seen": 313086072, "step": 3430 }, { "epoch": 14.291666666666666, "loss": 0.06933162361383438, "loss_ce": 8.724544750293717e-05, "loss_iou": 0.2578125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 313086072, "step": 3430 }, { "epoch": 14.295833333333333, "grad_norm": 3.221319375620749, "learning_rate": 5e-05, "loss": 0.0801, "num_input_tokens_seen": 313177428, "step": 3431 }, { "epoch": 14.295833333333333, "loss": 0.050983380526304245, "loss_ce": 9.531808609608561e-05, "loss_iou": 0.224609375, "loss_num": 0.01019287109375, "loss_xval": 0.05078125, "num_input_tokens_seen": 313177428, "step": 3431 }, { "epoch": 14.3, "grad_norm": 6.893536221165098, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 313268856, "step": 3432 }, { "epoch": 14.3, "loss": 0.06522677838802338, "loss_ce": 1.0717561053752434e-05, "loss_iou": 0.306640625, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 313268856, "step": 3432 }, { "epoch": 14.304166666666667, "grad_norm": 2.266375022124694, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 313359784, "step": 3433 }, { "epoch": 14.304166666666667, "loss": 0.04838399589061737, "loss_ce": 2.8890797693748027e-05, "loss_iou": 0.2109375, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 313359784, "step": 3433 }, { "epoch": 14.308333333333334, "grad_norm": 2.6935861241346992, "learning_rate": 5e-05, "loss": 0.065, "num_input_tokens_seen": 313451584, "step": 3434 }, { "epoch": 14.308333333333334, "loss": 0.03947412967681885, "loss_ce": 3.778855170821771e-05, "loss_iou": 0.173828125, "loss_num": 0.00787353515625, "loss_xval": 0.03955078125, "num_input_tokens_seen": 313451584, "step": 3434 }, { "epoch": 14.3125, "grad_norm": 2.491603991707111, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 313542728, "step": 3435 }, { "epoch": 14.3125, "loss": 0.06238909065723419, "loss_ce": 7.219469989649951e-05, "loss_iou": 0.1796875, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 313542728, "step": 3435 }, { "epoch": 14.316666666666666, "grad_norm": 4.281069025371908, "learning_rate": 5e-05, "loss": 0.0691, "num_input_tokens_seen": 313634348, "step": 3436 }, { "epoch": 14.316666666666666, "loss": 0.0943194031715393, "loss_ce": 0.0006533203413709998, "loss_iou": 0.212890625, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 313634348, "step": 3436 }, { "epoch": 14.320833333333333, "grad_norm": 2.3967973579395525, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 313725236, "step": 3437 }, { "epoch": 14.320833333333333, "loss": 0.042279839515686035, "loss_ce": 8.928626630222425e-05, "loss_iou": 0.265625, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 313725236, "step": 3437 }, { "epoch": 14.325, "grad_norm": 2.257621931213549, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 313816824, "step": 3438 }, { "epoch": 14.325, "loss": 0.054193347692489624, "loss_ce": 1.7587877891855896e-06, "loss_iou": 0.263671875, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 313816824, "step": 3438 }, { "epoch": 14.329166666666667, "grad_norm": 5.272133435023301, "learning_rate": 5e-05, "loss": 0.052, "num_input_tokens_seen": 313908068, "step": 3439 }, { "epoch": 14.329166666666667, "loss": 0.04028581082820892, "loss_ce": 5.2200211939634755e-05, "loss_iou": 0.2373046875, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 313908068, "step": 3439 }, { "epoch": 14.333333333333334, "grad_norm": 2.7108855978850372, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 313999224, "step": 3440 }, { "epoch": 14.333333333333334, "loss": 0.023377256467938423, "loss_ce": 6.18260819464922e-05, "loss_iou": 0.2392578125, "loss_num": 0.004669189453125, "loss_xval": 0.0233154296875, "num_input_tokens_seen": 313999224, "step": 3440 }, { "epoch": 14.3375, "grad_norm": 2.528980208355573, "learning_rate": 5e-05, "loss": 0.1086, "num_input_tokens_seen": 314090544, "step": 3441 }, { "epoch": 14.3375, "loss": 0.06901770830154419, "loss_ce": 2.204579686804209e-06, "loss_iou": 0.2890625, "loss_num": 0.01385498046875, "loss_xval": 0.06884765625, "num_input_tokens_seen": 314090544, "step": 3441 }, { "epoch": 14.341666666666667, "grad_norm": 2.5497706986522486, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 314181788, "step": 3442 }, { "epoch": 14.341666666666667, "loss": 0.06665512919425964, "loss_ce": 4.742196324514225e-06, "loss_iou": 0.30078125, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 314181788, "step": 3442 }, { "epoch": 14.345833333333333, "grad_norm": 3.328778861443991, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 314272928, "step": 3443 }, { "epoch": 14.345833333333333, "loss": 0.07248881459236145, "loss_ce": 0.0018406271701678634, "loss_iou": 0.158203125, "loss_num": 0.01409912109375, "loss_xval": 0.07080078125, "num_input_tokens_seen": 314272928, "step": 3443 }, { "epoch": 14.35, "grad_norm": 4.11741321863604, "learning_rate": 5e-05, "loss": 0.0648, "num_input_tokens_seen": 314364008, "step": 3444 }, { "epoch": 14.35, "loss": 0.08235032856464386, "loss_ce": 7.493438897654414e-05, "loss_iou": 0.306640625, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 314364008, "step": 3444 }, { "epoch": 14.354166666666666, "grad_norm": 5.913859683796368, "learning_rate": 5e-05, "loss": 0.0735, "num_input_tokens_seen": 314455664, "step": 3445 }, { "epoch": 14.354166666666666, "loss": 0.10439564287662506, "loss_ce": 0.004648941569030285, "loss_iou": 0.33203125, "loss_num": 0.02001953125, "loss_xval": 0.099609375, "num_input_tokens_seen": 314455664, "step": 3445 }, { "epoch": 14.358333333333333, "grad_norm": 2.042636336445981, "learning_rate": 5e-05, "loss": 0.0526, "num_input_tokens_seen": 314547192, "step": 3446 }, { "epoch": 14.358333333333333, "loss": 0.04504550248384476, "loss_ce": 1.5587525012961123e-06, "loss_iou": 0.3359375, "loss_num": 0.009033203125, "loss_xval": 0.044921875, "num_input_tokens_seen": 314547192, "step": 3446 }, { "epoch": 14.3625, "grad_norm": 11.13085697956147, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 314638912, "step": 3447 }, { "epoch": 14.3625, "loss": 0.10157739371061325, "loss_ce": 1.4897475921316072e-05, "loss_iou": 0.3046875, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 314638912, "step": 3447 }, { "epoch": 14.366666666666667, "grad_norm": 1.8191878542115973, "learning_rate": 5e-05, "loss": 0.086, "num_input_tokens_seen": 314728820, "step": 3448 }, { "epoch": 14.366666666666667, "loss": 0.1349654495716095, "loss_ce": 0.0005660292226821184, "loss_iou": 0.21875, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 314728820, "step": 3448 }, { "epoch": 14.370833333333334, "grad_norm": 0.8656441342338449, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 314820188, "step": 3449 }, { "epoch": 14.370833333333334, "loss": 0.02592162787914276, "loss_ce": 0.0002105679304804653, "loss_iou": 0.16796875, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 314820188, "step": 3449 }, { "epoch": 14.375, "grad_norm": 4.541659388869669, "learning_rate": 5e-05, "loss": 0.0895, "num_input_tokens_seen": 314911288, "step": 3450 }, { "epoch": 14.375, "loss": 0.138495534658432, "loss_ce": 1.4394384379556868e-05, "loss_iou": 0.1982421875, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 314911288, "step": 3450 }, { "epoch": 14.379166666666666, "grad_norm": 2.3182304855785305, "learning_rate": 5e-05, "loss": 0.0688, "num_input_tokens_seen": 315002220, "step": 3451 }, { "epoch": 14.379166666666666, "loss": 0.0673556923866272, "loss_ce": 1.1029018423869275e-05, "loss_iou": 0.322265625, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 315002220, "step": 3451 }, { "epoch": 14.383333333333333, "grad_norm": 0.8455518123782996, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 315092588, "step": 3452 }, { "epoch": 14.383333333333333, "loss": 0.05230352282524109, "loss_ce": 6.505908822873607e-05, "loss_iou": 0.2451171875, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 315092588, "step": 3452 }, { "epoch": 14.3875, "grad_norm": 1.9418601665956887, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 315183924, "step": 3453 }, { "epoch": 14.3875, "loss": 0.05696332827210426, "loss_ce": 1.752637399476953e-05, "loss_iou": 0.1826171875, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 315183924, "step": 3453 }, { "epoch": 14.391666666666667, "grad_norm": 4.802017746819228, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 315275104, "step": 3454 }, { "epoch": 14.391666666666667, "loss": 0.06346727162599564, "loss_ce": 3.6482193536357954e-05, "loss_iou": 0.2392578125, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 315275104, "step": 3454 }, { "epoch": 14.395833333333334, "grad_norm": 1.966066230471116, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 315366048, "step": 3455 }, { "epoch": 14.395833333333334, "loss": 0.05793462693691254, "loss_ce": 4.63609194412129e-06, "loss_iou": 0.30078125, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 315366048, "step": 3455 }, { "epoch": 14.4, "grad_norm": 3.4031793886468655, "learning_rate": 5e-05, "loss": 0.0896, "num_input_tokens_seen": 315457876, "step": 3456 }, { "epoch": 14.4, "loss": 0.08385931700468063, "loss_ce": 0.002087468048557639, "loss_iou": 0.1767578125, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 315457876, "step": 3456 }, { "epoch": 14.404166666666667, "grad_norm": 2.496511746648192, "learning_rate": 5e-05, "loss": 0.0644, "num_input_tokens_seen": 315548672, "step": 3457 }, { "epoch": 14.404166666666667, "loss": 0.06105422228574753, "loss_ce": 3.432453377172351e-05, "loss_iou": 0.31640625, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 315548672, "step": 3457 }, { "epoch": 14.408333333333333, "grad_norm": 1.6076830601599892, "learning_rate": 5e-05, "loss": 0.0253, "num_input_tokens_seen": 315640468, "step": 3458 }, { "epoch": 14.408333333333333, "loss": 0.017314031720161438, "loss_ce": 0.0003538889577612281, "loss_iou": 0.2255859375, "loss_num": 0.003387451171875, "loss_xval": 0.0169677734375, "num_input_tokens_seen": 315640468, "step": 3458 }, { "epoch": 14.4125, "grad_norm": 3.0724346663416675, "learning_rate": 5e-05, "loss": 0.052, "num_input_tokens_seen": 315732096, "step": 3459 }, { "epoch": 14.4125, "loss": 0.04059988260269165, "loss_ce": 0.00048452624469064176, "loss_iou": 0.3125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 315732096, "step": 3459 }, { "epoch": 14.416666666666666, "grad_norm": 2.415147114180416, "learning_rate": 5e-05, "loss": 0.0614, "num_input_tokens_seen": 315823008, "step": 3460 }, { "epoch": 14.416666666666666, "loss": 0.06670276820659637, "loss_ce": 6.607610885112081e-06, "loss_iou": 0.255859375, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 315823008, "step": 3460 }, { "epoch": 14.420833333333333, "grad_norm": 2.6978788677723986, "learning_rate": 5e-05, "loss": 0.0575, "num_input_tokens_seen": 315913928, "step": 3461 }, { "epoch": 14.420833333333333, "loss": 0.04788322001695633, "loss_ce": 1.6398162188124843e-05, "loss_iou": 0.22265625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 315913928, "step": 3461 }, { "epoch": 14.425, "grad_norm": 3.792149348929219, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 316003724, "step": 3462 }, { "epoch": 14.425, "loss": 0.08763362467288971, "loss_ce": 4.817647277377546e-05, "loss_iou": 0.263671875, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 316003724, "step": 3462 }, { "epoch": 14.429166666666667, "grad_norm": 3.6421126679520106, "learning_rate": 5e-05, "loss": 0.0782, "num_input_tokens_seen": 316094972, "step": 3463 }, { "epoch": 14.429166666666667, "loss": 0.08040214329957962, "loss_ce": 0.00032401629141531885, "loss_iou": 0.2373046875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 316094972, "step": 3463 }, { "epoch": 14.433333333333334, "grad_norm": 2.1389868850931673, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 316186284, "step": 3464 }, { "epoch": 14.433333333333334, "loss": 0.07011739909648895, "loss_ce": 0.0002626621862873435, "loss_iou": 0.412109375, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 316186284, "step": 3464 }, { "epoch": 14.4375, "grad_norm": 6.537398366871755, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 316277796, "step": 3465 }, { "epoch": 14.4375, "loss": 0.08371639251708984, "loss_ce": 0.000311847310513258, "loss_iou": 0.259765625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 316277796, "step": 3465 }, { "epoch": 14.441666666666666, "grad_norm": 3.078759532509579, "learning_rate": 5e-05, "loss": 0.0517, "num_input_tokens_seen": 316368988, "step": 3466 }, { "epoch": 14.441666666666666, "loss": 0.04081658646464348, "loss_ce": 2.9841428840882145e-05, "loss_iou": 0.224609375, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 316368988, "step": 3466 }, { "epoch": 14.445833333333333, "grad_norm": 2.831385150482735, "learning_rate": 5e-05, "loss": 0.0516, "num_input_tokens_seen": 316460692, "step": 3467 }, { "epoch": 14.445833333333333, "loss": 0.057911500334739685, "loss_ce": 8.069008617894724e-05, "loss_iou": 0.32421875, "loss_num": 0.01153564453125, "loss_xval": 0.057861328125, "num_input_tokens_seen": 316460692, "step": 3467 }, { "epoch": 14.45, "grad_norm": 1.719084843590879, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 316551832, "step": 3468 }, { "epoch": 14.45, "loss": 0.044491663575172424, "loss_ce": 1.2295596206968185e-05, "loss_iou": 0.2333984375, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 316551832, "step": 3468 }, { "epoch": 14.454166666666667, "grad_norm": 3.5030037950264177, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 316642824, "step": 3469 }, { "epoch": 14.454166666666667, "loss": 0.10698297619819641, "loss_ce": 3.608635779528413e-06, "loss_iou": 0.0654296875, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 316642824, "step": 3469 }, { "epoch": 14.458333333333334, "grad_norm": 2.4394930623115587, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 316734436, "step": 3470 }, { "epoch": 14.458333333333334, "loss": 0.08319145441055298, "loss_ce": 0.00012260537187103182, "loss_iou": 0.185546875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 316734436, "step": 3470 }, { "epoch": 14.4625, "grad_norm": 2.740534558701278, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 316826096, "step": 3471 }, { "epoch": 14.4625, "loss": 0.05262012034654617, "loss_ce": 0.00017566494352649897, "loss_iou": 0.271484375, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 316826096, "step": 3471 }, { "epoch": 14.466666666666667, "grad_norm": 3.325097662068458, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 316917172, "step": 3472 }, { "epoch": 14.466666666666667, "loss": 0.07085268199443817, "loss_ce": 0.0014404429821297526, "loss_iou": 0.28515625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 316917172, "step": 3472 }, { "epoch": 14.470833333333333, "grad_norm": 3.4718357598911647, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 317008100, "step": 3473 }, { "epoch": 14.470833333333333, "loss": 0.09283044934272766, "loss_ce": 7.226588058983907e-05, "loss_iou": 0.296875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 317008100, "step": 3473 }, { "epoch": 14.475, "grad_norm": 4.438588630076275, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 317100020, "step": 3474 }, { "epoch": 14.475, "loss": 0.06110034137964249, "loss_ce": 4.149102096562274e-06, "loss_iou": 0.30078125, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 317100020, "step": 3474 }, { "epoch": 14.479166666666666, "grad_norm": 2.9706067154667113, "learning_rate": 5e-05, "loss": 0.0807, "num_input_tokens_seen": 317191784, "step": 3475 }, { "epoch": 14.479166666666666, "loss": 0.08926853537559509, "loss_ce": 4.615443231159588e-06, "loss_iou": 0.228515625, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 317191784, "step": 3475 }, { "epoch": 14.483333333333333, "grad_norm": 3.3619350623101614, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 317283224, "step": 3476 }, { "epoch": 14.483333333333333, "loss": 0.08284099400043488, "loss_ce": 0.00022990800789557397, "loss_iou": 0.18359375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 317283224, "step": 3476 }, { "epoch": 14.4875, "grad_norm": 2.419172275959208, "learning_rate": 5e-05, "loss": 0.0778, "num_input_tokens_seen": 317373992, "step": 3477 }, { "epoch": 14.4875, "loss": 0.04498765617609024, "loss_ce": 2.0008901628898457e-05, "loss_iou": 0.1005859375, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 317373992, "step": 3477 }, { "epoch": 14.491666666666667, "grad_norm": 5.847089711942539, "learning_rate": 5e-05, "loss": 0.0843, "num_input_tokens_seen": 317465668, "step": 3478 }, { "epoch": 14.491666666666667, "loss": 0.1135324239730835, "loss_ce": 0.00020540252444334328, "loss_iou": 0.28125, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 317465668, "step": 3478 }, { "epoch": 14.495833333333334, "grad_norm": 4.647839770700526, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 317556852, "step": 3479 }, { "epoch": 14.495833333333334, "loss": 0.06063609942793846, "loss_ce": 4.344819171819836e-05, "loss_iou": 0.296875, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 317556852, "step": 3479 }, { "epoch": 14.5, "grad_norm": 2.4680288450953287, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 317648416, "step": 3480 }, { "epoch": 14.5, "loss": 0.05539741367101669, "loss_ce": 6.904706242494285e-05, "loss_iou": 0.27734375, "loss_num": 0.01104736328125, "loss_xval": 0.055419921875, "num_input_tokens_seen": 317648416, "step": 3480 }, { "epoch": 14.504166666666666, "grad_norm": 2.1585672480870173, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 317739400, "step": 3481 }, { "epoch": 14.504166666666666, "loss": 0.07324524223804474, "loss_ce": 3.0585702006646898e-06, "loss_iou": 0.3046875, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 317739400, "step": 3481 }, { "epoch": 14.508333333333333, "grad_norm": 2.959837229857713, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 317830996, "step": 3482 }, { "epoch": 14.508333333333333, "loss": 0.04864703118801117, "loss_ce": 0.0007496916223317385, "loss_iou": 0.31640625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 317830996, "step": 3482 }, { "epoch": 14.5125, "grad_norm": 5.104023715434689, "learning_rate": 5e-05, "loss": 0.0755, "num_input_tokens_seen": 317922784, "step": 3483 }, { "epoch": 14.5125, "loss": 0.08295391499996185, "loss_ce": 0.0008768883417360485, "loss_iou": 0.3125, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 317922784, "step": 3483 }, { "epoch": 14.516666666666667, "grad_norm": 5.579693820814273, "learning_rate": 5e-05, "loss": 0.1435, "num_input_tokens_seen": 318014244, "step": 3484 }, { "epoch": 14.516666666666667, "loss": 0.12112545222043991, "loss_ce": 0.0006878351559862494, "loss_iou": 0.251953125, "loss_num": 0.0240478515625, "loss_xval": 0.12060546875, "num_input_tokens_seen": 318014244, "step": 3484 }, { "epoch": 14.520833333333334, "grad_norm": 1.3620977718613712, "learning_rate": 5e-05, "loss": 0.0772, "num_input_tokens_seen": 318106844, "step": 3485 }, { "epoch": 14.520833333333334, "loss": 0.04919442534446716, "loss_ce": 0.00012978771701455116, "loss_iou": 0.1923828125, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 318106844, "step": 3485 }, { "epoch": 14.525, "grad_norm": 3.750694641880807, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 318198276, "step": 3486 }, { "epoch": 14.525, "loss": 0.05821506679058075, "loss_ce": 0.00023166877508629113, "loss_iou": 0.296875, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 318198276, "step": 3486 }, { "epoch": 14.529166666666667, "grad_norm": 2.837096414759929, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 318289184, "step": 3487 }, { "epoch": 14.529166666666667, "loss": 0.04990578815340996, "loss_ce": 9.547175068291835e-06, "loss_iou": 0.2099609375, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 318289184, "step": 3487 }, { "epoch": 14.533333333333333, "grad_norm": 2.6995133820829134, "learning_rate": 5e-05, "loss": 0.069, "num_input_tokens_seen": 318380240, "step": 3488 }, { "epoch": 14.533333333333333, "loss": 0.11265411972999573, "loss_ce": 0.00027314224280416965, "loss_iou": 0.248046875, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 318380240, "step": 3488 }, { "epoch": 14.5375, "grad_norm": 3.222938431023567, "learning_rate": 5e-05, "loss": 0.0693, "num_input_tokens_seen": 318471796, "step": 3489 }, { "epoch": 14.5375, "loss": 0.07313777506351471, "loss_ce": 1.7655922420090064e-05, "loss_iou": 0.41015625, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 318471796, "step": 3489 }, { "epoch": 14.541666666666666, "grad_norm": 2.6278819311326034, "learning_rate": 5e-05, "loss": 0.042, "num_input_tokens_seen": 318563232, "step": 3490 }, { "epoch": 14.541666666666666, "loss": 0.04149797558784485, "loss_ce": 0.0021226725075393915, "loss_iou": 0.197265625, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 318563232, "step": 3490 }, { "epoch": 14.545833333333333, "grad_norm": 1.5850535962182528, "learning_rate": 5e-05, "loss": 0.0677, "num_input_tokens_seen": 318654996, "step": 3491 }, { "epoch": 14.545833333333333, "loss": 0.05964607372879982, "loss_ce": 0.000335162301780656, "loss_iou": 0.2275390625, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 318654996, "step": 3491 }, { "epoch": 14.55, "grad_norm": 2.6214365855600863, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 318746168, "step": 3492 }, { "epoch": 14.55, "loss": 0.04661758244037628, "loss_ce": 1.97783606381563e-06, "loss_iou": 0.357421875, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 318746168, "step": 3492 }, { "epoch": 14.554166666666667, "grad_norm": 2.2552122955124583, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 318837452, "step": 3493 }, { "epoch": 14.554166666666667, "loss": 0.037708431482315063, "loss_ce": 3.448417555773631e-05, "loss_iou": 0.306640625, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 318837452, "step": 3493 }, { "epoch": 14.558333333333334, "grad_norm": 4.160645839197537, "learning_rate": 5e-05, "loss": 0.1045, "num_input_tokens_seen": 318928580, "step": 3494 }, { "epoch": 14.558333333333334, "loss": 0.037462908774614334, "loss_ce": 2.5830672711890657e-06, "loss_iou": 0.27734375, "loss_num": 0.00750732421875, "loss_xval": 0.037353515625, "num_input_tokens_seen": 318928580, "step": 3494 }, { "epoch": 14.5625, "grad_norm": 3.1912535546962335, "learning_rate": 5e-05, "loss": 0.0519, "num_input_tokens_seen": 319019968, "step": 3495 }, { "epoch": 14.5625, "loss": 0.0376228466629982, "loss_ce": 9.932819011737593e-06, "loss_iou": 0.322265625, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 319019968, "step": 3495 }, { "epoch": 14.566666666666666, "grad_norm": 2.5813431829196265, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 319111048, "step": 3496 }, { "epoch": 14.566666666666666, "loss": 0.053882814943790436, "loss_ce": 0.0002939487749245018, "loss_iou": 0.1630859375, "loss_num": 0.01068115234375, "loss_xval": 0.0537109375, "num_input_tokens_seen": 319111048, "step": 3496 }, { "epoch": 14.570833333333333, "grad_norm": 2.923435349630488, "learning_rate": 5e-05, "loss": 0.0628, "num_input_tokens_seen": 319202188, "step": 3497 }, { "epoch": 14.570833333333333, "loss": 0.06960056722164154, "loss_ce": 3.575097071006894e-05, "loss_iou": 0.314453125, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 319202188, "step": 3497 }, { "epoch": 14.575, "grad_norm": 3.2955743647970683, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 319293164, "step": 3498 }, { "epoch": 14.575, "loss": 0.04828281328082085, "loss_ce": 4.007476945844246e-06, "loss_iou": 0.265625, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 319293164, "step": 3498 }, { "epoch": 14.579166666666667, "grad_norm": 2.2018418363550913, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 319384616, "step": 3499 }, { "epoch": 14.579166666666667, "loss": 0.07954820990562439, "loss_ce": 0.00032457130146212876, "loss_iou": 0.1904296875, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 319384616, "step": 3499 }, { "epoch": 14.583333333333334, "grad_norm": 3.429875070101406, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 319475956, "step": 3500 }, { "epoch": 14.583333333333334, "eval_seeclick_CIoU": 0.17655083164572716, "eval_seeclick_GIoU": 0.15622518584132195, "eval_seeclick_IoU": 0.2974148243665695, "eval_seeclick_MAE_all": 0.11146583780646324, "eval_seeclick_MAE_h": 0.09194779396057129, "eval_seeclick_MAE_w": 0.23992763459682465, "eval_seeclick_MAE_x_boxes": 0.25715453177690506, "eval_seeclick_MAE_y_boxes": 0.09402742981910706, "eval_seeclick_NUM_probability": 0.9999983608722687, "eval_seeclick_inside_bbox": 0.4318181872367859, "eval_seeclick_loss": 0.6375502943992615, "eval_seeclick_loss_ce": 0.13811881095170975, "eval_seeclick_loss_iou": 0.39617919921875, "eval_seeclick_loss_num": 0.09464263916015625, "eval_seeclick_loss_xval": 0.4732666015625, "eval_seeclick_runtime": 76.4697, "eval_seeclick_samples_per_second": 0.562, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 319475956, "step": 3500 }, { "epoch": 14.583333333333334, "eval_icons_CIoU": 0.3210890293121338, "eval_icons_GIoU": 0.3353418707847595, "eval_icons_IoU": 0.3973853290081024, "eval_icons_MAE_all": 0.06360300816595554, "eval_icons_MAE_h": 0.136297307908535, "eval_icons_MAE_w": 0.09041904658079147, "eval_icons_MAE_x_boxes": 0.09114982187747955, "eval_icons_MAE_y_boxes": 0.13882280513644218, "eval_icons_NUM_probability": 0.9999990165233612, "eval_icons_inside_bbox": 0.5034722238779068, "eval_icons_loss": 0.3158852756023407, "eval_icons_loss_ce": 2.5450488465139642e-05, "eval_icons_loss_iou": 0.275146484375, "eval_icons_loss_num": 0.064208984375, "eval_icons_loss_xval": 0.32122802734375, "eval_icons_runtime": 90.401, "eval_icons_samples_per_second": 0.553, "eval_icons_steps_per_second": 0.022, "num_input_tokens_seen": 319475956, "step": 3500 }, { "epoch": 14.583333333333334, "eval_screenspot_CIoU": 0.3727397421995799, "eval_screenspot_GIoU": 0.3544403314590454, "eval_screenspot_IoU": 0.44670842091242474, "eval_screenspot_MAE_all": 0.09947741279999416, "eval_screenspot_MAE_h": 0.0852027287085851, "eval_screenspot_MAE_w": 0.21258516609668732, "eval_screenspot_MAE_x_boxes": 0.19677802920341492, "eval_screenspot_MAE_y_boxes": 0.0816585545738538, "eval_screenspot_NUM_probability": 0.999982754389445, "eval_screenspot_inside_bbox": 0.659583330154419, "eval_screenspot_loss": 0.4953005015850067, "eval_screenspot_loss_ce": 1.7430343916657876e-05, "eval_screenspot_loss_iou": 0.3859456380208333, "eval_screenspot_loss_num": 0.09992472330729167, "eval_screenspot_loss_xval": 0.4994099934895833, "eval_screenspot_runtime": 155.3672, "eval_screenspot_samples_per_second": 0.573, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 319475956, "step": 3500 }, { "epoch": 14.583333333333334, "eval_compot_CIoU": 0.5295035243034363, "eval_compot_GIoU": 0.5284956693649292, "eval_compot_IoU": 0.5923766493797302, "eval_compot_MAE_all": 0.04935946501791477, "eval_compot_MAE_h": 0.0681275837123394, "eval_compot_MAE_w": 0.11527523770928383, "eval_compot_MAE_x_boxes": 0.1153593361377716, "eval_compot_MAE_y_boxes": 0.0679020918905735, "eval_compot_NUM_probability": 0.999997466802597, "eval_compot_inside_bbox": 0.7795138955116272, "eval_compot_loss": 0.2786952555179596, "eval_compot_loss_ce": 0.03497672267258167, "eval_compot_loss_iou": 0.33575439453125, "eval_compot_loss_num": 0.0444488525390625, "eval_compot_loss_xval": 0.222320556640625, "eval_compot_runtime": 89.9096, "eval_compot_samples_per_second": 0.556, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 319475956, "step": 3500 }, { "epoch": 14.583333333333334, "loss": 0.2446812093257904, "loss_ce": 0.03169901296496391, "loss_iou": 0.353515625, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 319475956, "step": 3500 }, { "epoch": 14.5875, "grad_norm": 4.118534474092619, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 319567420, "step": 3501 }, { "epoch": 14.5875, "loss": 0.032026711851358414, "loss_ce": 6.144516191852745e-06, "loss_iou": 0.33203125, "loss_num": 0.00640869140625, "loss_xval": 0.031982421875, "num_input_tokens_seen": 319567420, "step": 3501 }, { "epoch": 14.591666666666667, "grad_norm": 4.308728866604463, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 319659252, "step": 3502 }, { "epoch": 14.591666666666667, "loss": 0.04407627880573273, "loss_ce": 0.00013096319162286818, "loss_iou": 0.2490234375, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 319659252, "step": 3502 }, { "epoch": 14.595833333333333, "grad_norm": 3.0546123104614242, "learning_rate": 5e-05, "loss": 0.0746, "num_input_tokens_seen": 319750036, "step": 3503 }, { "epoch": 14.595833333333333, "loss": 0.10922509431838989, "loss_ce": 2.682106924112304e-06, "loss_iou": 0.322265625, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 319750036, "step": 3503 }, { "epoch": 14.6, "grad_norm": 5.275806577118591, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 319841108, "step": 3504 }, { "epoch": 14.6, "loss": 0.09145887196063995, "loss_ce": 8.924029680201784e-05, "loss_iou": 0.2109375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 319841108, "step": 3504 }, { "epoch": 14.604166666666666, "grad_norm": 2.3741590971225217, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 319932436, "step": 3505 }, { "epoch": 14.604166666666666, "loss": 0.05104352533817291, "loss_ce": 0.00017072322953026742, "loss_iou": 0.2216796875, "loss_num": 0.01019287109375, "loss_xval": 0.05078125, "num_input_tokens_seen": 319932436, "step": 3505 }, { "epoch": 14.608333333333333, "grad_norm": 2.2175225111331467, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 320023800, "step": 3506 }, { "epoch": 14.608333333333333, "loss": 0.026951856911182404, "loss_ce": 0.00031001074239611626, "loss_iou": 0.3125, "loss_num": 0.005340576171875, "loss_xval": 0.026611328125, "num_input_tokens_seen": 320023800, "step": 3506 }, { "epoch": 14.6125, "grad_norm": 3.0426971693638762, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 320115288, "step": 3507 }, { "epoch": 14.6125, "loss": 0.05835458263754845, "loss_ce": 0.002160276984795928, "loss_iou": 0.1484375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 320115288, "step": 3507 }, { "epoch": 14.616666666666667, "grad_norm": 1.2476075034819456, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 320206484, "step": 3508 }, { "epoch": 14.616666666666667, "loss": 0.09845627099275589, "loss_ce": 0.00025833185645751655, "loss_iou": 0.212890625, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 320206484, "step": 3508 }, { "epoch": 14.620833333333334, "grad_norm": 2.110972411286442, "learning_rate": 5e-05, "loss": 0.05, "num_input_tokens_seen": 320297496, "step": 3509 }, { "epoch": 14.620833333333334, "loss": 0.06258494406938553, "loss_ce": 1.0235469289909815e-06, "loss_iou": 0.193359375, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 320297496, "step": 3509 }, { "epoch": 14.625, "grad_norm": 3.157498930857514, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 320388916, "step": 3510 }, { "epoch": 14.625, "loss": 0.058251626789569855, "loss_ce": 0.0027477818075567484, "loss_iou": 0.18359375, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 320388916, "step": 3510 }, { "epoch": 14.629166666666666, "grad_norm": 2.057699984429364, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 320480088, "step": 3511 }, { "epoch": 14.629166666666666, "loss": 0.0456683523952961, "loss_ce": 0.00048707760288380086, "loss_iou": 0.2177734375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 320480088, "step": 3511 }, { "epoch": 14.633333333333333, "grad_norm": 2.6068400938906544, "learning_rate": 5e-05, "loss": 0.0995, "num_input_tokens_seen": 320571404, "step": 3512 }, { "epoch": 14.633333333333333, "loss": 0.07493845373392105, "loss_ce": 1.779539707058575e-05, "loss_iou": 0.21875, "loss_num": 0.0150146484375, "loss_xval": 0.07470703125, "num_input_tokens_seen": 320571404, "step": 3512 }, { "epoch": 14.6375, "grad_norm": 3.6310490309966594, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 320662948, "step": 3513 }, { "epoch": 14.6375, "loss": 0.04782600700855255, "loss_ce": 0.00014228891814127564, "loss_iou": 0.20703125, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 320662948, "step": 3513 }, { "epoch": 14.641666666666667, "grad_norm": 4.0663307305655865, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 320754252, "step": 3514 }, { "epoch": 14.641666666666667, "loss": 0.04836490750312805, "loss_ce": 2.1755488432972925e-06, "loss_iou": 0.18359375, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 320754252, "step": 3514 }, { "epoch": 14.645833333333334, "grad_norm": 2.7017123754218693, "learning_rate": 5e-05, "loss": 0.1109, "num_input_tokens_seen": 320845896, "step": 3515 }, { "epoch": 14.645833333333334, "loss": 0.13035404682159424, "loss_ce": 1.3472451428242493e-05, "loss_iou": 0.224609375, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 320845896, "step": 3515 }, { "epoch": 14.65, "grad_norm": 3.3750597376134737, "learning_rate": 5e-05, "loss": 0.0543, "num_input_tokens_seen": 320936400, "step": 3516 }, { "epoch": 14.65, "loss": 0.028896596282720566, "loss_ce": 0.00015666562831029296, "loss_iou": 0.2060546875, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 320936400, "step": 3516 }, { "epoch": 14.654166666666667, "grad_norm": 1.8696180434658125, "learning_rate": 5e-05, "loss": 0.0313, "num_input_tokens_seen": 321027888, "step": 3517 }, { "epoch": 14.654166666666667, "loss": 0.03676997870206833, "loss_ce": 0.0014916585059836507, "loss_iou": 0.22265625, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 321027888, "step": 3517 }, { "epoch": 14.658333333333333, "grad_norm": 2.2360835380693866, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 321119464, "step": 3518 }, { "epoch": 14.658333333333333, "loss": 0.04221915453672409, "loss_ce": 0.00010489897249499336, "loss_iou": 0.34765625, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 321119464, "step": 3518 }, { "epoch": 14.6625, "grad_norm": 1.9138621502707591, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 321210948, "step": 3519 }, { "epoch": 14.6625, "loss": 0.07550536841154099, "loss_ce": 0.00032531472970731556, "loss_iou": 0.1806640625, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 321210948, "step": 3519 }, { "epoch": 14.666666666666666, "grad_norm": 2.5165631989947834, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 321301772, "step": 3520 }, { "epoch": 14.666666666666666, "loss": 0.037338871508836746, "loss_ce": 6.164409001030435e-07, "loss_iou": 0.1875, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 321301772, "step": 3520 }, { "epoch": 14.670833333333333, "grad_norm": 2.594196631908786, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 321393016, "step": 3521 }, { "epoch": 14.670833333333333, "loss": 0.09475166350603104, "loss_ce": 2.214087089669192e-06, "loss_iou": 0.18359375, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 321393016, "step": 3521 }, { "epoch": 14.675, "grad_norm": 1.5223843939552286, "learning_rate": 5e-05, "loss": 0.0282, "num_input_tokens_seen": 321484240, "step": 3522 }, { "epoch": 14.675, "loss": 0.02998613566160202, "loss_ce": 7.890904089435935e-05, "loss_iou": 0.2275390625, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 321484240, "step": 3522 }, { "epoch": 14.679166666666667, "grad_norm": 3.0551389908396978, "learning_rate": 5e-05, "loss": 0.0744, "num_input_tokens_seen": 321575676, "step": 3523 }, { "epoch": 14.679166666666667, "loss": 0.07173063606023788, "loss_ce": 0.0012197702890262008, "loss_iou": 0.23828125, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 321575676, "step": 3523 }, { "epoch": 14.683333333333334, "grad_norm": 3.0763495528620277, "learning_rate": 5e-05, "loss": 0.0296, "num_input_tokens_seen": 321666800, "step": 3524 }, { "epoch": 14.683333333333334, "loss": 0.026495546102523804, "loss_ce": 0.0001893942098831758, "loss_iou": 0.26171875, "loss_num": 0.0052490234375, "loss_xval": 0.0263671875, "num_input_tokens_seen": 321666800, "step": 3524 }, { "epoch": 14.6875, "grad_norm": 8.334019762100176, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 321758752, "step": 3525 }, { "epoch": 14.6875, "loss": 0.07537412643432617, "loss_ce": 0.0002398555225227028, "loss_iou": 0.2177734375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 321758752, "step": 3525 }, { "epoch": 14.691666666666666, "grad_norm": 2.336356169742045, "learning_rate": 5e-05, "loss": 0.097, "num_input_tokens_seen": 321849716, "step": 3526 }, { "epoch": 14.691666666666666, "loss": 0.08213899284601212, "loss_ce": 9.316157729699626e-07, "loss_iou": 0.3359375, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 321849716, "step": 3526 }, { "epoch": 14.695833333333333, "grad_norm": 2.31938679107471, "learning_rate": 5e-05, "loss": 0.0463, "num_input_tokens_seen": 321939640, "step": 3527 }, { "epoch": 14.695833333333333, "loss": 0.03845227137207985, "loss_ce": 0.00016796833369880915, "loss_iou": 0.236328125, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 321939640, "step": 3527 }, { "epoch": 14.7, "grad_norm": 2.0268901653200326, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 322030928, "step": 3528 }, { "epoch": 14.7, "loss": 0.02897455170750618, "loss_ce": 1.3370583474170417e-05, "loss_iou": 0.1572265625, "loss_num": 0.00579833984375, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 322030928, "step": 3528 }, { "epoch": 14.704166666666667, "grad_norm": 5.8854308711480146, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 322122012, "step": 3529 }, { "epoch": 14.704166666666667, "loss": 0.09826751798391342, "loss_ce": 9.15083489871904e-07, "loss_iou": 0.3671875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 322122012, "step": 3529 }, { "epoch": 14.708333333333334, "grad_norm": 2.763988709289915, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 322213168, "step": 3530 }, { "epoch": 14.708333333333334, "loss": 0.024704724550247192, "loss_ce": 0.000122814453789033, "loss_iou": 0.2275390625, "loss_num": 0.004913330078125, "loss_xval": 0.0245361328125, "num_input_tokens_seen": 322213168, "step": 3530 }, { "epoch": 14.7125, "grad_norm": 2.5625093123581135, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 322304540, "step": 3531 }, { "epoch": 14.7125, "loss": 0.07655003666877747, "loss_ce": 0.00045445383875630796, "loss_iou": 0.2255859375, "loss_num": 0.01519775390625, "loss_xval": 0.076171875, "num_input_tokens_seen": 322304540, "step": 3531 }, { "epoch": 14.716666666666667, "grad_norm": 1.5464455321738027, "learning_rate": 5e-05, "loss": 0.051, "num_input_tokens_seen": 322395988, "step": 3532 }, { "epoch": 14.716666666666667, "loss": 0.06164345145225525, "loss_ce": 5.573797807301162e-06, "loss_iou": 0.1728515625, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 322395988, "step": 3532 }, { "epoch": 14.720833333333333, "grad_norm": 1.6193154533522065, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 322487532, "step": 3533 }, { "epoch": 14.720833333333333, "loss": 0.044892363250255585, "loss_ce": 1.0047299383586505e-06, "loss_iou": 0.275390625, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 322487532, "step": 3533 }, { "epoch": 14.725, "grad_norm": 3.6761690727969683, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 322579200, "step": 3534 }, { "epoch": 14.725, "loss": 0.04250044375658035, "loss_ce": 4.715973773272708e-06, "loss_iou": 0.3203125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 322579200, "step": 3534 }, { "epoch": 14.729166666666666, "grad_norm": 1.551974938766234, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 322670388, "step": 3535 }, { "epoch": 14.729166666666666, "loss": 0.03715559095144272, "loss_ce": 1.5698578863521107e-05, "loss_iou": 0.255859375, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 322670388, "step": 3535 }, { "epoch": 14.733333333333333, "grad_norm": 3.6318519870465376, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 322761260, "step": 3536 }, { "epoch": 14.733333333333333, "loss": 0.10466891527175903, "loss_ce": 9.280054655391723e-05, "loss_iou": 0.2021484375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 322761260, "step": 3536 }, { "epoch": 14.7375, "grad_norm": 13.28436486626998, "learning_rate": 5e-05, "loss": 0.0812, "num_input_tokens_seen": 322852928, "step": 3537 }, { "epoch": 14.7375, "loss": 0.10683774203062057, "loss_ce": 0.0003008772328030318, "loss_iou": 0.1875, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 322852928, "step": 3537 }, { "epoch": 14.741666666666667, "grad_norm": 2.416420583465496, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 322944620, "step": 3538 }, { "epoch": 14.741666666666667, "loss": 0.06891767680644989, "loss_ce": 0.0003637520712800324, "loss_iou": 0.3125, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 322944620, "step": 3538 }, { "epoch": 14.745833333333334, "grad_norm": 2.5616024779607884, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 323036304, "step": 3539 }, { "epoch": 14.745833333333334, "loss": 0.05444856733083725, "loss_ce": 5.208913535170723e-06, "loss_iou": 0.2470703125, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 323036304, "step": 3539 }, { "epoch": 14.75, "grad_norm": 3.4890030998851023, "learning_rate": 5e-05, "loss": 0.0777, "num_input_tokens_seen": 323127580, "step": 3540 }, { "epoch": 14.75, "loss": 0.06150487810373306, "loss_ce": 2.721688724705018e-05, "loss_iou": 0.29296875, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 323127580, "step": 3540 }, { "epoch": 14.754166666666666, "grad_norm": 6.813408628687027, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 323218656, "step": 3541 }, { "epoch": 14.754166666666666, "loss": 0.07420557737350464, "loss_ce": 2.0836291696468834e-06, "loss_iou": 0.30078125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 323218656, "step": 3541 }, { "epoch": 14.758333333333333, "grad_norm": 3.70710954353085, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 323309872, "step": 3542 }, { "epoch": 14.758333333333333, "loss": 0.03889217972755432, "loss_ce": 0.0007299504359252751, "loss_iou": 0.294921875, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 323309872, "step": 3542 }, { "epoch": 14.7625, "grad_norm": 2.7670308299627657, "learning_rate": 5e-05, "loss": 0.0595, "num_input_tokens_seen": 323401128, "step": 3543 }, { "epoch": 14.7625, "loss": 0.07968062162399292, "loss_ce": 9.077793220058084e-05, "loss_iou": 0.298828125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 323401128, "step": 3543 }, { "epoch": 14.766666666666667, "grad_norm": 2.275986846611061, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 323492344, "step": 3544 }, { "epoch": 14.766666666666667, "loss": 0.03380130976438522, "loss_ce": 0.0008423267281614244, "loss_iou": 0.24609375, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 323492344, "step": 3544 }, { "epoch": 14.770833333333334, "grad_norm": 2.614157998303283, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 323583560, "step": 3545 }, { "epoch": 14.770833333333334, "loss": 0.03604413568973541, "loss_ce": 1.8135235222871415e-05, "loss_iou": 0.341796875, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 323583560, "step": 3545 }, { "epoch": 14.775, "grad_norm": 4.118987210157419, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 323675416, "step": 3546 }, { "epoch": 14.775, "loss": 0.04108428210020065, "loss_ce": 9.53611233853735e-05, "loss_iou": 0.34375, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 323675416, "step": 3546 }, { "epoch": 14.779166666666667, "grad_norm": 6.501283613064894, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 323766912, "step": 3547 }, { "epoch": 14.779166666666667, "loss": 0.040303364396095276, "loss_ce": 4.9025234147848096e-06, "loss_iou": 0.251953125, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 323766912, "step": 3547 }, { "epoch": 14.783333333333333, "grad_norm": 6.079005531482708, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 323858404, "step": 3548 }, { "epoch": 14.783333333333333, "loss": 0.06674402207136154, "loss_ce": 0.002924136118963361, "loss_iou": 0.373046875, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 323858404, "step": 3548 }, { "epoch": 14.7875, "grad_norm": 2.8525996359524197, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 323949236, "step": 3549 }, { "epoch": 14.7875, "loss": 0.04042597860097885, "loss_ce": 3.596375972847454e-05, "loss_iou": 0.2412109375, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 323949236, "step": 3549 }, { "epoch": 14.791666666666666, "grad_norm": 4.001291035694281, "learning_rate": 5e-05, "loss": 0.0619, "num_input_tokens_seen": 324040436, "step": 3550 }, { "epoch": 14.791666666666666, "loss": 0.03389447182416916, "loss_ce": 4.702661499322858e-06, "loss_iou": 0.296875, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 324040436, "step": 3550 }, { "epoch": 14.795833333333333, "grad_norm": 2.5368419134219238, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 324131804, "step": 3551 }, { "epoch": 14.795833333333333, "loss": 0.03450581058859825, "loss_ce": 5.689916179107968e-06, "loss_iou": 0.1533203125, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 324131804, "step": 3551 }, { "epoch": 14.8, "grad_norm": 2.013270021900645, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 324223076, "step": 3552 }, { "epoch": 14.8, "loss": 0.02905517816543579, "loss_ce": 2.4460521217406495e-06, "loss_iou": 0.353515625, "loss_num": 0.00579833984375, "loss_xval": 0.029052734375, "num_input_tokens_seen": 324223076, "step": 3552 }, { "epoch": 14.804166666666667, "grad_norm": 2.4595982818652824, "learning_rate": 5e-05, "loss": 0.063, "num_input_tokens_seen": 324314896, "step": 3553 }, { "epoch": 14.804166666666667, "loss": 0.07248193770647049, "loss_ce": 1.7949929315363988e-05, "loss_iou": 0.349609375, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 324314896, "step": 3553 }, { "epoch": 14.808333333333334, "grad_norm": 2.6005864024543137, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 324406368, "step": 3554 }, { "epoch": 14.808333333333334, "loss": 0.11534042656421661, "loss_ce": 0.0003959625319112092, "loss_iou": 0.10546875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 324406368, "step": 3554 }, { "epoch": 14.8125, "grad_norm": 1.6112301855903222, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 324497652, "step": 3555 }, { "epoch": 14.8125, "loss": 0.057490721344947815, "loss_ce": 0.0001100460285670124, "loss_iou": 0.3203125, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 324497652, "step": 3555 }, { "epoch": 14.816666666666666, "grad_norm": 2.515654842620723, "learning_rate": 5e-05, "loss": 0.0691, "num_input_tokens_seen": 324588820, "step": 3556 }, { "epoch": 14.816666666666666, "loss": 0.047474320977926254, "loss_ce": 1.9487149984342977e-05, "loss_iou": 0.2421875, "loss_num": 0.009521484375, "loss_xval": 0.04736328125, "num_input_tokens_seen": 324588820, "step": 3556 }, { "epoch": 14.820833333333333, "grad_norm": 5.683309356890506, "learning_rate": 5e-05, "loss": 0.0955, "num_input_tokens_seen": 324680276, "step": 3557 }, { "epoch": 14.820833333333333, "loss": 0.12900656461715698, "loss_ce": 0.0008174782851710916, "loss_iou": 0.345703125, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 324680276, "step": 3557 }, { "epoch": 14.825, "grad_norm": 4.9157808656117075, "learning_rate": 5e-05, "loss": 0.0749, "num_input_tokens_seen": 324771392, "step": 3558 }, { "epoch": 14.825, "loss": 0.09027700126171112, "loss_ce": 9.755467181093991e-05, "loss_iou": 0.1806640625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 324771392, "step": 3558 }, { "epoch": 14.829166666666667, "grad_norm": 4.327085265970158, "learning_rate": 5e-05, "loss": 0.0585, "num_input_tokens_seen": 324862568, "step": 3559 }, { "epoch": 14.829166666666667, "loss": 0.07519317418336868, "loss_ce": 5.3175652283243835e-05, "loss_iou": 0.35546875, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 324862568, "step": 3559 }, { "epoch": 14.833333333333334, "grad_norm": 2.9200242618316183, "learning_rate": 5e-05, "loss": 0.0619, "num_input_tokens_seen": 324953504, "step": 3560 }, { "epoch": 14.833333333333334, "loss": 0.0702124685049057, "loss_ce": 5.256131771602668e-05, "loss_iou": 0.21484375, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 324953504, "step": 3560 }, { "epoch": 14.8375, "grad_norm": 7.980168289767925, "learning_rate": 5e-05, "loss": 0.094, "num_input_tokens_seen": 325044980, "step": 3561 }, { "epoch": 14.8375, "loss": 0.06561485677957535, "loss_ce": 3.2580312108621e-05, "loss_iou": 0.30078125, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 325044980, "step": 3561 }, { "epoch": 14.841666666666667, "grad_norm": 2.534301338125501, "learning_rate": 5e-05, "loss": 0.0696, "num_input_tokens_seen": 325136512, "step": 3562 }, { "epoch": 14.841666666666667, "loss": 0.05281366780400276, "loss_ce": 2.9979187274875585e-06, "loss_iou": 0.1904296875, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 325136512, "step": 3562 }, { "epoch": 14.845833333333333, "grad_norm": 2.3211136578655696, "learning_rate": 5e-05, "loss": 0.0704, "num_input_tokens_seen": 325227988, "step": 3563 }, { "epoch": 14.845833333333333, "loss": 0.10832767188549042, "loss_ce": 5.5274854275921825e-06, "loss_iou": 0.2392578125, "loss_num": 0.0216064453125, "loss_xval": 0.1083984375, "num_input_tokens_seen": 325227988, "step": 3563 }, { "epoch": 14.85, "grad_norm": 2.0010843935107183, "learning_rate": 5e-05, "loss": 0.0885, "num_input_tokens_seen": 325319428, "step": 3564 }, { "epoch": 14.85, "loss": 0.10545650124549866, "loss_ce": 1.0635448234097566e-05, "loss_iou": 0.3359375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 325319428, "step": 3564 }, { "epoch": 14.854166666666666, "grad_norm": 2.2152933496585656, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 325411304, "step": 3565 }, { "epoch": 14.854166666666666, "loss": 0.04346586763858795, "loss_ce": 8.837326276989188e-06, "loss_iou": 0.2216796875, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 325411304, "step": 3565 }, { "epoch": 14.858333333333333, "grad_norm": 3.133004974905781, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 325502720, "step": 3566 }, { "epoch": 14.858333333333333, "loss": 0.05578949302434921, "loss_ce": 3.3608721423661336e-06, "loss_iou": 0.27734375, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 325502720, "step": 3566 }, { "epoch": 14.8625, "grad_norm": 2.2451849806419304, "learning_rate": 5e-05, "loss": 0.0704, "num_input_tokens_seen": 325593764, "step": 3567 }, { "epoch": 14.8625, "loss": 0.041220203042030334, "loss_ce": 6.215145731403027e-06, "loss_iou": 0.2578125, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 325593764, "step": 3567 }, { "epoch": 14.866666666666667, "grad_norm": 1.2833324031924245, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 325685004, "step": 3568 }, { "epoch": 14.866666666666667, "loss": 0.044435691088438034, "loss_ce": 4.78729052701965e-05, "loss_iou": 0.203125, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 325685004, "step": 3568 }, { "epoch": 14.870833333333334, "grad_norm": 1.6872962235402553, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 325776788, "step": 3569 }, { "epoch": 14.870833333333334, "loss": 0.053148359060287476, "loss_ce": 3.251617818023078e-05, "loss_iou": 0.21875, "loss_num": 0.0106201171875, "loss_xval": 0.05322265625, "num_input_tokens_seen": 325776788, "step": 3569 }, { "epoch": 14.875, "grad_norm": 4.775188991555694, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 325868156, "step": 3570 }, { "epoch": 14.875, "loss": 0.0691394954919815, "loss_ce": 7.821511826477945e-05, "loss_iou": 0.29296875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 325868156, "step": 3570 }, { "epoch": 14.879166666666666, "grad_norm": 2.131116925129658, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 325959612, "step": 3571 }, { "epoch": 14.879166666666666, "loss": 0.049073703587055206, "loss_ce": 0.0016188665758818388, "loss_iou": 0.376953125, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 325959612, "step": 3571 }, { "epoch": 14.883333333333333, "grad_norm": 2.7535034525581246, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 326051084, "step": 3572 }, { "epoch": 14.883333333333333, "loss": 0.09897946566343307, "loss_ce": 1.0963100066874176e-05, "loss_iou": 0.259765625, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 326051084, "step": 3572 }, { "epoch": 14.8875, "grad_norm": 1.2638926995119009, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 326141908, "step": 3573 }, { "epoch": 14.8875, "loss": 0.05105225369334221, "loss_ce": 0.0002023401320911944, "loss_iou": 0.220703125, "loss_num": 0.01019287109375, "loss_xval": 0.05078125, "num_input_tokens_seen": 326141908, "step": 3573 }, { "epoch": 14.891666666666667, "grad_norm": 1.6713820019916632, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 326233920, "step": 3574 }, { "epoch": 14.891666666666667, "loss": 0.08183803409337997, "loss_ce": 0.0005849840235896409, "loss_iou": 0.21484375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 326233920, "step": 3574 }, { "epoch": 14.895833333333334, "grad_norm": 1.8503550101021848, "learning_rate": 5e-05, "loss": 0.0991, "num_input_tokens_seen": 326325268, "step": 3575 }, { "epoch": 14.895833333333334, "loss": 0.1190267950296402, "loss_ce": 0.0015951523091644049, "loss_iou": 0.134765625, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 326325268, "step": 3575 }, { "epoch": 14.9, "grad_norm": 1.9195103637938333, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 326416216, "step": 3576 }, { "epoch": 14.9, "loss": 0.10601796954870224, "loss_ce": 0.000640773621853441, "loss_iou": 0.30078125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 326416216, "step": 3576 }, { "epoch": 14.904166666666667, "grad_norm": 5.487943418890862, "learning_rate": 5e-05, "loss": 0.0937, "num_input_tokens_seen": 326507140, "step": 3577 }, { "epoch": 14.904166666666667, "loss": 0.08022335171699524, "loss_ce": 2.315099845873192e-05, "loss_iou": 0.2451171875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 326507140, "step": 3577 }, { "epoch": 14.908333333333333, "grad_norm": 2.446270218723829, "learning_rate": 5e-05, "loss": 0.0698, "num_input_tokens_seen": 326598324, "step": 3578 }, { "epoch": 14.908333333333333, "loss": 0.05691614747047424, "loss_ce": 8.657324315208825e-07, "loss_iou": 0.26171875, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 326598324, "step": 3578 }, { "epoch": 14.9125, "grad_norm": 1.9621850861220034, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 326689168, "step": 3579 }, { "epoch": 14.9125, "loss": 0.06024498492479324, "loss_ce": 3.2864827517187223e-06, "loss_iou": 0.2333984375, "loss_num": 0.01202392578125, "loss_xval": 0.060302734375, "num_input_tokens_seen": 326689168, "step": 3579 }, { "epoch": 14.916666666666666, "grad_norm": 2.3981445276381455, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 326780392, "step": 3580 }, { "epoch": 14.916666666666666, "loss": 0.05113024637103081, "loss_ce": 0.0007762408349663019, "loss_iou": 0.166015625, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 326780392, "step": 3580 }, { "epoch": 14.920833333333333, "grad_norm": 4.281863455784161, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 326871508, "step": 3581 }, { "epoch": 14.920833333333333, "loss": 0.039055127650499344, "loss_ce": 0.0033800816163420677, "loss_iou": 0.134765625, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 326871508, "step": 3581 }, { "epoch": 14.925, "grad_norm": 4.393403438025354, "learning_rate": 5e-05, "loss": 0.0739, "num_input_tokens_seen": 326963588, "step": 3582 }, { "epoch": 14.925, "loss": 0.07046674937009811, "loss_ce": 0.00018477137200534344, "loss_iou": 0.134765625, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 326963588, "step": 3582 }, { "epoch": 14.929166666666667, "grad_norm": 3.570579908241656, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 327054772, "step": 3583 }, { "epoch": 14.929166666666667, "loss": 0.06317311525344849, "loss_ce": 1.7325469343631994e-06, "loss_iou": 0.25390625, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 327054772, "step": 3583 }, { "epoch": 14.933333333333334, "grad_norm": 2.4158229427503337, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 327145804, "step": 3584 }, { "epoch": 14.933333333333334, "loss": 0.08556769043207169, "loss_ce": 0.0010339971631765366, "loss_iou": 0.3125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 327145804, "step": 3584 }, { "epoch": 14.9375, "grad_norm": 3.440511605747932, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 327236888, "step": 3585 }, { "epoch": 14.9375, "loss": 0.08531348407268524, "loss_ce": 0.000154180932440795, "loss_iou": 0.35546875, "loss_num": 0.01708984375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 327236888, "step": 3585 }, { "epoch": 14.941666666666666, "grad_norm": 17.60435585784368, "learning_rate": 5e-05, "loss": 0.0817, "num_input_tokens_seen": 327328136, "step": 3586 }, { "epoch": 14.941666666666666, "loss": 0.10764288902282715, "loss_ce": 0.00023627388873137534, "loss_iou": 0.3359375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 327328136, "step": 3586 }, { "epoch": 14.945833333333333, "grad_norm": 2.269990229583021, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 327418900, "step": 3587 }, { "epoch": 14.945833333333333, "loss": 0.047153279185295105, "loss_ce": 3.623671091190772e-06, "loss_iou": 0.2255859375, "loss_num": 0.00946044921875, "loss_xval": 0.047119140625, "num_input_tokens_seen": 327418900, "step": 3587 }, { "epoch": 14.95, "grad_norm": 3.0739284813249776, "learning_rate": 5e-05, "loss": 0.0693, "num_input_tokens_seen": 327509888, "step": 3588 }, { "epoch": 14.95, "loss": 0.08897919952869415, "loss_ce": 8.148964116116986e-05, "loss_iou": 0.333984375, "loss_num": 0.017822265625, "loss_xval": 0.0888671875, "num_input_tokens_seen": 327509888, "step": 3588 }, { "epoch": 14.954166666666667, "grad_norm": 3.041000004524471, "learning_rate": 5e-05, "loss": 0.0343, "num_input_tokens_seen": 327601236, "step": 3589 }, { "epoch": 14.954166666666667, "loss": 0.03820054233074188, "loss_ce": 3.831431968137622e-05, "loss_iou": 0.1533203125, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 327601236, "step": 3589 }, { "epoch": 14.958333333333334, "grad_norm": 2.4864528317602104, "learning_rate": 5e-05, "loss": 0.0633, "num_input_tokens_seen": 327691636, "step": 3590 }, { "epoch": 14.958333333333334, "loss": 0.06497173756361008, "loss_ce": 1.5070919289428275e-05, "loss_iou": 0.251953125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 327691636, "step": 3590 }, { "epoch": 14.9625, "grad_norm": 1.8455373863541833, "learning_rate": 5e-05, "loss": 0.0575, "num_input_tokens_seen": 327782680, "step": 3591 }, { "epoch": 14.9625, "loss": 0.05134090036153793, "loss_ce": 1.0335241313441657e-05, "loss_iou": 0.33984375, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 327782680, "step": 3591 }, { "epoch": 14.966666666666667, "grad_norm": 1.96959325048514, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 327873792, "step": 3592 }, { "epoch": 14.966666666666667, "loss": 0.041168928146362305, "loss_ce": 7.187348955994821e-07, "loss_iou": 0.279296875, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 327873792, "step": 3592 }, { "epoch": 14.970833333333333, "grad_norm": 5.7229393107077735, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 327965508, "step": 3593 }, { "epoch": 14.970833333333333, "loss": 0.0583159439265728, "loss_ce": 1.2109203453292139e-05, "loss_iou": 0.263671875, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 327965508, "step": 3593 }, { "epoch": 14.975, "grad_norm": 2.2666128192625825, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 328056804, "step": 3594 }, { "epoch": 14.975, "loss": 0.03889273852109909, "loss_ce": 3.432005178183317e-05, "loss_iou": 0.212890625, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 328056804, "step": 3594 }, { "epoch": 14.979166666666666, "grad_norm": 4.750338180067551, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 328148388, "step": 3595 }, { "epoch": 14.979166666666666, "loss": 0.0718790739774704, "loss_ce": 7.121425733203068e-05, "loss_iou": 0.380859375, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 328148388, "step": 3595 }, { "epoch": 14.983333333333333, "grad_norm": 2.4401063130844283, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 328239896, "step": 3596 }, { "epoch": 14.983333333333333, "loss": 0.02217935025691986, "loss_ce": 3.5031724110012874e-05, "loss_iou": 0.203125, "loss_num": 0.004425048828125, "loss_xval": 0.0220947265625, "num_input_tokens_seen": 328239896, "step": 3596 }, { "epoch": 14.9875, "grad_norm": 2.442549598675774, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 328331384, "step": 3597 }, { "epoch": 14.9875, "loss": 0.05925852432847023, "loss_ce": 6.205165118444711e-05, "loss_iou": 0.2119140625, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 328331384, "step": 3597 }, { "epoch": 14.991666666666667, "grad_norm": 2.055400133996437, "learning_rate": 5e-05, "loss": 0.0391, "num_input_tokens_seen": 328422936, "step": 3598 }, { "epoch": 14.991666666666667, "loss": 0.03320079296827316, "loss_ce": 5.295316441333853e-06, "loss_iou": 0.2109375, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 328422936, "step": 3598 }, { "epoch": 14.995833333333334, "grad_norm": 1.9865687878745821, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 328514684, "step": 3599 }, { "epoch": 14.995833333333334, "loss": 0.0515916682779789, "loss_ce": 0.00016955150931607932, "loss_iou": 0.234375, "loss_num": 0.01025390625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 328514684, "step": 3599 }, { "epoch": 15.0, "grad_norm": 4.937737227297816, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 328605860, "step": 3600 }, { "epoch": 15.0, "loss": 0.04353149235248566, "loss_ce": 0.0005398534703999758, "loss_iou": 0.291015625, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 328605860, "step": 3600 }, { "epoch": 15.004166666666666, "grad_norm": 4.7503086302857405, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 328697836, "step": 3601 }, { "epoch": 15.004166666666666, "loss": 0.05971873551607132, "loss_ce": 0.0028492261189967394, "loss_iou": 0.25390625, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 328697836, "step": 3601 }, { "epoch": 15.008333333333333, "grad_norm": 4.643644494444229, "learning_rate": 5e-05, "loss": 0.0901, "num_input_tokens_seen": 328789168, "step": 3602 }, { "epoch": 15.008333333333333, "loss": 0.03825650364160538, "loss_ce": 2.7188930289412383e-06, "loss_iou": 0.2734375, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 328789168, "step": 3602 }, { "epoch": 15.0125, "grad_norm": 3.1429965086107567, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 328879752, "step": 3603 }, { "epoch": 15.0125, "loss": 0.03419603407382965, "loss_ce": 9.26396605791524e-05, "loss_iou": 0.36328125, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 328879752, "step": 3603 }, { "epoch": 15.016666666666667, "grad_norm": 3.654518939083431, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 328971004, "step": 3604 }, { "epoch": 15.016666666666667, "loss": 0.03746304288506508, "loss_ce": 7.901009666966274e-05, "loss_iou": 0.298828125, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 328971004, "step": 3604 }, { "epoch": 15.020833333333334, "grad_norm": 2.6319228156521968, "learning_rate": 5e-05, "loss": 0.0572, "num_input_tokens_seen": 329062488, "step": 3605 }, { "epoch": 15.020833333333334, "loss": 0.07449492067098618, "loss_ce": 1.5129454595808056e-06, "loss_iou": 0.283203125, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 329062488, "step": 3605 }, { "epoch": 15.025, "grad_norm": 9.610639039151225, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 329153660, "step": 3606 }, { "epoch": 15.025, "loss": 0.050093911588191986, "loss_ce": 2.2191003154148348e-05, "loss_iou": 0.2890625, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 329153660, "step": 3606 }, { "epoch": 15.029166666666667, "grad_norm": 2.774691887572913, "learning_rate": 5e-05, "loss": 0.0746, "num_input_tokens_seen": 329245388, "step": 3607 }, { "epoch": 15.029166666666667, "loss": 0.07727287709712982, "loss_ce": 2.3686347958573606e-06, "loss_iou": 0.30078125, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 329245388, "step": 3607 }, { "epoch": 15.033333333333333, "grad_norm": 6.824942188349254, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 329337076, "step": 3608 }, { "epoch": 15.033333333333333, "loss": 0.0612323172390461, "loss_ce": 0.003782975487411022, "loss_iou": 0.298828125, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 329337076, "step": 3608 }, { "epoch": 15.0375, "grad_norm": 4.709692774966925, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 329428540, "step": 3609 }, { "epoch": 15.0375, "loss": 0.06253225356340408, "loss_ce": 1.7341440070595127e-06, "loss_iou": 0.30859375, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 329428540, "step": 3609 }, { "epoch": 15.041666666666666, "grad_norm": 2.2319162438458187, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 329519428, "step": 3610 }, { "epoch": 15.041666666666666, "loss": 0.025795357301831245, "loss_ce": 4.1901939766830765e-06, "loss_iou": 0.2265625, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 329519428, "step": 3610 }, { "epoch": 15.045833333333333, "grad_norm": 3.7526942246991566, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 329611248, "step": 3611 }, { "epoch": 15.045833333333333, "loss": 0.021277323365211487, "loss_ce": 2.1829899196745828e-05, "loss_iou": 0.322265625, "loss_num": 0.004241943359375, "loss_xval": 0.021240234375, "num_input_tokens_seen": 329611248, "step": 3611 }, { "epoch": 15.05, "grad_norm": 9.139209536856107, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 329701952, "step": 3612 }, { "epoch": 15.05, "loss": 0.050409846007823944, "loss_ce": 0.0002618358703330159, "loss_iou": 0.271484375, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 329701952, "step": 3612 }, { "epoch": 15.054166666666667, "grad_norm": 2.2795942382093677, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 329793288, "step": 3613 }, { "epoch": 15.054166666666667, "loss": 0.05421944707632065, "loss_ce": 6.600363121833652e-05, "loss_iou": 0.32421875, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 329793288, "step": 3613 }, { "epoch": 15.058333333333334, "grad_norm": 2.303786347859785, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 329884836, "step": 3614 }, { "epoch": 15.058333333333334, "loss": 0.07987719774246216, "loss_ce": 7.373141124844551e-05, "loss_iou": 0.30078125, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 329884836, "step": 3614 }, { "epoch": 15.0625, "grad_norm": 2.4687676014422, "learning_rate": 5e-05, "loss": 0.0376, "num_input_tokens_seen": 329975512, "step": 3615 }, { "epoch": 15.0625, "loss": 0.03640202432870865, "loss_ce": 2.184108780056704e-06, "loss_iou": 0.251953125, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 329975512, "step": 3615 }, { "epoch": 15.066666666666666, "grad_norm": 3.028327068968897, "learning_rate": 5e-05, "loss": 0.0531, "num_input_tokens_seen": 330066736, "step": 3616 }, { "epoch": 15.066666666666666, "loss": 0.07391392439603806, "loss_ce": 1.5607045497745275e-05, "loss_iou": 0.30078125, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 330066736, "step": 3616 }, { "epoch": 15.070833333333333, "grad_norm": 6.055814960292821, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 330157904, "step": 3617 }, { "epoch": 15.070833333333333, "loss": 0.0358605720102787, "loss_ce": 2.418515578028746e-06, "loss_iou": 0.365234375, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 330157904, "step": 3617 }, { "epoch": 15.075, "grad_norm": 3.246438483533676, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 330248900, "step": 3618 }, { "epoch": 15.075, "loss": 0.038288623094558716, "loss_ce": 0.0002484585565980524, "loss_iou": 0.099609375, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 330248900, "step": 3618 }, { "epoch": 15.079166666666667, "grad_norm": 2.9315869774054555, "learning_rate": 5e-05, "loss": 0.0755, "num_input_tokens_seen": 330340052, "step": 3619 }, { "epoch": 15.079166666666667, "loss": 0.09723498672246933, "loss_ce": 5.990674253553152e-06, "loss_iou": 0.1943359375, "loss_num": 0.01953125, "loss_xval": 0.09716796875, "num_input_tokens_seen": 330340052, "step": 3619 }, { "epoch": 15.083333333333334, "grad_norm": 2.854142624643113, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 330431588, "step": 3620 }, { "epoch": 15.083333333333334, "loss": 0.061580635607242584, "loss_ce": 2.6683097530622035e-05, "loss_iou": 0.2333984375, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 330431588, "step": 3620 }, { "epoch": 15.0875, "grad_norm": 6.519389816149147, "learning_rate": 5e-05, "loss": 0.0862, "num_input_tokens_seen": 330523172, "step": 3621 }, { "epoch": 15.0875, "loss": 0.11213652789592743, "loss_ce": 0.00028960229246877134, "loss_iou": 0.291015625, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 330523172, "step": 3621 }, { "epoch": 15.091666666666667, "grad_norm": 3.8389583619655046, "learning_rate": 5e-05, "loss": 0.0501, "num_input_tokens_seen": 330614360, "step": 3622 }, { "epoch": 15.091666666666667, "loss": 0.04965383931994438, "loss_ce": 1.7411761064067832e-06, "loss_iou": 0.3359375, "loss_num": 0.00994873046875, "loss_xval": 0.049560546875, "num_input_tokens_seen": 330614360, "step": 3622 }, { "epoch": 15.095833333333333, "grad_norm": 3.2749420098599127, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 330705472, "step": 3623 }, { "epoch": 15.095833333333333, "loss": 0.05485723540186882, "loss_ce": 1.8875684872909915e-06, "loss_iou": 0.158203125, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 330705472, "step": 3623 }, { "epoch": 15.1, "grad_norm": 2.7497409913777666, "learning_rate": 5e-05, "loss": 0.0831, "num_input_tokens_seen": 330796792, "step": 3624 }, { "epoch": 15.1, "loss": 0.09329473972320557, "loss_ce": 0.00027715889154933393, "loss_iou": 0.2080078125, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 330796792, "step": 3624 }, { "epoch": 15.104166666666666, "grad_norm": 3.011368972190201, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 330887952, "step": 3625 }, { "epoch": 15.104166666666666, "loss": 0.034572720527648926, "loss_ce": 0.0007878522155806422, "loss_iou": 0.1796875, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 330887952, "step": 3625 }, { "epoch": 15.108333333333333, "grad_norm": 2.537962697527776, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 330979616, "step": 3626 }, { "epoch": 15.108333333333333, "loss": 0.05786379426717758, "loss_ce": 1.772696850821376e-05, "loss_iou": 0.259765625, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 330979616, "step": 3626 }, { "epoch": 15.1125, "grad_norm": 4.051346261798277, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 331070492, "step": 3627 }, { "epoch": 15.1125, "loss": 0.04033700376749039, "loss_ce": 1.565598722663708e-05, "loss_iou": 0.072265625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 331070492, "step": 3627 }, { "epoch": 15.116666666666667, "grad_norm": 4.603074725929813, "learning_rate": 5e-05, "loss": 0.088, "num_input_tokens_seen": 331160904, "step": 3628 }, { "epoch": 15.116666666666667, "loss": 0.07139493525028229, "loss_ce": 1.4317161912913434e-05, "loss_iou": 0.19140625, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 331160904, "step": 3628 }, { "epoch": 15.120833333333334, "grad_norm": 3.2322360125048104, "learning_rate": 5e-05, "loss": 0.063, "num_input_tokens_seen": 331251768, "step": 3629 }, { "epoch": 15.120833333333334, "loss": 0.05669593811035156, "loss_ce": 2.4793273041723296e-05, "loss_iou": 0.203125, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 331251768, "step": 3629 }, { "epoch": 15.125, "grad_norm": 4.3279161634296495, "learning_rate": 5e-05, "loss": 0.0566, "num_input_tokens_seen": 331343652, "step": 3630 }, { "epoch": 15.125, "loss": 0.04381554201245308, "loss_ce": 2.2818392608314753e-05, "loss_iou": 0.255859375, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 331343652, "step": 3630 }, { "epoch": 15.129166666666666, "grad_norm": 4.0162518502250455, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 331434912, "step": 3631 }, { "epoch": 15.129166666666666, "loss": 0.04392173886299133, "loss_ce": 9.468426287639886e-05, "loss_iou": 0.05517578125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 331434912, "step": 3631 }, { "epoch": 15.133333333333333, "grad_norm": 2.3099710437696825, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 331526496, "step": 3632 }, { "epoch": 15.133333333333333, "loss": 0.04162848740816116, "loss_ce": 2.510636250008247e-06, "loss_iou": 0.1953125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 331526496, "step": 3632 }, { "epoch": 15.1375, "grad_norm": 2.3738202552434315, "learning_rate": 5e-05, "loss": 0.0657, "num_input_tokens_seen": 331617872, "step": 3633 }, { "epoch": 15.1375, "loss": 0.08647745847702026, "loss_ce": 2.1168279999983497e-05, "loss_iou": 0.1494140625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 331617872, "step": 3633 }, { "epoch": 15.141666666666667, "grad_norm": 5.825434013763398, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 331709636, "step": 3634 }, { "epoch": 15.141666666666667, "loss": 0.08738897740840912, "loss_ce": 4.7672125219833106e-05, "loss_iou": 0.365234375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 331709636, "step": 3634 }, { "epoch": 15.145833333333334, "grad_norm": 1.1844947286072622, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 331801648, "step": 3635 }, { "epoch": 15.145833333333334, "loss": 0.05893933027982712, "loss_ce": 3.27760171785485e-05, "loss_iou": 0.26171875, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 331801648, "step": 3635 }, { "epoch": 15.15, "grad_norm": 4.953652363856765, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 331893020, "step": 3636 }, { "epoch": 15.15, "loss": 0.06529572606086731, "loss_ce": 0.0003543271741364151, "loss_iou": 0.28125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 331893020, "step": 3636 }, { "epoch": 15.154166666666667, "grad_norm": 3.4201083531366914, "learning_rate": 5e-05, "loss": 0.0775, "num_input_tokens_seen": 331984624, "step": 3637 }, { "epoch": 15.154166666666667, "loss": 0.09096341580152512, "loss_ce": 0.04213528707623482, "loss_iou": 0.2236328125, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 331984624, "step": 3637 }, { "epoch": 15.158333333333333, "grad_norm": 2.0518076595764794, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 332076068, "step": 3638 }, { "epoch": 15.158333333333333, "loss": 0.04714092239737511, "loss_ce": 9.807346214074641e-05, "loss_iou": 0.28125, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 332076068, "step": 3638 }, { "epoch": 15.1625, "grad_norm": 4.691395639111677, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 332167548, "step": 3639 }, { "epoch": 15.1625, "loss": 0.09495604038238525, "loss_ce": 3.111699697910808e-05, "loss_iou": 0.34765625, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 332167548, "step": 3639 }, { "epoch": 15.166666666666666, "grad_norm": 4.28942326396276, "learning_rate": 5e-05, "loss": 0.038, "num_input_tokens_seen": 332259140, "step": 3640 }, { "epoch": 15.166666666666666, "loss": 0.03751256689429283, "loss_ce": 6.46383978164522e-06, "loss_iou": 0.306640625, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 332259140, "step": 3640 }, { "epoch": 15.170833333333333, "grad_norm": 3.1445487000295334, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 332350108, "step": 3641 }, { "epoch": 15.170833333333333, "loss": 0.04145258292555809, "loss_ce": 4.023050132673234e-05, "loss_iou": 0.25, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 332350108, "step": 3641 }, { "epoch": 15.175, "grad_norm": 2.438382664266843, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 332441836, "step": 3642 }, { "epoch": 15.175, "loss": 0.04246381297707558, "loss_ce": 9.778579988051206e-05, "loss_iou": 0.22265625, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 332441836, "step": 3642 }, { "epoch": 15.179166666666667, "grad_norm": 2.68848117982997, "learning_rate": 5e-05, "loss": 0.0802, "num_input_tokens_seen": 332533088, "step": 3643 }, { "epoch": 15.179166666666667, "loss": 0.09259673953056335, "loss_ce": 6.405875410564477e-06, "loss_iou": 0.33203125, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 332533088, "step": 3643 }, { "epoch": 15.183333333333334, "grad_norm": 7.997747451676923, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 332624424, "step": 3644 }, { "epoch": 15.183333333333334, "loss": 0.04149026423692703, "loss_ce": 1.615691644474282e-06, "loss_iou": 0.2314453125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 332624424, "step": 3644 }, { "epoch": 15.1875, "grad_norm": 2.216687066270345, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 332715968, "step": 3645 }, { "epoch": 15.1875, "loss": 0.06847859174013138, "loss_ce": 0.0003328350721858442, "loss_iou": 0.2255859375, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 332715968, "step": 3645 }, { "epoch": 15.191666666666666, "grad_norm": 1.7072100404061863, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 332807160, "step": 3646 }, { "epoch": 15.191666666666666, "loss": 0.07139115035533905, "loss_ce": 2.904944722104119e-06, "loss_iou": 0.09033203125, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 332807160, "step": 3646 }, { "epoch": 15.195833333333333, "grad_norm": 40.80216402513253, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 332898216, "step": 3647 }, { "epoch": 15.195833333333333, "loss": 0.04613684490323067, "loss_ce": 3.241437298129313e-05, "loss_iou": 0.244140625, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 332898216, "step": 3647 }, { "epoch": 15.2, "grad_norm": 2.800792431588098, "learning_rate": 5e-05, "loss": 0.1075, "num_input_tokens_seen": 332989600, "step": 3648 }, { "epoch": 15.2, "loss": 0.06875791400671005, "loss_ce": 1.7064860003301874e-05, "loss_iou": 0.248046875, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 332989600, "step": 3648 }, { "epoch": 15.204166666666667, "grad_norm": 6.290240929768387, "learning_rate": 5e-05, "loss": 0.0639, "num_input_tokens_seen": 333081132, "step": 3649 }, { "epoch": 15.204166666666667, "loss": 0.049857739359140396, "loss_ce": 1.4904736417520326e-05, "loss_iou": 0.185546875, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 333081132, "step": 3649 }, { "epoch": 15.208333333333334, "grad_norm": 4.303558482273825, "learning_rate": 5e-05, "loss": 0.1207, "num_input_tokens_seen": 333172096, "step": 3650 }, { "epoch": 15.208333333333334, "loss": 0.11342789977788925, "loss_ce": 9.32297461986309e-06, "loss_iou": 0.384765625, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 333172096, "step": 3650 }, { "epoch": 15.2125, "grad_norm": 2.49403191773149, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 333263640, "step": 3651 }, { "epoch": 15.2125, "loss": 0.10153535008430481, "loss_ce": 3.3662881833151914e-06, "loss_iou": 0.24609375, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 333263640, "step": 3651 }, { "epoch": 15.216666666666667, "grad_norm": 4.595165834455283, "learning_rate": 5e-05, "loss": 0.103, "num_input_tokens_seen": 333355240, "step": 3652 }, { "epoch": 15.216666666666667, "loss": 0.06734571605920792, "loss_ce": 0.0010310175130143762, "loss_iou": 0.294921875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 333355240, "step": 3652 }, { "epoch": 15.220833333333333, "grad_norm": 2.703493008041717, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 333445676, "step": 3653 }, { "epoch": 15.220833333333333, "loss": 0.05132361128926277, "loss_ce": 2.3561253328807652e-05, "loss_iou": 0.185546875, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 333445676, "step": 3653 }, { "epoch": 15.225, "grad_norm": 1.4419388376125457, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 333537284, "step": 3654 }, { "epoch": 15.225, "loss": 0.056093744933605194, "loss_ce": 4.058098420500755e-05, "loss_iou": 0.271484375, "loss_num": 0.01116943359375, "loss_xval": 0.05615234375, "num_input_tokens_seen": 333537284, "step": 3654 }, { "epoch": 15.229166666666666, "grad_norm": 1.8212927080711885, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 333628176, "step": 3655 }, { "epoch": 15.229166666666666, "loss": 0.053495533764362335, "loss_ce": 5.848135970154544e-06, "loss_iou": 0.1552734375, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 333628176, "step": 3655 }, { "epoch": 15.233333333333333, "grad_norm": 1.6671543863495777, "learning_rate": 5e-05, "loss": 0.0413, "num_input_tokens_seen": 333719424, "step": 3656 }, { "epoch": 15.233333333333333, "loss": 0.03739180788397789, "loss_ce": 0.00017562204448040575, "loss_iou": 0.31640625, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 333719424, "step": 3656 }, { "epoch": 15.2375, "grad_norm": 2.4930836197541706, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 333810568, "step": 3657 }, { "epoch": 15.2375, "loss": 0.04177769273519516, "loss_ce": 1.43868601298891e-05, "loss_iou": 0.291015625, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 333810568, "step": 3657 }, { "epoch": 15.241666666666667, "grad_norm": 2.571465833651463, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 333901300, "step": 3658 }, { "epoch": 15.241666666666667, "loss": 0.025437403470277786, "loss_ce": 8.630683623778168e-06, "loss_iou": 0.23828125, "loss_num": 0.005096435546875, "loss_xval": 0.025390625, "num_input_tokens_seen": 333901300, "step": 3658 }, { "epoch": 15.245833333333334, "grad_norm": 6.086562990212025, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 333992744, "step": 3659 }, { "epoch": 15.245833333333334, "loss": 0.0372001938521862, "loss_ce": 0.00012133685959270224, "loss_iou": 0.330078125, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 333992744, "step": 3659 }, { "epoch": 15.25, "grad_norm": 3.414750208420596, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 334084372, "step": 3660 }, { "epoch": 15.25, "loss": 0.0666206032037735, "loss_ce": 1.5992854969226755e-05, "loss_iou": 0.27734375, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 334084372, "step": 3660 }, { "epoch": 15.254166666666666, "grad_norm": 2.9417870993211124, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 334175668, "step": 3661 }, { "epoch": 15.254166666666666, "loss": 0.045750442892313004, "loss_ce": 4.591216566041112e-06, "loss_iou": 0.2021484375, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 334175668, "step": 3661 }, { "epoch": 15.258333333333333, "grad_norm": 4.215670622888787, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 334266836, "step": 3662 }, { "epoch": 15.258333333333333, "loss": 0.05722185969352722, "loss_ce": 1.6664025679347105e-05, "loss_iou": 0.17578125, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 334266836, "step": 3662 }, { "epoch": 15.2625, "grad_norm": 9.404887556711703, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 334358152, "step": 3663 }, { "epoch": 15.2625, "loss": 0.039673589169979095, "loss_ce": 7.389946858893381e-07, "loss_iou": 0.234375, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 334358152, "step": 3663 }, { "epoch": 15.266666666666667, "grad_norm": 2.4858143158405177, "learning_rate": 5e-05, "loss": 0.0724, "num_input_tokens_seen": 334448752, "step": 3664 }, { "epoch": 15.266666666666667, "loss": 0.061934880912303925, "loss_ce": 9.101112664211541e-05, "loss_iou": 0.224609375, "loss_num": 0.01239013671875, "loss_xval": 0.061767578125, "num_input_tokens_seen": 334448752, "step": 3664 }, { "epoch": 15.270833333333334, "grad_norm": 1.9686179382224764, "learning_rate": 5e-05, "loss": 0.0862, "num_input_tokens_seen": 334539696, "step": 3665 }, { "epoch": 15.270833333333334, "loss": 0.13766661286354065, "loss_ce": 0.0003375158121343702, "loss_iou": 0.265625, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 334539696, "step": 3665 }, { "epoch": 15.275, "grad_norm": 4.593574012253526, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 334630908, "step": 3666 }, { "epoch": 15.275, "loss": 0.05104167014360428, "loss_ce": 1.0194967217103112e-06, "loss_iou": 0.24609375, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 334630908, "step": 3666 }, { "epoch": 15.279166666666667, "grad_norm": 1.778737458581344, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 334722220, "step": 3667 }, { "epoch": 15.279166666666667, "loss": 0.051860690116882324, "loss_ce": 2.658232369867619e-05, "loss_iou": 0.26953125, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 334722220, "step": 3667 }, { "epoch": 15.283333333333333, "grad_norm": 4.357471825362792, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 334813188, "step": 3668 }, { "epoch": 15.283333333333333, "loss": 0.043428897857666016, "loss_ce": 3.290424865554087e-05, "loss_iou": 0.21875, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 334813188, "step": 3668 }, { "epoch": 15.2875, "grad_norm": 2.7862133487294996, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 334904468, "step": 3669 }, { "epoch": 15.2875, "loss": 0.046986065804958344, "loss_ce": 4.255929979990469e-06, "loss_iou": 0.306640625, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 334904468, "step": 3669 }, { "epoch": 15.291666666666666, "grad_norm": 3.181565942625571, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 334995568, "step": 3670 }, { "epoch": 15.291666666666666, "loss": 0.051836755126714706, "loss_ce": 2.553722879383713e-05, "loss_iou": 0.216796875, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 334995568, "step": 3670 }, { "epoch": 15.295833333333333, "grad_norm": 2.2787344894670634, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 335086432, "step": 3671 }, { "epoch": 15.295833333333333, "loss": 0.04101455956697464, "loss_ce": 3.708266012836248e-05, "loss_iou": 0.0986328125, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 335086432, "step": 3671 }, { "epoch": 15.3, "grad_norm": 1.786113667363368, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 335177840, "step": 3672 }, { "epoch": 15.3, "loss": 0.033018480986356735, "loss_ce": 1.3719601156481076e-05, "loss_iou": 0.28125, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 335177840, "step": 3672 }, { "epoch": 15.304166666666667, "grad_norm": 2.113044401356243, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 335269108, "step": 3673 }, { "epoch": 15.304166666666667, "loss": 0.07917618006467819, "loss_ce": 0.0013105771504342556, "loss_iou": 0.291015625, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 335269108, "step": 3673 }, { "epoch": 15.308333333333334, "grad_norm": 1.4949228973993478, "learning_rate": 5e-05, "loss": 0.0463, "num_input_tokens_seen": 335360528, "step": 3674 }, { "epoch": 15.308333333333334, "loss": 0.029251961037516594, "loss_ce": 8.631383821011696e-07, "loss_iou": 0.28125, "loss_num": 0.005859375, "loss_xval": 0.029296875, "num_input_tokens_seen": 335360528, "step": 3674 }, { "epoch": 15.3125, "grad_norm": 2.6006503688280285, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 335451900, "step": 3675 }, { "epoch": 15.3125, "loss": 0.026928190141916275, "loss_ce": 2.6945717763737775e-05, "loss_iou": 0.2099609375, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 335451900, "step": 3675 }, { "epoch": 15.316666666666666, "grad_norm": 2.8694457709743233, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 335543404, "step": 3676 }, { "epoch": 15.316666666666666, "loss": 0.040234267711639404, "loss_ce": 3.8800335460109636e-05, "loss_iou": 0.33203125, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 335543404, "step": 3676 }, { "epoch": 15.320833333333333, "grad_norm": 4.660576388851059, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 335635024, "step": 3677 }, { "epoch": 15.320833333333333, "loss": 0.04173457995057106, "loss_ce": 1.7916502201842377e-06, "loss_iou": 0.2392578125, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 335635024, "step": 3677 }, { "epoch": 15.325, "grad_norm": 3.8747091262044515, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 335725892, "step": 3678 }, { "epoch": 15.325, "loss": 0.052254341542720795, "loss_ce": 6.148645752546145e-07, "loss_iou": 0.1767578125, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 335725892, "step": 3678 }, { "epoch": 15.329166666666667, "grad_norm": 2.500436542626791, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 335815720, "step": 3679 }, { "epoch": 15.329166666666667, "loss": 0.07567595690488815, "loss_ce": 0.00011443771654739976, "loss_iou": 0.2734375, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 335815720, "step": 3679 }, { "epoch": 15.333333333333334, "grad_norm": 1.7950942209894891, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 335906696, "step": 3680 }, { "epoch": 15.333333333333334, "loss": 0.047455932945013046, "loss_ce": 1.100194594982895e-06, "loss_iou": 0.2001953125, "loss_num": 0.009521484375, "loss_xval": 0.04736328125, "num_input_tokens_seen": 335906696, "step": 3680 }, { "epoch": 15.3375, "grad_norm": 2.603836604353161, "learning_rate": 5e-05, "loss": 0.0955, "num_input_tokens_seen": 335997680, "step": 3681 }, { "epoch": 15.3375, "loss": 0.13542352616786957, "loss_ce": 1.774315933289472e-06, "loss_iou": 0.17578125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 335997680, "step": 3681 }, { "epoch": 15.341666666666667, "grad_norm": 2.4985064327993722, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 336088752, "step": 3682 }, { "epoch": 15.341666666666667, "loss": 0.06178619712591171, "loss_ce": 3.360460823387257e-06, "loss_iou": 0.259765625, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 336088752, "step": 3682 }, { "epoch": 15.345833333333333, "grad_norm": 2.156982488750445, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 336180452, "step": 3683 }, { "epoch": 15.345833333333333, "loss": 0.0459609180688858, "loss_ce": 9.299816883867607e-05, "loss_iou": 0.283203125, "loss_num": 0.00921630859375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 336180452, "step": 3683 }, { "epoch": 15.35, "grad_norm": 2.593791060516741, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 336271508, "step": 3684 }, { "epoch": 15.35, "loss": 0.058250319212675095, "loss_ce": 2.2780919607612304e-05, "loss_iou": 0.21875, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 336271508, "step": 3684 }, { "epoch": 15.354166666666666, "grad_norm": 5.079357675038114, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 336363056, "step": 3685 }, { "epoch": 15.354166666666666, "loss": 0.04351774975657463, "loss_ce": 0.0006786992307752371, "loss_iou": 0.21484375, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 336363056, "step": 3685 }, { "epoch": 15.358333333333333, "grad_norm": 3.1484998429187874, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 336454452, "step": 3686 }, { "epoch": 15.358333333333333, "loss": 0.040700171142816544, "loss_ce": 4.980157427780796e-06, "loss_iou": 0.263671875, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 336454452, "step": 3686 }, { "epoch": 15.3625, "grad_norm": 2.542866368364727, "learning_rate": 5e-05, "loss": 0.0367, "num_input_tokens_seen": 336545208, "step": 3687 }, { "epoch": 15.3625, "loss": 0.034080490469932556, "loss_ce": 0.002601608633995056, "loss_iou": 0.2060546875, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 336545208, "step": 3687 }, { "epoch": 15.366666666666667, "grad_norm": 1.6240964189368892, "learning_rate": 5e-05, "loss": 0.0633, "num_input_tokens_seen": 336637080, "step": 3688 }, { "epoch": 15.366666666666667, "loss": 0.041000962257385254, "loss_ce": 5.973120664748421e-07, "loss_iou": 0.3046875, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 336637080, "step": 3688 }, { "epoch": 15.370833333333334, "grad_norm": 3.5883647756619697, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 336728324, "step": 3689 }, { "epoch": 15.370833333333334, "loss": 0.049876682460308075, "loss_ce": 3.3272726795985363e-06, "loss_iou": 0.1484375, "loss_num": 0.010009765625, "loss_xval": 0.0498046875, "num_input_tokens_seen": 336728324, "step": 3689 }, { "epoch": 15.375, "grad_norm": 1.8711898203308313, "learning_rate": 5e-05, "loss": 0.03, "num_input_tokens_seen": 336819684, "step": 3690 }, { "epoch": 15.375, "loss": 0.01915108412504196, "loss_ce": 1.3037391681791632e-06, "loss_iou": 0.2021484375, "loss_num": 0.0038299560546875, "loss_xval": 0.0191650390625, "num_input_tokens_seen": 336819684, "step": 3690 }, { "epoch": 15.379166666666666, "grad_norm": 3.0763388136092193, "learning_rate": 5e-05, "loss": 0.1026, "num_input_tokens_seen": 336910716, "step": 3691 }, { "epoch": 15.379166666666666, "loss": 0.10127241164445877, "loss_ce": 0.00048811070155352354, "loss_iou": 0.2890625, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 336910716, "step": 3691 }, { "epoch": 15.383333333333333, "grad_norm": 1.6205927936647166, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 337002312, "step": 3692 }, { "epoch": 15.383333333333333, "loss": 0.03497444838285446, "loss_ce": 0.00011192738747922704, "loss_iou": 0.275390625, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 337002312, "step": 3692 }, { "epoch": 15.3875, "grad_norm": 1.8502923008152186, "learning_rate": 5e-05, "loss": 0.0953, "num_input_tokens_seen": 337093572, "step": 3693 }, { "epoch": 15.3875, "loss": 0.12129370868206024, "loss_ce": 1.603203713784751e-06, "loss_iou": 0.162109375, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 337093572, "step": 3693 }, { "epoch": 15.391666666666667, "grad_norm": 3.603181757698564, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 337185096, "step": 3694 }, { "epoch": 15.391666666666667, "loss": 0.032368484884500504, "loss_ce": 1.2223916201037355e-05, "loss_iou": 0.2021484375, "loss_num": 0.0064697265625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 337185096, "step": 3694 }, { "epoch": 15.395833333333334, "grad_norm": 4.3647795322814265, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 337275612, "step": 3695 }, { "epoch": 15.395833333333334, "loss": 0.09905469417572021, "loss_ce": 4.0408194763585925e-05, "loss_iou": 0.291015625, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 337275612, "step": 3695 }, { "epoch": 15.4, "grad_norm": 3.20284183256731, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 337366872, "step": 3696 }, { "epoch": 15.4, "loss": 0.07359351217746735, "loss_ce": 3.749448183043569e-07, "loss_iou": 0.224609375, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 337366872, "step": 3696 }, { "epoch": 15.404166666666667, "grad_norm": 1.755567458846452, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 337458240, "step": 3697 }, { "epoch": 15.404166666666667, "loss": 0.06957247108221054, "loss_ce": 6.86895873514004e-05, "loss_iou": 0.29296875, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 337458240, "step": 3697 }, { "epoch": 15.408333333333333, "grad_norm": 1.9444490464027675, "learning_rate": 5e-05, "loss": 0.0485, "num_input_tokens_seen": 337549956, "step": 3698 }, { "epoch": 15.408333333333333, "loss": 0.03700829669833183, "loss_ce": 2.8621196179301478e-05, "loss_iou": 0.3046875, "loss_num": 0.007415771484375, "loss_xval": 0.036865234375, "num_input_tokens_seen": 337549956, "step": 3698 }, { "epoch": 15.4125, "grad_norm": 1.6221037989352245, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 337640524, "step": 3699 }, { "epoch": 15.4125, "loss": 0.10857482999563217, "loss_ce": 9.140936185758619e-07, "loss_iou": 0.314453125, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 337640524, "step": 3699 }, { "epoch": 15.416666666666666, "grad_norm": 3.614329912471893, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 337731884, "step": 3700 }, { "epoch": 15.416666666666666, "loss": 0.05494129657745361, "loss_ce": 2.4917135306168348e-05, "loss_iou": 0.19140625, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 337731884, "step": 3700 }, { "epoch": 15.420833333333333, "grad_norm": 2.238256260352113, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 337823232, "step": 3701 }, { "epoch": 15.420833333333333, "loss": 0.0470069944858551, "loss_ce": 0.0004142856923863292, "loss_iou": 0.20703125, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 337823232, "step": 3701 }, { "epoch": 15.425, "grad_norm": 1.6980198759268228, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 337914872, "step": 3702 }, { "epoch": 15.425, "loss": 0.040833212435245514, "loss_ce": 1.5953277397784404e-05, "loss_iou": 0.228515625, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 337914872, "step": 3702 }, { "epoch": 15.429166666666667, "grad_norm": 1.0651156234654016, "learning_rate": 5e-05, "loss": 0.0691, "num_input_tokens_seen": 338006000, "step": 3703 }, { "epoch": 15.429166666666667, "loss": 0.028311312198638916, "loss_ce": 0.0006394971860572696, "loss_iou": 0.07275390625, "loss_num": 0.005523681640625, "loss_xval": 0.0277099609375, "num_input_tokens_seen": 338006000, "step": 3703 }, { "epoch": 15.433333333333334, "grad_norm": 1.6898483447623203, "learning_rate": 5e-05, "loss": 0.0508, "num_input_tokens_seen": 338097044, "step": 3704 }, { "epoch": 15.433333333333334, "loss": 0.037339404225349426, "loss_ce": 0.0005886115832254291, "loss_iou": 0.166015625, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 338097044, "step": 3704 }, { "epoch": 15.4375, "grad_norm": 2.714367995414061, "learning_rate": 5e-05, "loss": 0.0962, "num_input_tokens_seen": 338188904, "step": 3705 }, { "epoch": 15.4375, "loss": 0.13568150997161865, "loss_ce": 3.0868512112647295e-05, "loss_iou": 0.28125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 338188904, "step": 3705 }, { "epoch": 15.441666666666666, "grad_norm": 1.5087140513638733, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 338279972, "step": 3706 }, { "epoch": 15.441666666666666, "loss": 0.10155145823955536, "loss_ce": 4.223983523843344e-06, "loss_iou": 0.330078125, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 338279972, "step": 3706 }, { "epoch": 15.445833333333333, "grad_norm": 1.318609741070438, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 338371048, "step": 3707 }, { "epoch": 15.445833333333333, "loss": 0.06752166152000427, "loss_ce": 1.5217010513879359e-06, "loss_iou": 0.2412109375, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 338371048, "step": 3707 }, { "epoch": 15.45, "grad_norm": 1.7232080244761543, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 338462560, "step": 3708 }, { "epoch": 15.45, "loss": 0.04955866187810898, "loss_ce": 0.0001430758275091648, "loss_iou": 0.171875, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 338462560, "step": 3708 }, { "epoch": 15.454166666666667, "grad_norm": 2.1165823242380437, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 338553560, "step": 3709 }, { "epoch": 15.454166666666667, "loss": 0.08360796421766281, "loss_ce": 5.0576054491102695e-06, "loss_iou": 0.265625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 338553560, "step": 3709 }, { "epoch": 15.458333333333334, "grad_norm": 3.0387382777837564, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 338643152, "step": 3710 }, { "epoch": 15.458333333333334, "loss": 0.0352066308259964, "loss_ce": 4.60691808257252e-06, "loss_iou": 0.2490234375, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 338643152, "step": 3710 }, { "epoch": 15.4625, "grad_norm": 1.7541841298933207, "learning_rate": 5e-05, "loss": 0.0939, "num_input_tokens_seen": 338734472, "step": 3711 }, { "epoch": 15.4625, "loss": 0.0419999435544014, "loss_ce": 0.00011456872744020075, "loss_iou": 0.181640625, "loss_num": 0.00836181640625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 338734472, "step": 3711 }, { "epoch": 15.466666666666667, "grad_norm": 6.008227836686593, "learning_rate": 5e-05, "loss": 0.0905, "num_input_tokens_seen": 338826220, "step": 3712 }, { "epoch": 15.466666666666667, "loss": 0.06825199723243713, "loss_ce": 2.995487375301309e-05, "loss_iou": 0.294921875, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 338826220, "step": 3712 }, { "epoch": 15.470833333333333, "grad_norm": 3.9839977790493015, "learning_rate": 5e-05, "loss": 0.0596, "num_input_tokens_seen": 338917768, "step": 3713 }, { "epoch": 15.470833333333333, "loss": 0.05084093660116196, "loss_ce": 2.9167258617235348e-05, "loss_iou": 0.275390625, "loss_num": 0.01019287109375, "loss_xval": 0.05078125, "num_input_tokens_seen": 338917768, "step": 3713 }, { "epoch": 15.475, "grad_norm": 2.807098196818164, "learning_rate": 5e-05, "loss": 0.088, "num_input_tokens_seen": 339008996, "step": 3714 }, { "epoch": 15.475, "loss": 0.11402676999568939, "loss_ce": 1.3098615454509854e-05, "loss_iou": 0.1962890625, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 339008996, "step": 3714 }, { "epoch": 15.479166666666666, "grad_norm": 1.4004717586879123, "learning_rate": 5e-05, "loss": 0.0543, "num_input_tokens_seen": 339100072, "step": 3715 }, { "epoch": 15.479166666666666, "loss": 0.056993044912815094, "loss_ce": 1.4680579170089914e-06, "loss_iou": 0.171875, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 339100072, "step": 3715 }, { "epoch": 15.483333333333333, "grad_norm": 1.7399946641840538, "learning_rate": 5e-05, "loss": 0.0741, "num_input_tokens_seen": 339189988, "step": 3716 }, { "epoch": 15.483333333333333, "loss": 0.07177933305501938, "loss_ce": 1.9884635094058467e-06, "loss_iou": 0.189453125, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 339189988, "step": 3716 }, { "epoch": 15.4875, "grad_norm": 5.881810042717518, "learning_rate": 5e-05, "loss": 0.1044, "num_input_tokens_seen": 339281092, "step": 3717 }, { "epoch": 15.4875, "loss": 0.1643996387720108, "loss_ce": 1.446839405616629e-06, "loss_iou": 0.25390625, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 339281092, "step": 3717 }, { "epoch": 15.491666666666667, "grad_norm": 2.7759819223595805, "learning_rate": 5e-05, "loss": 0.0782, "num_input_tokens_seen": 339372124, "step": 3718 }, { "epoch": 15.491666666666667, "loss": 0.08161170780658722, "loss_ce": 7.701344657107256e-06, "loss_iou": 0.25390625, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 339372124, "step": 3718 }, { "epoch": 15.495833333333334, "grad_norm": 4.453573063728625, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 339463436, "step": 3719 }, { "epoch": 15.495833333333334, "loss": 0.051303643733263016, "loss_ce": 0.0001256657560588792, "loss_iou": 0.259765625, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 339463436, "step": 3719 }, { "epoch": 15.5, "grad_norm": 3.943457288126614, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 339555000, "step": 3720 }, { "epoch": 15.5, "loss": 0.08295242488384247, "loss_ce": 2.090242378471885e-05, "loss_iou": 0.185546875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 339555000, "step": 3720 }, { "epoch": 15.504166666666666, "grad_norm": 1.6440153934824757, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 339646268, "step": 3721 }, { "epoch": 15.504166666666666, "loss": 0.048688024282455444, "loss_ce": 0.00023373885778710246, "loss_iou": 0.169921875, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 339646268, "step": 3721 }, { "epoch": 15.508333333333333, "grad_norm": 4.5372580731869885, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 339737016, "step": 3722 }, { "epoch": 15.508333333333333, "loss": 0.07211792469024658, "loss_ce": 4.881694621872157e-06, "loss_iou": 0.2734375, "loss_num": 0.014404296875, "loss_xval": 0.072265625, "num_input_tokens_seen": 339737016, "step": 3722 }, { "epoch": 15.5125, "grad_norm": 2.9173584963090877, "learning_rate": 5e-05, "loss": 0.049, "num_input_tokens_seen": 339828408, "step": 3723 }, { "epoch": 15.5125, "loss": 0.04044932872056961, "loss_ce": 2.8794276659027673e-05, "loss_iou": 0.306640625, "loss_num": 0.008056640625, "loss_xval": 0.04052734375, "num_input_tokens_seen": 339828408, "step": 3723 }, { "epoch": 15.516666666666667, "grad_norm": 3.3504030651939107, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 339919824, "step": 3724 }, { "epoch": 15.516666666666667, "loss": 0.05100865289568901, "loss_ce": 0.0015854372177273035, "loss_iou": 0.197265625, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 339919824, "step": 3724 }, { "epoch": 15.520833333333334, "grad_norm": 5.532775939930558, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 340011676, "step": 3725 }, { "epoch": 15.520833333333334, "loss": 0.056049033999443054, "loss_ce": 9.505392517894506e-05, "loss_iou": 0.3359375, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 340011676, "step": 3725 }, { "epoch": 15.525, "grad_norm": 2.5293521700652435, "learning_rate": 5e-05, "loss": 0.1004, "num_input_tokens_seen": 340102328, "step": 3726 }, { "epoch": 15.525, "loss": 0.13496966660022736, "loss_ce": 0.0020808603148907423, "loss_iou": 0.224609375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 340102328, "step": 3726 }, { "epoch": 15.529166666666667, "grad_norm": 2.10725062293184, "learning_rate": 5e-05, "loss": 0.0869, "num_input_tokens_seen": 340193392, "step": 3727 }, { "epoch": 15.529166666666667, "loss": 0.08296191692352295, "loss_ce": 4.565744529827498e-05, "loss_iou": 0.296875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 340193392, "step": 3727 }, { "epoch": 15.533333333333333, "grad_norm": 2.7269435977165535, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 340284680, "step": 3728 }, { "epoch": 15.533333333333333, "loss": 0.04044805467128754, "loss_ce": 1.2264949873497244e-05, "loss_iou": 0.2197265625, "loss_num": 0.008056640625, "loss_xval": 0.04052734375, "num_input_tokens_seen": 340284680, "step": 3728 }, { "epoch": 15.5375, "grad_norm": 2.668113882514995, "learning_rate": 5e-05, "loss": 0.0563, "num_input_tokens_seen": 340375616, "step": 3729 }, { "epoch": 15.5375, "loss": 0.0317254438996315, "loss_ce": 1.767879439285025e-05, "loss_iou": 0.298828125, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 340375616, "step": 3729 }, { "epoch": 15.541666666666666, "grad_norm": 2.5479889869323333, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 340467096, "step": 3730 }, { "epoch": 15.541666666666666, "loss": 0.09114043414592743, "loss_ce": 4.546367927105166e-05, "loss_iou": 0.28125, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 340467096, "step": 3730 }, { "epoch": 15.545833333333333, "grad_norm": 2.9021149806382938, "learning_rate": 5e-05, "loss": 0.0506, "num_input_tokens_seen": 340558564, "step": 3731 }, { "epoch": 15.545833333333333, "loss": 0.03785187005996704, "loss_ce": 1.0071442375192419e-05, "loss_iou": 0.259765625, "loss_num": 0.007568359375, "loss_xval": 0.037841796875, "num_input_tokens_seen": 340558564, "step": 3731 }, { "epoch": 15.55, "grad_norm": 1.3694067401097818, "learning_rate": 5e-05, "loss": 0.0273, "num_input_tokens_seen": 340649084, "step": 3732 }, { "epoch": 15.55, "loss": 0.031138941645622253, "loss_ce": 0.0001483419182477519, "loss_iou": 0.28125, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 340649084, "step": 3732 }, { "epoch": 15.554166666666667, "grad_norm": 2.3170750731989416, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 340740796, "step": 3733 }, { "epoch": 15.554166666666667, "loss": 0.06054166704416275, "loss_ce": 2.4206601665355265e-06, "loss_iou": 0.33203125, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 340740796, "step": 3733 }, { "epoch": 15.558333333333334, "grad_norm": 2.8160029894918397, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 340831928, "step": 3734 }, { "epoch": 15.558333333333334, "loss": 0.052570268511772156, "loss_ce": 4.951789742335677e-05, "loss_iou": 0.162109375, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 340831928, "step": 3734 }, { "epoch": 15.5625, "grad_norm": 5.974204890106191, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 340924088, "step": 3735 }, { "epoch": 15.5625, "loss": 0.07624347507953644, "loss_ce": 1.056129258358851e-05, "loss_iou": 0.38671875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 340924088, "step": 3735 }, { "epoch": 15.566666666666666, "grad_norm": 2.8797770212862535, "learning_rate": 5e-05, "loss": 0.0965, "num_input_tokens_seen": 341016048, "step": 3736 }, { "epoch": 15.566666666666666, "loss": 0.07347431033849716, "loss_ce": 4.901489955955185e-05, "loss_iou": 0.37109375, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 341016048, "step": 3736 }, { "epoch": 15.570833333333333, "grad_norm": 1.1495050094163188, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 341107272, "step": 3737 }, { "epoch": 15.570833333333333, "loss": 0.030686549842357635, "loss_ce": 0.0005428112344816327, "loss_iou": 0.109375, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 341107272, "step": 3737 }, { "epoch": 15.575, "grad_norm": 4.9596779126302355, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 341199252, "step": 3738 }, { "epoch": 15.575, "loss": 0.0417996421456337, "loss_ce": 4.3968546378891915e-05, "loss_iou": 0.10791015625, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 341199252, "step": 3738 }, { "epoch": 15.579166666666667, "grad_norm": 2.502675693921476, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 341290672, "step": 3739 }, { "epoch": 15.579166666666667, "loss": 0.039896052330732346, "loss_ce": 0.00015453486412297934, "loss_iou": 0.28125, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 341290672, "step": 3739 }, { "epoch": 15.583333333333334, "grad_norm": 3.319192170242713, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 341381892, "step": 3740 }, { "epoch": 15.583333333333334, "loss": 0.061792608350515366, "loss_ce": 4.028591138194315e-05, "loss_iou": 0.208984375, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 341381892, "step": 3740 }, { "epoch": 15.5875, "grad_norm": 3.7933353250214163, "learning_rate": 5e-05, "loss": 0.0376, "num_input_tokens_seen": 341473844, "step": 3741 }, { "epoch": 15.5875, "loss": 0.03918928653001785, "loss_ce": 3.5236706025898457e-05, "loss_iou": 0.3203125, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 341473844, "step": 3741 }, { "epoch": 15.591666666666667, "grad_norm": 1.8308961731771252, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 341564556, "step": 3742 }, { "epoch": 15.591666666666667, "loss": 0.04009087011218071, "loss_ce": 1.365911157336086e-05, "loss_iou": 0.2177734375, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 341564556, "step": 3742 }, { "epoch": 15.595833333333333, "grad_norm": 4.196156902144123, "learning_rate": 5e-05, "loss": 0.0908, "num_input_tokens_seen": 341656220, "step": 3743 }, { "epoch": 15.595833333333333, "loss": 0.05994286388158798, "loss_ce": 6.341123025777051e-06, "loss_iou": 0.287109375, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 341656220, "step": 3743 }, { "epoch": 15.6, "grad_norm": 5.485161124921998, "learning_rate": 5e-05, "loss": 0.0906, "num_input_tokens_seen": 341746808, "step": 3744 }, { "epoch": 15.6, "loss": 0.11784628033638, "loss_ce": 2.6530929062573705e-06, "loss_iou": 0.25390625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 341746808, "step": 3744 }, { "epoch": 15.604166666666666, "grad_norm": 7.266184511187221, "learning_rate": 5e-05, "loss": 0.0809, "num_input_tokens_seen": 341838700, "step": 3745 }, { "epoch": 15.604166666666666, "loss": 0.11083197593688965, "loss_ce": 0.0003125613438896835, "loss_iou": 0.2294921875, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 341838700, "step": 3745 }, { "epoch": 15.608333333333333, "grad_norm": 2.7001024941371177, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 341929980, "step": 3746 }, { "epoch": 15.608333333333333, "loss": 0.03403391316533089, "loss_ce": 0.00028910342371091247, "loss_iou": 0.318359375, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 341929980, "step": 3746 }, { "epoch": 15.6125, "grad_norm": 3.8337372188980274, "learning_rate": 5e-05, "loss": 0.0706, "num_input_tokens_seen": 342021168, "step": 3747 }, { "epoch": 15.6125, "loss": 0.08094524592161179, "loss_ce": 1.2631683603103738e-05, "loss_iou": 0.296875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 342021168, "step": 3747 }, { "epoch": 15.616666666666667, "grad_norm": 3.841329253786725, "learning_rate": 5e-05, "loss": 0.0403, "num_input_tokens_seen": 342112668, "step": 3748 }, { "epoch": 15.616666666666667, "loss": 0.047874949872493744, "loss_ce": 5.3906893299426883e-05, "loss_iou": 0.38671875, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 342112668, "step": 3748 }, { "epoch": 15.620833333333334, "grad_norm": 3.7342170529204735, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 342204740, "step": 3749 }, { "epoch": 15.620833333333334, "loss": 0.040019918233156204, "loss_ce": 0.0010489715496078134, "loss_iou": 0.32421875, "loss_num": 0.007781982421875, "loss_xval": 0.0390625, "num_input_tokens_seen": 342204740, "step": 3749 }, { "epoch": 15.625, "grad_norm": 2.857990856155654, "learning_rate": 5e-05, "loss": 0.048, "num_input_tokens_seen": 342295692, "step": 3750 }, { "epoch": 15.625, "eval_seeclick_CIoU": 0.2125411108136177, "eval_seeclick_GIoU": 0.20485394820570946, "eval_seeclick_IoU": 0.3233272135257721, "eval_seeclick_MAE_all": 0.09805087000131607, "eval_seeclick_MAE_h": 0.08882058411836624, "eval_seeclick_MAE_w": 0.20213264226913452, "eval_seeclick_MAE_x_boxes": 0.22167783975601196, "eval_seeclick_MAE_y_boxes": 0.09438033029437065, "eval_seeclick_NUM_probability": 0.9999986588954926, "eval_seeclick_inside_bbox": 0.4474431872367859, "eval_seeclick_loss": 0.5405165553092957, "eval_seeclick_loss_ce": 0.11569063365459442, "eval_seeclick_loss_iou": 0.4593505859375, "eval_seeclick_loss_num": 0.0818939208984375, "eval_seeclick_loss_xval": 0.40948486328125, "eval_seeclick_runtime": 76.5532, "eval_seeclick_samples_per_second": 0.562, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 342295692, "step": 3750 }, { "epoch": 15.625, "eval_icons_CIoU": 0.3149517923593521, "eval_icons_GIoU": 0.32262296974658966, "eval_icons_IoU": 0.3942710757255554, "eval_icons_MAE_all": 0.06905381754040718, "eval_icons_MAE_h": 0.1509195640683174, "eval_icons_MAE_w": 0.09647001326084137, "eval_icons_MAE_x_boxes": 0.09488264471292496, "eval_icons_MAE_y_boxes": 0.15092498809099197, "eval_icons_NUM_probability": 0.9999993443489075, "eval_icons_inside_bbox": 0.515625, "eval_icons_loss": 0.35029152035713196, "eval_icons_loss_ce": 5.0143926273449324e-05, "eval_icons_loss_iou": 0.26190185546875, "eval_icons_loss_num": 0.0713348388671875, "eval_icons_loss_xval": 0.3564453125, "eval_icons_runtime": 97.8887, "eval_icons_samples_per_second": 0.511, "eval_icons_steps_per_second": 0.02, "num_input_tokens_seen": 342295692, "step": 3750 }, { "epoch": 15.625, "eval_screenspot_CIoU": 0.35719852646191913, "eval_screenspot_GIoU": 0.34474583466847736, "eval_screenspot_IoU": 0.4328311284383138, "eval_screenspot_MAE_all": 0.09720409661531448, "eval_screenspot_MAE_h": 0.09548324843247731, "eval_screenspot_MAE_w": 0.20179721216360727, "eval_screenspot_MAE_x_boxes": 0.1839049607515335, "eval_screenspot_MAE_y_boxes": 0.09491645296414693, "eval_screenspot_NUM_probability": 0.998277485370636, "eval_screenspot_inside_bbox": 0.6833333373069763, "eval_screenspot_loss": 0.49055179953575134, "eval_screenspot_loss_ce": 0.002939210297578635, "eval_screenspot_loss_iou": 0.3556315104166667, "eval_screenspot_loss_num": 0.09782918294270833, "eval_screenspot_loss_xval": 0.4890543619791667, "eval_screenspot_runtime": 153.1993, "eval_screenspot_samples_per_second": 0.581, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 342295692, "step": 3750 }, { "epoch": 15.625, "eval_compot_CIoU": 0.5043479949235916, "eval_compot_GIoU": 0.5061911940574646, "eval_compot_IoU": 0.5701824128627777, "eval_compot_MAE_all": 0.04921010322868824, "eval_compot_MAE_h": 0.05424528568983078, "eval_compot_MAE_w": 0.1251702681183815, "eval_compot_MAE_x_boxes": 0.12915247306227684, "eval_compot_MAE_y_boxes": 0.05076356418430805, "eval_compot_NUM_probability": 0.999998539686203, "eval_compot_inside_bbox": 0.7395833432674408, "eval_compot_loss": 0.279593288898468, "eval_compot_loss_ce": 0.04102969542145729, "eval_compot_loss_iou": 0.3662109375, "eval_compot_loss_num": 0.0433807373046875, "eval_compot_loss_xval": 0.2169189453125, "eval_compot_runtime": 88.2697, "eval_compot_samples_per_second": 0.566, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 342295692, "step": 3750 }, { "epoch": 15.625, "loss": 0.27310457825660706, "loss_ce": 0.04312410205602646, "loss_iou": 0.357421875, "loss_num": 0.0458984375, "loss_xval": 0.23046875, "num_input_tokens_seen": 342295692, "step": 3750 }, { "epoch": 15.629166666666666, "grad_norm": 2.1199125592979478, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 342387188, "step": 3751 }, { "epoch": 15.629166666666666, "loss": 0.03941526263952255, "loss_ce": 0.0004900917992927134, "loss_iou": 0.255859375, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 342387188, "step": 3751 }, { "epoch": 15.633333333333333, "grad_norm": 1.4474490571523944, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 342478664, "step": 3752 }, { "epoch": 15.633333333333333, "loss": 0.0719941109418869, "loss_ce": 4.8926642193691805e-05, "loss_iou": 0.2177734375, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 342478664, "step": 3752 }, { "epoch": 15.6375, "grad_norm": 1.6387608992855431, "learning_rate": 5e-05, "loss": 0.0272, "num_input_tokens_seen": 342570240, "step": 3753 }, { "epoch": 15.6375, "loss": 0.028534431010484695, "loss_ce": 0.00044300025911070406, "loss_iou": 0.11669921875, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 342570240, "step": 3753 }, { "epoch": 15.641666666666667, "grad_norm": 4.2780588978724365, "learning_rate": 5e-05, "loss": 0.0824, "num_input_tokens_seen": 342662172, "step": 3754 }, { "epoch": 15.641666666666667, "loss": 0.03853003680706024, "loss_ce": 1.5978862393239979e-06, "loss_iou": 0.162109375, "loss_num": 0.0076904296875, "loss_xval": 0.03857421875, "num_input_tokens_seen": 342662172, "step": 3754 }, { "epoch": 15.645833333333334, "grad_norm": 7.357155194854795, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 342753944, "step": 3755 }, { "epoch": 15.645833333333334, "loss": 0.030950158834457397, "loss_ce": 0.0001884377415990457, "loss_iou": 0.154296875, "loss_num": 0.006134033203125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 342753944, "step": 3755 }, { "epoch": 15.65, "grad_norm": 3.4080494115279896, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 342846076, "step": 3756 }, { "epoch": 15.65, "loss": 0.069695845246315, "loss_ce": 0.0017408326966688037, "loss_iou": 0.25, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 342846076, "step": 3756 }, { "epoch": 15.654166666666667, "grad_norm": 2.4482605988739152, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 342937468, "step": 3757 }, { "epoch": 15.654166666666667, "loss": 0.04597485437989235, "loss_ce": 7.641684351256117e-05, "loss_iou": 0.291015625, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 342937468, "step": 3757 }, { "epoch": 15.658333333333333, "grad_norm": 4.3127548720585205, "learning_rate": 5e-05, "loss": 0.074, "num_input_tokens_seen": 343028756, "step": 3758 }, { "epoch": 15.658333333333333, "loss": 0.1078638955950737, "loss_ce": 0.001338477828539908, "loss_iou": 0.259765625, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 343028756, "step": 3758 }, { "epoch": 15.6625, "grad_norm": 3.222255941415551, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 343120120, "step": 3759 }, { "epoch": 15.6625, "loss": 0.06610622256994247, "loss_ce": 0.00017299478349741548, "loss_iou": 0.2119140625, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 343120120, "step": 3759 }, { "epoch": 15.666666666666666, "grad_norm": 2.4420159963419183, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 343211932, "step": 3760 }, { "epoch": 15.666666666666666, "loss": 0.05773644149303436, "loss_ce": 0.0019045292865484953, "loss_iou": 0.283203125, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 343211932, "step": 3760 }, { "epoch": 15.670833333333333, "grad_norm": 2.647108848460681, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 343302424, "step": 3761 }, { "epoch": 15.670833333333333, "loss": 0.052173398435115814, "loss_ce": 0.002406858140602708, "loss_iou": 0.16015625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 343302424, "step": 3761 }, { "epoch": 15.675, "grad_norm": 3.1681592427004786, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 343393708, "step": 3762 }, { "epoch": 15.675, "loss": 0.0612005740404129, "loss_ce": 7.386229844996706e-05, "loss_iou": 0.166015625, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 343393708, "step": 3762 }, { "epoch": 15.679166666666667, "grad_norm": 2.8364370811250232, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 343485656, "step": 3763 }, { "epoch": 15.679166666666667, "loss": 0.08106990158557892, "loss_ce": 1.5209958291961811e-05, "loss_iou": 0.1474609375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 343485656, "step": 3763 }, { "epoch": 15.683333333333334, "grad_norm": 3.7959855480425033, "learning_rate": 5e-05, "loss": 0.0782, "num_input_tokens_seen": 343576864, "step": 3764 }, { "epoch": 15.683333333333334, "loss": 0.049277886748313904, "loss_ce": 0.004951103124767542, "loss_iou": 0.314453125, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 343576864, "step": 3764 }, { "epoch": 15.6875, "grad_norm": 2.874665218560939, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 343668400, "step": 3765 }, { "epoch": 15.6875, "loss": 0.0330917127430439, "loss_ce": 8.695048018125817e-05, "loss_iou": 0.326171875, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 343668400, "step": 3765 }, { "epoch": 15.691666666666666, "grad_norm": 3.00878583114591, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 343760056, "step": 3766 }, { "epoch": 15.691666666666666, "loss": 0.0372559018433094, "loss_ce": 0.0002838582149706781, "loss_iou": 0.1953125, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 343760056, "step": 3766 }, { "epoch": 15.695833333333333, "grad_norm": 3.6691462198495874, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 343851080, "step": 3767 }, { "epoch": 15.695833333333333, "loss": 0.03983243927359581, "loss_ce": 5.2777741075260565e-05, "loss_iou": 0.3046875, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 343851080, "step": 3767 }, { "epoch": 15.7, "grad_norm": 2.070024420063255, "learning_rate": 5e-05, "loss": 0.0792, "num_input_tokens_seen": 343942680, "step": 3768 }, { "epoch": 15.7, "loss": 0.10269272327423096, "loss_ce": 1.63358126883395e-05, "loss_iou": 0.251953125, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 343942680, "step": 3768 }, { "epoch": 15.704166666666667, "grad_norm": 1.5839008901424858, "learning_rate": 5e-05, "loss": 0.081, "num_input_tokens_seen": 344033376, "step": 3769 }, { "epoch": 15.704166666666667, "loss": 0.11858565360307693, "loss_ce": 0.0005284000653773546, "loss_iou": 0.201171875, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 344033376, "step": 3769 }, { "epoch": 15.708333333333334, "grad_norm": 1.2896545565054265, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 344124880, "step": 3770 }, { "epoch": 15.708333333333334, "loss": 0.03080589696764946, "loss_ce": 2.8919712349306792e-05, "loss_iou": 0.1650390625, "loss_num": 0.00616455078125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 344124880, "step": 3770 }, { "epoch": 15.7125, "grad_norm": 2.5516228142288355, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 344214680, "step": 3771 }, { "epoch": 15.7125, "loss": 0.0802692100405693, "loss_ce": 1.560797682031989e-05, "loss_iou": 0.1962890625, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 344214680, "step": 3771 }, { "epoch": 15.716666666666667, "grad_norm": 1.7650926491566428, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 344305992, "step": 3772 }, { "epoch": 15.716666666666667, "loss": 0.03394448012113571, "loss_ce": 0.0013974817702546716, "loss_iou": 0.1640625, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 344305992, "step": 3772 }, { "epoch": 15.720833333333333, "grad_norm": 2.7475410562631817, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 344397200, "step": 3773 }, { "epoch": 15.720833333333333, "loss": 0.03981112688779831, "loss_ce": 0.0001535326591692865, "loss_iou": 0.220703125, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 344397200, "step": 3773 }, { "epoch": 15.725, "grad_norm": 4.791021482348356, "learning_rate": 5e-05, "loss": 0.051, "num_input_tokens_seen": 344488164, "step": 3774 }, { "epoch": 15.725, "loss": 0.05577407032251358, "loss_ce": 4.897027974948287e-05, "loss_iou": 0.314453125, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 344488164, "step": 3774 }, { "epoch": 15.729166666666666, "grad_norm": 3.883300208504697, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 344579300, "step": 3775 }, { "epoch": 15.729166666666666, "loss": 0.051646310836076736, "loss_ce": 2.5828194338828325e-05, "loss_iou": 0.28125, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 344579300, "step": 3775 }, { "epoch": 15.733333333333333, "grad_norm": 3.076657890558606, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 344670452, "step": 3776 }, { "epoch": 15.733333333333333, "loss": 0.04161835089325905, "loss_ce": 3.814951560343616e-05, "loss_iou": 0.166015625, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 344670452, "step": 3776 }, { "epoch": 15.7375, "grad_norm": 7.921467089919973, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 344761936, "step": 3777 }, { "epoch": 15.7375, "loss": 0.04783114045858383, "loss_ce": 0.00010927912080660462, "loss_iou": 0.193359375, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 344761936, "step": 3777 }, { "epoch": 15.741666666666667, "grad_norm": 4.195204208081409, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 344853456, "step": 3778 }, { "epoch": 15.741666666666667, "loss": 0.04136732965707779, "loss_ce": 7.541826221313386e-07, "loss_iou": 0.302734375, "loss_num": 0.00830078125, "loss_xval": 0.041259765625, "num_input_tokens_seen": 344853456, "step": 3778 }, { "epoch": 15.745833333333334, "grad_norm": 11.76621668340554, "learning_rate": 5e-05, "loss": 0.1202, "num_input_tokens_seen": 344944524, "step": 3779 }, { "epoch": 15.745833333333334, "loss": 0.189706951379776, "loss_ce": 2.4941593437688425e-05, "loss_iou": 0.1572265625, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 344944524, "step": 3779 }, { "epoch": 15.75, "grad_norm": 5.560246181302266, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 345035528, "step": 3780 }, { "epoch": 15.75, "loss": 0.07846195995807648, "loss_ce": 1.2656909120778437e-06, "loss_iou": 0.26171875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 345035528, "step": 3780 }, { "epoch": 15.754166666666666, "grad_norm": 2.7483561888499244, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 345126764, "step": 3781 }, { "epoch": 15.754166666666666, "loss": 0.04758468642830849, "loss_ce": 0.005729829426854849, "loss_iou": 0.1875, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 345126764, "step": 3781 }, { "epoch": 15.758333333333333, "grad_norm": 3.180942509291167, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 345218748, "step": 3782 }, { "epoch": 15.758333333333333, "loss": 0.02534525655210018, "loss_ce": 3.855386603390798e-05, "loss_iou": 0.1953125, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 345218748, "step": 3782 }, { "epoch": 15.7625, "grad_norm": 3.0123041833531845, "learning_rate": 5e-05, "loss": 0.0654, "num_input_tokens_seen": 345309368, "step": 3783 }, { "epoch": 15.7625, "loss": 0.06461061537265778, "loss_ce": 6.59371362417005e-05, "loss_iou": 0.33203125, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 345309368, "step": 3783 }, { "epoch": 15.766666666666667, "grad_norm": 3.4175700842294456, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 345400572, "step": 3784 }, { "epoch": 15.766666666666667, "loss": 0.030454548075795174, "loss_ce": 2.852268335118424e-05, "loss_iou": 0.2392578125, "loss_num": 0.006072998046875, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 345400572, "step": 3784 }, { "epoch": 15.770833333333334, "grad_norm": 5.864726623125807, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 345491616, "step": 3785 }, { "epoch": 15.770833333333334, "loss": 0.035654254257678986, "loss_ce": 0.004770464263856411, "loss_iou": 0.314453125, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 345491616, "step": 3785 }, { "epoch": 15.775, "grad_norm": 5.818370725960535, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 345583084, "step": 3786 }, { "epoch": 15.775, "loss": 0.04433032125234604, "loss_ce": 0.0003087124787271023, "loss_iou": 0.22265625, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 345583084, "step": 3786 }, { "epoch": 15.779166666666667, "grad_norm": 1.1987201829282095, "learning_rate": 5e-05, "loss": 0.0262, "num_input_tokens_seen": 345674276, "step": 3787 }, { "epoch": 15.779166666666667, "loss": 0.019078008830547333, "loss_ce": 1.2152680938015692e-05, "loss_iou": 0.2109375, "loss_num": 0.003814697265625, "loss_xval": 0.01904296875, "num_input_tokens_seen": 345674276, "step": 3787 }, { "epoch": 15.783333333333333, "grad_norm": 15.543687534674307, "learning_rate": 5e-05, "loss": 0.0539, "num_input_tokens_seen": 345765564, "step": 3788 }, { "epoch": 15.783333333333333, "loss": 0.03491397574543953, "loss_ce": 1.8656713791642687e-06, "loss_iou": 0.37890625, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 345765564, "step": 3788 }, { "epoch": 15.7875, "grad_norm": 5.564477666133809, "learning_rate": 5e-05, "loss": 0.069, "num_input_tokens_seen": 345858000, "step": 3789 }, { "epoch": 15.7875, "loss": 0.04144046828150749, "loss_ce": 7.389086385956034e-05, "loss_iou": 0.228515625, "loss_num": 0.00830078125, "loss_xval": 0.041259765625, "num_input_tokens_seen": 345858000, "step": 3789 }, { "epoch": 15.791666666666666, "grad_norm": 2.1167335482334555, "learning_rate": 5e-05, "loss": 0.0632, "num_input_tokens_seen": 345948800, "step": 3790 }, { "epoch": 15.791666666666666, "loss": 0.09141284227371216, "loss_ce": 0.0029042325913906097, "loss_iou": 0.224609375, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 345948800, "step": 3790 }, { "epoch": 15.795833333333333, "grad_norm": 4.493681166342253, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 346040064, "step": 3791 }, { "epoch": 15.795833333333333, "loss": 0.026475880295038223, "loss_ce": 1.8809511175277294e-06, "loss_iou": 0.33984375, "loss_num": 0.005279541015625, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 346040064, "step": 3791 }, { "epoch": 15.8, "grad_norm": 3.179135791427188, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 346131284, "step": 3792 }, { "epoch": 15.8, "loss": 0.039249323308467865, "loss_ce": 0.0031012536492198706, "loss_iou": 0.345703125, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 346131284, "step": 3792 }, { "epoch": 15.804166666666667, "grad_norm": 3.056206637118679, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 346223080, "step": 3793 }, { "epoch": 15.804166666666667, "loss": 0.06411039084196091, "loss_ce": 6.16227844147943e-05, "loss_iou": 0.158203125, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 346223080, "step": 3793 }, { "epoch": 15.808333333333334, "grad_norm": 3.8478526137418205, "learning_rate": 5e-05, "loss": 0.0413, "num_input_tokens_seen": 346314700, "step": 3794 }, { "epoch": 15.808333333333334, "loss": 0.028593793511390686, "loss_ce": 2.9341121262405068e-05, "loss_iou": 0.1650390625, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 346314700, "step": 3794 }, { "epoch": 15.8125, "grad_norm": 3.0937844433452404, "learning_rate": 5e-05, "loss": 0.0735, "num_input_tokens_seen": 346406248, "step": 3795 }, { "epoch": 15.8125, "loss": 0.07463543117046356, "loss_ce": 0.00020305861835367978, "loss_iou": 0.259765625, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 346406248, "step": 3795 }, { "epoch": 15.816666666666666, "grad_norm": 6.651472026105777, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 346495560, "step": 3796 }, { "epoch": 15.816666666666666, "loss": 0.058466482907533646, "loss_ce": 0.0013680945849046111, "loss_iou": 0.1396484375, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 346495560, "step": 3796 }, { "epoch": 15.820833333333333, "grad_norm": 3.268823329261245, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 346585652, "step": 3797 }, { "epoch": 15.820833333333333, "loss": 0.05235815420746803, "loss_ce": 0.003011230146512389, "loss_iou": 0.205078125, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 346585652, "step": 3797 }, { "epoch": 15.825, "grad_norm": 2.374070775676116, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 346677300, "step": 3798 }, { "epoch": 15.825, "loss": 0.0423770397901535, "loss_ce": 3.385990794413374e-06, "loss_iou": 0.28125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 346677300, "step": 3798 }, { "epoch": 15.829166666666667, "grad_norm": 3.8863831954622, "learning_rate": 5e-05, "loss": 0.0718, "num_input_tokens_seen": 346768528, "step": 3799 }, { "epoch": 15.829166666666667, "loss": 0.0791381374001503, "loss_ce": 0.003668165998533368, "loss_iou": 0.375, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 346768528, "step": 3799 }, { "epoch": 15.833333333333334, "grad_norm": 2.455564703756506, "learning_rate": 5e-05, "loss": 0.0457, "num_input_tokens_seen": 346860120, "step": 3800 }, { "epoch": 15.833333333333334, "loss": 0.05979982763528824, "loss_ce": 0.0008017204236239195, "loss_iou": 0.333984375, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 346860120, "step": 3800 }, { "epoch": 15.8375, "grad_norm": 3.1525991541958867, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 346951548, "step": 3801 }, { "epoch": 15.8375, "loss": 0.06513097882270813, "loss_ce": 0.00012853417138103396, "loss_iou": 0.283203125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 346951548, "step": 3801 }, { "epoch": 15.841666666666667, "grad_norm": 5.440069410822113, "learning_rate": 5e-05, "loss": 0.0437, "num_input_tokens_seen": 347042116, "step": 3802 }, { "epoch": 15.841666666666667, "loss": 0.05690130591392517, "loss_ce": 1.2823103361370158e-06, "loss_iou": 0.2138671875, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 347042116, "step": 3802 }, { "epoch": 15.845833333333333, "grad_norm": 3.570548123212897, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 347133712, "step": 3803 }, { "epoch": 15.845833333333333, "loss": 0.05249658226966858, "loss_ce": 6.34684738543001e-06, "loss_iou": 0.29296875, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 347133712, "step": 3803 }, { "epoch": 15.85, "grad_norm": 8.25724288412795, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 347224136, "step": 3804 }, { "epoch": 15.85, "loss": 0.046803995966911316, "loss_ce": 5.291229626891436e-06, "loss_iou": 0.23046875, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 347224136, "step": 3804 }, { "epoch": 15.854166666666666, "grad_norm": 7.356355135686262, "learning_rate": 5e-05, "loss": 0.0374, "num_input_tokens_seen": 347315452, "step": 3805 }, { "epoch": 15.854166666666666, "loss": 0.037571605294942856, "loss_ce": 4.463842287805164e-06, "loss_iou": 0.15625, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 347315452, "step": 3805 }, { "epoch": 15.858333333333333, "grad_norm": 1.7147176178915682, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 347406764, "step": 3806 }, { "epoch": 15.858333333333333, "loss": 0.06243875250220299, "loss_ce": 4.174997593509033e-05, "loss_iou": 0.201171875, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 347406764, "step": 3806 }, { "epoch": 15.8625, "grad_norm": 1.8615371261266904, "learning_rate": 5e-05, "loss": 0.0741, "num_input_tokens_seen": 347497104, "step": 3807 }, { "epoch": 15.8625, "loss": 0.0767849013209343, "loss_ce": 0.001971057616174221, "loss_iou": 0.25, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 347497104, "step": 3807 }, { "epoch": 15.866666666666667, "grad_norm": 1.4706378040439652, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 347588500, "step": 3808 }, { "epoch": 15.866666666666667, "loss": 0.04569356143474579, "loss_ce": 8.748088475840632e-06, "loss_iou": 0.29296875, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 347588500, "step": 3808 }, { "epoch": 15.870833333333334, "grad_norm": 3.6079065021293917, "learning_rate": 5e-05, "loss": 0.0653, "num_input_tokens_seen": 347680224, "step": 3809 }, { "epoch": 15.870833333333334, "loss": 0.04604998230934143, "loss_ce": 4.473171793506481e-05, "loss_iou": 0.40625, "loss_num": 0.00921630859375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 347680224, "step": 3809 }, { "epoch": 15.875, "grad_norm": 3.3903959022853996, "learning_rate": 5e-05, "loss": 0.052, "num_input_tokens_seen": 347771716, "step": 3810 }, { "epoch": 15.875, "loss": 0.04572838544845581, "loss_ce": 7.408703095279634e-05, "loss_iou": 0.28515625, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 347771716, "step": 3810 }, { "epoch": 15.879166666666666, "grad_norm": 3.287637182186556, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 347863140, "step": 3811 }, { "epoch": 15.879166666666666, "loss": 0.037694744765758514, "loss_ce": 0.0018060706788673997, "loss_iou": 0.248046875, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 347863140, "step": 3811 }, { "epoch": 15.883333333333333, "grad_norm": 5.253461756076304, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 347954324, "step": 3812 }, { "epoch": 15.883333333333333, "loss": 0.06424754858016968, "loss_ce": 1.5671426808694378e-05, "loss_iou": 0.0927734375, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 347954324, "step": 3812 }, { "epoch": 15.8875, "grad_norm": 2.1774547294251603, "learning_rate": 5e-05, "loss": 0.0861, "num_input_tokens_seen": 348045424, "step": 3813 }, { "epoch": 15.8875, "loss": 0.05383475497364998, "loss_ce": 0.00013907899847254157, "loss_iou": 0.2578125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 348045424, "step": 3813 }, { "epoch": 15.891666666666667, "grad_norm": 10.51582874797255, "learning_rate": 5e-05, "loss": 0.0739, "num_input_tokens_seen": 348137316, "step": 3814 }, { "epoch": 15.891666666666667, "loss": 0.09596287459135056, "loss_ce": 1.5606256056344137e-05, "loss_iou": 0.171875, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 348137316, "step": 3814 }, { "epoch": 15.895833333333334, "grad_norm": 11.267172703964794, "learning_rate": 5e-05, "loss": 0.0726, "num_input_tokens_seen": 348228740, "step": 3815 }, { "epoch": 15.895833333333334, "loss": 0.09679645299911499, "loss_ce": 0.00014728127280250192, "loss_iou": 0.26953125, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 348228740, "step": 3815 }, { "epoch": 15.9, "grad_norm": 6.034282483546973, "learning_rate": 5e-05, "loss": 0.063, "num_input_tokens_seen": 348320732, "step": 3816 }, { "epoch": 15.9, "loss": 0.05001269280910492, "loss_ce": 0.0002690425608307123, "loss_iou": 0.2890625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 348320732, "step": 3816 }, { "epoch": 15.904166666666667, "grad_norm": 3.4675060023574518, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 348411824, "step": 3817 }, { "epoch": 15.904166666666667, "loss": 0.06920676678419113, "loss_ce": 8.158196578733623e-06, "loss_iou": 0.2060546875, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 348411824, "step": 3817 }, { "epoch": 15.908333333333333, "grad_norm": 2.1176805909112275, "learning_rate": 5e-05, "loss": 0.1009, "num_input_tokens_seen": 348503136, "step": 3818 }, { "epoch": 15.908333333333333, "loss": 0.07493134588003159, "loss_ce": 0.01005860511213541, "loss_iou": 0.203125, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 348503136, "step": 3818 }, { "epoch": 15.9125, "grad_norm": 1.6286145537936416, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 348594572, "step": 3819 }, { "epoch": 15.9125, "loss": 0.046741899102926254, "loss_ce": 0.0004619909741450101, "loss_iou": 0.1904296875, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 348594572, "step": 3819 }, { "epoch": 15.916666666666666, "grad_norm": 3.4577450558540916, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 348685828, "step": 3820 }, { "epoch": 15.916666666666666, "loss": 0.030172022059559822, "loss_ce": 5.395931566454237e-06, "loss_iou": 0.341796875, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 348685828, "step": 3820 }, { "epoch": 15.920833333333333, "grad_norm": 2.268927461516382, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 348777232, "step": 3821 }, { "epoch": 15.920833333333333, "loss": 0.04325816035270691, "loss_ce": 0.0003046693454962224, "loss_iou": 0.201171875, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 348777232, "step": 3821 }, { "epoch": 15.925, "grad_norm": 2.7056408960960496, "learning_rate": 5e-05, "loss": 0.0727, "num_input_tokens_seen": 348867292, "step": 3822 }, { "epoch": 15.925, "loss": 0.08763208985328674, "loss_ce": 8.628669547761092e-07, "loss_iou": 0.1728515625, "loss_num": 0.017578125, "loss_xval": 0.08740234375, "num_input_tokens_seen": 348867292, "step": 3822 }, { "epoch": 15.929166666666667, "grad_norm": 2.220622709806258, "learning_rate": 5e-05, "loss": 0.04, "num_input_tokens_seen": 348958972, "step": 3823 }, { "epoch": 15.929166666666667, "loss": 0.04168154299259186, "loss_ce": 0.006693140137940645, "loss_iou": 0.1337890625, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 348958972, "step": 3823 }, { "epoch": 15.933333333333334, "grad_norm": 1.3806298831490407, "learning_rate": 5e-05, "loss": 0.0355, "num_input_tokens_seen": 349049560, "step": 3824 }, { "epoch": 15.933333333333334, "loss": 0.029834497720003128, "loss_ce": 3.5636912798509e-06, "loss_iou": 0.28515625, "loss_num": 0.0059814453125, "loss_xval": 0.02978515625, "num_input_tokens_seen": 349049560, "step": 3824 }, { "epoch": 15.9375, "grad_norm": 1.9240761470930519, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 349140632, "step": 3825 }, { "epoch": 15.9375, "loss": 0.04357026517391205, "loss_ce": 6.42470376988058e-06, "loss_iou": 0.283203125, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 349140632, "step": 3825 }, { "epoch": 15.941666666666666, "grad_norm": 102.84510315262223, "learning_rate": 5e-05, "loss": 0.0457, "num_input_tokens_seen": 349230764, "step": 3826 }, { "epoch": 15.941666666666666, "loss": 0.0428328663110733, "loss_ce": 3.196366742486134e-05, "loss_iou": 0.28515625, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 349230764, "step": 3826 }, { "epoch": 15.945833333333333, "grad_norm": 8.467889067309262, "learning_rate": 5e-05, "loss": 0.0836, "num_input_tokens_seen": 349322084, "step": 3827 }, { "epoch": 15.945833333333333, "loss": 0.07375296950340271, "loss_ce": 0.009543980471789837, "loss_iou": 0.283203125, "loss_num": 0.0128173828125, "loss_xval": 0.064453125, "num_input_tokens_seen": 349322084, "step": 3827 }, { "epoch": 15.95, "grad_norm": 3.01453202459369, "learning_rate": 5e-05, "loss": 0.051, "num_input_tokens_seen": 349413060, "step": 3828 }, { "epoch": 15.95, "loss": 0.05052509903907776, "loss_ce": 3.2493758226337377e-06, "loss_iou": 0.140625, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 349413060, "step": 3828 }, { "epoch": 15.954166666666667, "grad_norm": 3.5161494715474446, "learning_rate": 5e-05, "loss": 0.081, "num_input_tokens_seen": 349504056, "step": 3829 }, { "epoch": 15.954166666666667, "loss": 0.07083917409181595, "loss_ce": 0.00010705763270379975, "loss_iou": 0.283203125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 349504056, "step": 3829 }, { "epoch": 15.958333333333334, "grad_norm": 2.800003641668997, "learning_rate": 5e-05, "loss": 0.1009, "num_input_tokens_seen": 349595316, "step": 3830 }, { "epoch": 15.958333333333334, "loss": 0.13452748954296112, "loss_ce": 5.1784176321234554e-05, "loss_iou": 0.283203125, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 349595316, "step": 3830 }, { "epoch": 15.9625, "grad_norm": 8.297436090370738, "learning_rate": 5e-05, "loss": 0.1103, "num_input_tokens_seen": 349686492, "step": 3831 }, { "epoch": 15.9625, "loss": 0.0906902551651001, "loss_ce": 0.0003277028736192733, "loss_iou": 0.2431640625, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 349686492, "step": 3831 }, { "epoch": 15.966666666666667, "grad_norm": 3.1585541135793087, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 349776952, "step": 3832 }, { "epoch": 15.966666666666667, "loss": 0.036786098033189774, "loss_ce": 0.0005006975261494517, "loss_iou": 0.1982421875, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 349776952, "step": 3832 }, { "epoch": 15.970833333333333, "grad_norm": 15.518672394577264, "learning_rate": 5e-05, "loss": 0.0723, "num_input_tokens_seen": 349868364, "step": 3833 }, { "epoch": 15.970833333333333, "loss": 0.05300527438521385, "loss_ce": 0.0009270303999073803, "loss_iou": 0.28515625, "loss_num": 0.01043701171875, "loss_xval": 0.052001953125, "num_input_tokens_seen": 349868364, "step": 3833 }, { "epoch": 15.975, "grad_norm": 3.0035389925614115, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 349958552, "step": 3834 }, { "epoch": 15.975, "loss": 0.039127472788095474, "loss_ce": 9.549165406497195e-05, "loss_iou": 0.181640625, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 349958552, "step": 3834 }, { "epoch": 15.979166666666666, "grad_norm": 2.303188031481693, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 350050256, "step": 3835 }, { "epoch": 15.979166666666666, "loss": 0.06078094244003296, "loss_ce": 2.0446534108486958e-05, "loss_iou": 0.3125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 350050256, "step": 3835 }, { "epoch": 15.983333333333333, "grad_norm": 6.314212930655639, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 350141172, "step": 3836 }, { "epoch": 15.983333333333333, "loss": 0.07146543264389038, "loss_ce": 5.429885641206056e-05, "loss_iou": 0.2734375, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 350141172, "step": 3836 }, { "epoch": 15.9875, "grad_norm": 2.916406234312362, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 350232684, "step": 3837 }, { "epoch": 15.9875, "loss": 0.047817349433898926, "loss_ce": 4.208229438518174e-05, "loss_iou": 0.30078125, "loss_num": 0.009521484375, "loss_xval": 0.0478515625, "num_input_tokens_seen": 350232684, "step": 3837 }, { "epoch": 15.991666666666667, "grad_norm": 12.295296027812869, "learning_rate": 5e-05, "loss": 0.0765, "num_input_tokens_seen": 350324372, "step": 3838 }, { "epoch": 15.991666666666667, "loss": 0.04820753261446953, "loss_ce": 3.5534183552954346e-05, "loss_iou": 0.322265625, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 350324372, "step": 3838 }, { "epoch": 15.995833333333334, "grad_norm": 5.8819263365479495, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 350415192, "step": 3839 }, { "epoch": 15.995833333333334, "loss": 0.02984732948243618, "loss_ce": 1.139207711275958e-06, "loss_iou": 0.232421875, "loss_num": 0.0059814453125, "loss_xval": 0.02978515625, "num_input_tokens_seen": 350415192, "step": 3839 }, { "epoch": 16.0, "grad_norm": 2.6258538181433733, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 350506624, "step": 3840 }, { "epoch": 16.0, "loss": 0.07496326416730881, "loss_ce": 0.0024534992408007383, "loss_iou": 0.1669921875, "loss_num": 0.0145263671875, "loss_xval": 0.072265625, "num_input_tokens_seen": 350506624, "step": 3840 }, { "epoch": 16.004166666666666, "grad_norm": 4.381327587546374, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 350597504, "step": 3841 }, { "epoch": 16.004166666666666, "loss": 0.07269339263439178, "loss_ce": 5.212401674725697e-07, "loss_iou": 0.359375, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 350597504, "step": 3841 }, { "epoch": 16.008333333333333, "grad_norm": 4.452645426253799, "learning_rate": 5e-05, "loss": 0.0654, "num_input_tokens_seen": 350689396, "step": 3842 }, { "epoch": 16.008333333333333, "loss": 0.09613090753555298, "loss_ce": 8.168114618456457e-06, "loss_iou": 0.296875, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 350689396, "step": 3842 }, { "epoch": 16.0125, "grad_norm": 1.5707464711745487, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 350780704, "step": 3843 }, { "epoch": 16.0125, "loss": 0.02814393863081932, "loss_ce": 4.488003833102994e-05, "loss_iou": 0.2275390625, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 350780704, "step": 3843 }, { "epoch": 16.016666666666666, "grad_norm": 1.9733162652420486, "learning_rate": 5e-05, "loss": 0.0566, "num_input_tokens_seen": 350871916, "step": 3844 }, { "epoch": 16.016666666666666, "loss": 0.055129144340753555, "loss_ce": 4.4918466301169246e-05, "loss_iou": 0.291015625, "loss_num": 0.010986328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 350871916, "step": 3844 }, { "epoch": 16.020833333333332, "grad_norm": 1.9035264987053406, "learning_rate": 5e-05, "loss": 0.053, "num_input_tokens_seen": 350963336, "step": 3845 }, { "epoch": 16.020833333333332, "loss": 0.050709318369627, "loss_ce": 4.362413164926693e-06, "loss_iou": 0.2294921875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 350963336, "step": 3845 }, { "epoch": 16.025, "grad_norm": 2.917842477642324, "learning_rate": 5e-05, "loss": 0.0933, "num_input_tokens_seen": 351054600, "step": 3846 }, { "epoch": 16.025, "loss": 0.10769159346818924, "loss_ce": 1.0315693543816451e-05, "loss_iou": 0.279296875, "loss_num": 0.021484375, "loss_xval": 0.10791015625, "num_input_tokens_seen": 351054600, "step": 3846 }, { "epoch": 16.029166666666665, "grad_norm": 3.9201172308607624, "learning_rate": 5e-05, "loss": 0.0812, "num_input_tokens_seen": 351145784, "step": 3847 }, { "epoch": 16.029166666666665, "loss": 0.10258331894874573, "loss_ce": 0.001997376559302211, "loss_iou": 0.23046875, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 351145784, "step": 3847 }, { "epoch": 16.033333333333335, "grad_norm": 3.3330847054065043, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 351237112, "step": 3848 }, { "epoch": 16.033333333333335, "loss": 0.07070392370223999, "loss_ce": 0.00011676585563691333, "loss_iou": 0.28125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 351237112, "step": 3848 }, { "epoch": 16.0375, "grad_norm": 1.0271650861426744, "learning_rate": 5e-05, "loss": 0.0465, "num_input_tokens_seen": 351328720, "step": 3849 }, { "epoch": 16.0375, "loss": 0.06651593744754791, "loss_ce": 4.8649879317963496e-05, "loss_iou": 0.259765625, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 351328720, "step": 3849 }, { "epoch": 16.041666666666668, "grad_norm": 0.8405494813029039, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 351420196, "step": 3850 }, { "epoch": 16.041666666666668, "loss": 0.0480114221572876, "loss_ce": 6.830670463386923e-05, "loss_iou": 0.234375, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 351420196, "step": 3850 }, { "epoch": 16.045833333333334, "grad_norm": 1.8078136924110964, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 351511276, "step": 3851 }, { "epoch": 16.045833333333334, "loss": 0.0456475131213665, "loss_ce": 2.373470488237217e-05, "loss_iou": 0.298828125, "loss_num": 0.00909423828125, "loss_xval": 0.045654296875, "num_input_tokens_seen": 351511276, "step": 3851 }, { "epoch": 16.05, "grad_norm": 0.620052444040268, "learning_rate": 5e-05, "loss": 0.0313, "num_input_tokens_seen": 351602388, "step": 3852 }, { "epoch": 16.05, "loss": 0.0332469642162323, "loss_ce": 0.00036427262239158154, "loss_iou": 0.1728515625, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 351602388, "step": 3852 }, { "epoch": 16.054166666666667, "grad_norm": 1.6012531827704792, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 351693092, "step": 3853 }, { "epoch": 16.054166666666667, "loss": 0.061077818274497986, "loss_ce": 0.0007598230731673539, "loss_iou": 0.341796875, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 351693092, "step": 3853 }, { "epoch": 16.058333333333334, "grad_norm": 23.89357125292572, "learning_rate": 5e-05, "loss": 0.0767, "num_input_tokens_seen": 351783416, "step": 3854 }, { "epoch": 16.058333333333334, "loss": 0.08595338463783264, "loss_ce": 6.319621661532437e-07, "loss_iou": 0.365234375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 351783416, "step": 3854 }, { "epoch": 16.0625, "grad_norm": 2.8630919901550156, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 351874700, "step": 3855 }, { "epoch": 16.0625, "loss": 0.040271710604429245, "loss_ce": 3.7666129628632916e-06, "loss_iou": 0.322265625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 351874700, "step": 3855 }, { "epoch": 16.066666666666666, "grad_norm": 4.667296196315083, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 351965908, "step": 3856 }, { "epoch": 16.066666666666666, "loss": 0.048476576805114746, "loss_ce": 0.00010621073306538165, "loss_iou": 0.2470703125, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 351965908, "step": 3856 }, { "epoch": 16.070833333333333, "grad_norm": 2.643841858219604, "learning_rate": 5e-05, "loss": 0.0688, "num_input_tokens_seen": 352057156, "step": 3857 }, { "epoch": 16.070833333333333, "loss": 0.06522645801305771, "loss_ce": 1.0394860510132276e-05, "loss_iou": 0.3046875, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 352057156, "step": 3857 }, { "epoch": 16.075, "grad_norm": 3.3895794943722675, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 352148756, "step": 3858 }, { "epoch": 16.075, "loss": 0.09010984003543854, "loss_ce": 6.772094639018178e-05, "loss_iou": 0.287109375, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 352148756, "step": 3858 }, { "epoch": 16.079166666666666, "grad_norm": 2.6239057736250326, "learning_rate": 5e-05, "loss": 0.0763, "num_input_tokens_seen": 352239384, "step": 3859 }, { "epoch": 16.079166666666666, "loss": 0.09036745131015778, "loss_ce": 4.904800334770698e-06, "loss_iou": 0.236328125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 352239384, "step": 3859 }, { "epoch": 16.083333333333332, "grad_norm": 1.8972793154833505, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 352330660, "step": 3860 }, { "epoch": 16.083333333333332, "loss": 0.026490317657589912, "loss_ce": 1.0600457471809932e-06, "loss_iou": 0.2041015625, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 352330660, "step": 3860 }, { "epoch": 16.0875, "grad_norm": 1.3183903989842958, "learning_rate": 5e-05, "loss": 0.0443, "num_input_tokens_seen": 352421700, "step": 3861 }, { "epoch": 16.0875, "loss": 0.051079727709293365, "loss_ce": 8.559488378523383e-06, "loss_iou": 0.2578125, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 352421700, "step": 3861 }, { "epoch": 16.091666666666665, "grad_norm": 1.0495152774309502, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 352513560, "step": 3862 }, { "epoch": 16.091666666666665, "loss": 0.018554434180259705, "loss_ce": 1.8820934201357886e-05, "loss_iou": 0.1171875, "loss_num": 0.0037078857421875, "loss_xval": 0.0185546875, "num_input_tokens_seen": 352513560, "step": 3862 }, { "epoch": 16.095833333333335, "grad_norm": 1.3600341470832789, "learning_rate": 5e-05, "loss": 0.0531, "num_input_tokens_seen": 352604684, "step": 3863 }, { "epoch": 16.095833333333335, "loss": 0.060372743755578995, "loss_ce": 8.97386962606106e-06, "loss_iou": 0.21875, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 352604684, "step": 3863 }, { "epoch": 16.1, "grad_norm": 1.0768338932899961, "learning_rate": 5e-05, "loss": 0.0706, "num_input_tokens_seen": 352695828, "step": 3864 }, { "epoch": 16.1, "loss": 0.05559179559350014, "loss_ce": 4.025570888188668e-06, "loss_iou": 0.04833984375, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 352695828, "step": 3864 }, { "epoch": 16.104166666666668, "grad_norm": 2.247862443145029, "learning_rate": 5e-05, "loss": 0.062, "num_input_tokens_seen": 352787496, "step": 3865 }, { "epoch": 16.104166666666668, "loss": 0.05655861645936966, "loss_ce": 0.00048256589798256755, "loss_iou": 0.2255859375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 352787496, "step": 3865 }, { "epoch": 16.108333333333334, "grad_norm": 1.283053998660903, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 352877856, "step": 3866 }, { "epoch": 16.108333333333334, "loss": 0.041413579136133194, "loss_ce": 1.2254739658601466e-06, "loss_iou": 0.1728515625, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 352877856, "step": 3866 }, { "epoch": 16.1125, "grad_norm": 3.309203175656161, "learning_rate": 5e-05, "loss": 0.0744, "num_input_tokens_seen": 352968712, "step": 3867 }, { "epoch": 16.1125, "loss": 0.07202804833650589, "loss_ce": 6.760506221326068e-05, "loss_iou": 0.1591796875, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 352968712, "step": 3867 }, { "epoch": 16.116666666666667, "grad_norm": 1.2593369880726677, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 353060368, "step": 3868 }, { "epoch": 16.116666666666667, "loss": 0.0669381394982338, "loss_ce": 0.00010082882363349199, "loss_iou": 0.26171875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 353060368, "step": 3868 }, { "epoch": 16.120833333333334, "grad_norm": 1.4114893449006416, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 353151512, "step": 3869 }, { "epoch": 16.120833333333334, "loss": 0.08161616325378418, "loss_ce": 0.00012659240746870637, "loss_iou": 0.234375, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 353151512, "step": 3869 }, { "epoch": 16.125, "grad_norm": 1.7572843806708736, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 353242324, "step": 3870 }, { "epoch": 16.125, "loss": 0.03830033540725708, "loss_ce": 0.00010758535063359886, "loss_iou": 0.3203125, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 353242324, "step": 3870 }, { "epoch": 16.129166666666666, "grad_norm": 1.8327798660955157, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 353334108, "step": 3871 }, { "epoch": 16.129166666666666, "loss": 0.04010815545916557, "loss_ce": 0.00042004554416052997, "loss_iou": 0.259765625, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 353334108, "step": 3871 }, { "epoch": 16.133333333333333, "grad_norm": 4.91119435241483, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 353424780, "step": 3872 }, { "epoch": 16.133333333333333, "loss": 0.0698501318693161, "loss_ce": 0.0009567025699652731, "loss_iou": 0.349609375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 353424780, "step": 3872 }, { "epoch": 16.1375, "grad_norm": 3.6188922276676787, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 353516128, "step": 3873 }, { "epoch": 16.1375, "loss": 0.037483297288417816, "loss_ce": 7.709463716309983e-06, "loss_iou": 0.3046875, "loss_num": 0.007476806640625, "loss_xval": 0.03759765625, "num_input_tokens_seen": 353516128, "step": 3873 }, { "epoch": 16.141666666666666, "grad_norm": 2.89978017600951, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 353607724, "step": 3874 }, { "epoch": 16.141666666666666, "loss": 0.03462275117635727, "loss_ce": 4.6336626837728545e-05, "loss_iou": 0.33984375, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 353607724, "step": 3874 }, { "epoch": 16.145833333333332, "grad_norm": 2.379705843982471, "learning_rate": 5e-05, "loss": 0.0424, "num_input_tokens_seen": 353699508, "step": 3875 }, { "epoch": 16.145833333333332, "loss": 0.047790978103876114, "loss_ce": 0.0006565803778357804, "loss_iou": 0.302734375, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 353699508, "step": 3875 }, { "epoch": 16.15, "grad_norm": 3.237416759894803, "learning_rate": 5e-05, "loss": 0.0666, "num_input_tokens_seen": 353791332, "step": 3876 }, { "epoch": 16.15, "loss": 0.06048550829291344, "loss_ce": 0.00010648273746483028, "loss_iou": 0.322265625, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 353791332, "step": 3876 }, { "epoch": 16.154166666666665, "grad_norm": 2.9326643294151253, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 353881960, "step": 3877 }, { "epoch": 16.154166666666665, "loss": 0.07204936444759369, "loss_ce": 1.2621946552826557e-05, "loss_iou": 0.3046875, "loss_num": 0.014404296875, "loss_xval": 0.072265625, "num_input_tokens_seen": 353881960, "step": 3877 }, { "epoch": 16.158333333333335, "grad_norm": 10.049778256959911, "learning_rate": 5e-05, "loss": 0.072, "num_input_tokens_seen": 353973588, "step": 3878 }, { "epoch": 16.158333333333335, "loss": 0.10550323128700256, "loss_ce": 0.004329821560531855, "loss_iou": 0.296875, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 353973588, "step": 3878 }, { "epoch": 16.1625, "grad_norm": 4.342489795079413, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 354064720, "step": 3879 }, { "epoch": 16.1625, "loss": 0.04531225189566612, "loss_ce": 5.468219387694262e-05, "loss_iou": 0.380859375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 354064720, "step": 3879 }, { "epoch": 16.166666666666668, "grad_norm": 1.6440358638616335, "learning_rate": 5e-05, "loss": 0.069, "num_input_tokens_seen": 354156308, "step": 3880 }, { "epoch": 16.166666666666668, "loss": 0.08422918617725372, "loss_ce": 6.792954536649631e-07, "loss_iou": 0.1875, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 354156308, "step": 3880 }, { "epoch": 16.170833333333334, "grad_norm": 1.3478561125750785, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 354247956, "step": 3881 }, { "epoch": 16.170833333333334, "loss": 0.04582810401916504, "loss_ce": 5.9628473536577076e-06, "loss_iou": 0.236328125, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 354247956, "step": 3881 }, { "epoch": 16.175, "grad_norm": 2.773034842459887, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 354339412, "step": 3882 }, { "epoch": 16.175, "loss": 0.0835675522685051, "loss_ce": 5.619968578685075e-05, "loss_iou": 0.22265625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 354339412, "step": 3882 }, { "epoch": 16.179166666666667, "grad_norm": 2.7796471112523973, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 354430220, "step": 3883 }, { "epoch": 16.179166666666667, "loss": 0.043384261429309845, "loss_ce": 3.523240366121172e-06, "loss_iou": 0.265625, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 354430220, "step": 3883 }, { "epoch": 16.183333333333334, "grad_norm": 3.799689917817227, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 354521452, "step": 3884 }, { "epoch": 16.183333333333334, "loss": 0.05691567808389664, "loss_ce": 1.5653604350518435e-05, "loss_iou": 0.3203125, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 354521452, "step": 3884 }, { "epoch": 16.1875, "grad_norm": 4.917707181166018, "learning_rate": 5e-05, "loss": 0.0935, "num_input_tokens_seen": 354612872, "step": 3885 }, { "epoch": 16.1875, "loss": 0.055576518177986145, "loss_ce": 3.4524997317930683e-05, "loss_iou": 0.232421875, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 354612872, "step": 3885 }, { "epoch": 16.191666666666666, "grad_norm": 3.0422164792449506, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 354703848, "step": 3886 }, { "epoch": 16.191666666666666, "loss": 0.03766234964132309, "loss_ce": 3.6567685128829908e-06, "loss_iou": 0.13671875, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 354703848, "step": 3886 }, { "epoch": 16.195833333333333, "grad_norm": 2.255614957739726, "learning_rate": 5e-05, "loss": 0.0508, "num_input_tokens_seen": 354795188, "step": 3887 }, { "epoch": 16.195833333333333, "loss": 0.04246654734015465, "loss_ce": 3.1855346605880186e-05, "loss_iou": 0.2470703125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 354795188, "step": 3887 }, { "epoch": 16.2, "grad_norm": 2.71762488614364, "learning_rate": 5e-05, "loss": 0.0761, "num_input_tokens_seen": 354886520, "step": 3888 }, { "epoch": 16.2, "loss": 0.10885563492774963, "loss_ce": 6.046430644346401e-05, "loss_iou": 0.259765625, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 354886520, "step": 3888 }, { "epoch": 16.204166666666666, "grad_norm": 2.0575852115195024, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 354978144, "step": 3889 }, { "epoch": 16.204166666666666, "loss": 0.034292981028556824, "loss_ce": 0.00020484643755480647, "loss_iou": 0.2109375, "loss_num": 0.006805419921875, "loss_xval": 0.0341796875, "num_input_tokens_seen": 354978144, "step": 3889 }, { "epoch": 16.208333333333332, "grad_norm": 2.458019324936502, "learning_rate": 5e-05, "loss": 0.0352, "num_input_tokens_seen": 355068772, "step": 3890 }, { "epoch": 16.208333333333332, "loss": 0.03707907348871231, "loss_ce": 7.844500942155719e-06, "loss_iou": 0.1455078125, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 355068772, "step": 3890 }, { "epoch": 16.2125, "grad_norm": 3.3796932033062683, "learning_rate": 5e-05, "loss": 0.0417, "num_input_tokens_seen": 355160352, "step": 3891 }, { "epoch": 16.2125, "loss": 0.0388079434633255, "loss_ce": 4.8458564378961455e-06, "loss_iou": 0.2119140625, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 355160352, "step": 3891 }, { "epoch": 16.216666666666665, "grad_norm": 3.058885505423328, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 355251504, "step": 3892 }, { "epoch": 16.216666666666665, "loss": 0.04128335416316986, "loss_ce": 3.885061596520245e-05, "loss_iou": 0.10302734375, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 355251504, "step": 3892 }, { "epoch": 16.220833333333335, "grad_norm": 9.888017978731519, "learning_rate": 5e-05, "loss": 0.0796, "num_input_tokens_seen": 355343372, "step": 3893 }, { "epoch": 16.220833333333335, "loss": 0.05901408940553665, "loss_ce": 5.412587051978335e-05, "loss_iou": 0.3125, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 355343372, "step": 3893 }, { "epoch": 16.225, "grad_norm": 3.632287783487698, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 355434980, "step": 3894 }, { "epoch": 16.225, "loss": 0.050512395799160004, "loss_ce": 5.807398792967433e-06, "loss_iou": 0.1171875, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 355434980, "step": 3894 }, { "epoch": 16.229166666666668, "grad_norm": 3.131015208397186, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 355527132, "step": 3895 }, { "epoch": 16.229166666666668, "loss": 0.04876472055912018, "loss_ce": 0.005017775110900402, "loss_iou": 0.234375, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 355527132, "step": 3895 }, { "epoch": 16.233333333333334, "grad_norm": 1.3546156567536622, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 355618440, "step": 3896 }, { "epoch": 16.233333333333334, "loss": 0.05599173158407211, "loss_ce": 3.775473669520579e-05, "loss_iou": 0.212890625, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 355618440, "step": 3896 }, { "epoch": 16.2375, "grad_norm": 5.400964442985786, "learning_rate": 5e-05, "loss": 0.0772, "num_input_tokens_seen": 355710044, "step": 3897 }, { "epoch": 16.2375, "loss": 0.08695336431264877, "loss_ce": 0.0020076867658644915, "loss_iou": 0.2734375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 355710044, "step": 3897 }, { "epoch": 16.241666666666667, "grad_norm": 5.143639772623531, "learning_rate": 5e-05, "loss": 0.0748, "num_input_tokens_seen": 355801108, "step": 3898 }, { "epoch": 16.241666666666667, "loss": 0.03608565032482147, "loss_ce": 1.3872523595637176e-05, "loss_iou": 0.2451171875, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 355801108, "step": 3898 }, { "epoch": 16.245833333333334, "grad_norm": 1.987989936921633, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 355892344, "step": 3899 }, { "epoch": 16.245833333333334, "loss": 0.04525504633784294, "loss_ce": 1.273614452657057e-05, "loss_iou": 0.2197265625, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 355892344, "step": 3899 }, { "epoch": 16.25, "grad_norm": 2.3838045785219486, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 355983472, "step": 3900 }, { "epoch": 16.25, "loss": 0.05004560574889183, "loss_ce": 4.4070420699426904e-06, "loss_iou": 0.259765625, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 355983472, "step": 3900 }, { "epoch": 16.254166666666666, "grad_norm": 4.322737510300905, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 356073088, "step": 3901 }, { "epoch": 16.254166666666666, "loss": 0.05106162279844284, "loss_ce": 2.0969147954019718e-05, "loss_iou": 0.26171875, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 356073088, "step": 3901 }, { "epoch": 16.258333333333333, "grad_norm": 2.9326430406216017, "learning_rate": 5e-05, "loss": 0.0246, "num_input_tokens_seen": 356164696, "step": 3902 }, { "epoch": 16.258333333333333, "loss": 0.022506091743707657, "loss_ce": 1.4636367268394679e-05, "loss_iou": 0.2158203125, "loss_num": 0.0045166015625, "loss_xval": 0.0224609375, "num_input_tokens_seen": 356164696, "step": 3902 }, { "epoch": 16.2625, "grad_norm": 2.52037964776615, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 356256476, "step": 3903 }, { "epoch": 16.2625, "loss": 0.03397050127387047, "loss_ce": 0.00012650688586290926, "loss_iou": 0.0791015625, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 356256476, "step": 3903 }, { "epoch": 16.266666666666666, "grad_norm": 3.0138612484128715, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 356347732, "step": 3904 }, { "epoch": 16.266666666666666, "loss": 0.07414688169956207, "loss_ce": 4.428348347573774e-06, "loss_iou": 0.267578125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 356347732, "step": 3904 }, { "epoch": 16.270833333333332, "grad_norm": 2.551420311161361, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 356439436, "step": 3905 }, { "epoch": 16.270833333333332, "loss": 0.07031318545341492, "loss_ce": 0.0002448215091135353, "loss_iou": 0.365234375, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 356439436, "step": 3905 }, { "epoch": 16.275, "grad_norm": 2.4837327366530757, "learning_rate": 5e-05, "loss": 0.0542, "num_input_tokens_seen": 356530704, "step": 3906 }, { "epoch": 16.275, "loss": 0.06787487864494324, "loss_ce": 3.7870017877139617e-06, "loss_iou": 0.236328125, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 356530704, "step": 3906 }, { "epoch": 16.279166666666665, "grad_norm": 7.965636333564063, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 356621628, "step": 3907 }, { "epoch": 16.279166666666665, "loss": 0.03679324686527252, "loss_ce": 4.2454055801499635e-05, "loss_iou": 0.19921875, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 356621628, "step": 3907 }, { "epoch": 16.283333333333335, "grad_norm": 1.2914497291020453, "learning_rate": 5e-05, "loss": 0.0695, "num_input_tokens_seen": 356712460, "step": 3908 }, { "epoch": 16.283333333333335, "loss": 0.0689861923456192, "loss_ce": 1.208977664646227e-06, "loss_iou": 0.203125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 356712460, "step": 3908 }, { "epoch": 16.2875, "grad_norm": 2.0278846333993186, "learning_rate": 5e-05, "loss": 0.1155, "num_input_tokens_seen": 356801476, "step": 3909 }, { "epoch": 16.2875, "loss": 0.15893718600273132, "loss_ce": 1.632997964406968e-06, "loss_iou": 0.251953125, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 356801476, "step": 3909 }, { "epoch": 16.291666666666668, "grad_norm": 1.8283743924331144, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 356892728, "step": 3910 }, { "epoch": 16.291666666666668, "loss": 0.04230141639709473, "loss_ce": 4.052419626532355e-06, "loss_iou": 0.2421875, "loss_num": 0.00848388671875, "loss_xval": 0.042236328125, "num_input_tokens_seen": 356892728, "step": 3910 }, { "epoch": 16.295833333333334, "grad_norm": 3.090564522459806, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 356984724, "step": 3911 }, { "epoch": 16.295833333333334, "loss": 0.06969290971755981, "loss_ce": 0.0001280913274968043, "loss_iou": 0.2138671875, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 356984724, "step": 3911 }, { "epoch": 16.3, "grad_norm": 2.1429694540052084, "learning_rate": 5e-05, "loss": 0.0969, "num_input_tokens_seen": 357076264, "step": 3912 }, { "epoch": 16.3, "loss": 0.0981968492269516, "loss_ce": 6.758611561963335e-05, "loss_iou": 0.34765625, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 357076264, "step": 3912 }, { "epoch": 16.304166666666667, "grad_norm": 1.7474171153292801, "learning_rate": 5e-05, "loss": 0.0424, "num_input_tokens_seen": 357168432, "step": 3913 }, { "epoch": 16.304166666666667, "loss": 0.043973229825496674, "loss_ce": 2.791850783978589e-05, "loss_iou": 0.259765625, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 357168432, "step": 3913 }, { "epoch": 16.308333333333334, "grad_norm": 1.129064597865002, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 357259436, "step": 3914 }, { "epoch": 16.308333333333334, "loss": 0.048458926379680634, "loss_ce": 4.638748578145169e-06, "loss_iou": 0.154296875, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 357259436, "step": 3914 }, { "epoch": 16.3125, "grad_norm": 3.94881844780485, "learning_rate": 5e-05, "loss": 0.0688, "num_input_tokens_seen": 357350308, "step": 3915 }, { "epoch": 16.3125, "loss": 0.042931340634822845, "loss_ce": 7.373155881396087e-07, "loss_iou": 0.22265625, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 357350308, "step": 3915 }, { "epoch": 16.316666666666666, "grad_norm": 4.085693351868434, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 357441296, "step": 3916 }, { "epoch": 16.316666666666666, "loss": 0.04708992689847946, "loss_ce": 1.3049620974925347e-06, "loss_iou": 0.2734375, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 357441296, "step": 3916 }, { "epoch": 16.320833333333333, "grad_norm": 6.100082012849477, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 357532208, "step": 3917 }, { "epoch": 16.320833333333333, "loss": 0.05945229530334473, "loss_ce": 0.0006449182983487844, "loss_iou": 0.32421875, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 357532208, "step": 3917 }, { "epoch": 16.325, "grad_norm": 3.268667876262669, "learning_rate": 5e-05, "loss": 0.0665, "num_input_tokens_seen": 357623884, "step": 3918 }, { "epoch": 16.325, "loss": 0.03352963924407959, "loss_ce": 6.079977083572885e-06, "loss_iou": 0.25390625, "loss_num": 0.0067138671875, "loss_xval": 0.033447265625, "num_input_tokens_seen": 357623884, "step": 3918 }, { "epoch": 16.329166666666666, "grad_norm": 2.6731288129200648, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 357714404, "step": 3919 }, { "epoch": 16.329166666666666, "loss": 0.06423118710517883, "loss_ce": 1.457569669582881e-05, "loss_iou": 0.390625, "loss_num": 0.0128173828125, "loss_xval": 0.064453125, "num_input_tokens_seen": 357714404, "step": 3919 }, { "epoch": 16.333333333333332, "grad_norm": 2.760440924974219, "learning_rate": 5e-05, "loss": 0.0833, "num_input_tokens_seen": 357805056, "step": 3920 }, { "epoch": 16.333333333333332, "loss": 0.0687132477760315, "loss_ce": 2.9257778351166053e-06, "loss_iou": 0.2431640625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 357805056, "step": 3920 }, { "epoch": 16.3375, "grad_norm": 3.2057903850136404, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 357896480, "step": 3921 }, { "epoch": 16.3375, "loss": 0.03337109833955765, "loss_ce": 0.0004884064546786249, "loss_iou": 0.341796875, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 357896480, "step": 3921 }, { "epoch": 16.341666666666665, "grad_norm": 6.439634392685926, "learning_rate": 5e-05, "loss": 0.0671, "num_input_tokens_seen": 357987832, "step": 3922 }, { "epoch": 16.341666666666665, "loss": 0.07807601243257523, "loss_ce": 0.0001798927114577964, "loss_iou": 0.32421875, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 357987832, "step": 3922 }, { "epoch": 16.345833333333335, "grad_norm": 2.2851150017124175, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 358079152, "step": 3923 }, { "epoch": 16.345833333333335, "loss": 0.0413411445915699, "loss_ce": 0.00017293104610871524, "loss_iou": 0.310546875, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 358079152, "step": 3923 }, { "epoch": 16.35, "grad_norm": 1.56516964735357, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 358169648, "step": 3924 }, { "epoch": 16.35, "loss": 0.03811732679605484, "loss_ce": 8.725569387024734e-07, "loss_iou": 0.1767578125, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 358169648, "step": 3924 }, { "epoch": 16.354166666666668, "grad_norm": 3.006722798471429, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 358261204, "step": 3925 }, { "epoch": 16.354166666666668, "loss": 0.0471673384308815, "loss_ce": 1.7678248696029186e-05, "loss_iou": 0.205078125, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 358261204, "step": 3925 }, { "epoch": 16.358333333333334, "grad_norm": 2.434268017276299, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 358352804, "step": 3926 }, { "epoch": 16.358333333333334, "loss": 0.07035059481859207, "loss_ce": 0.00023645992041565478, "loss_iou": 0.08447265625, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 358352804, "step": 3926 }, { "epoch": 16.3625, "grad_norm": 3.7321751821660896, "learning_rate": 5e-05, "loss": 0.0963, "num_input_tokens_seen": 358443820, "step": 3927 }, { "epoch": 16.3625, "loss": 0.12764135003089905, "loss_ce": 1.5843133951420896e-06, "loss_iou": 0.2265625, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 358443820, "step": 3927 }, { "epoch": 16.366666666666667, "grad_norm": 4.455672992837086, "learning_rate": 5e-05, "loss": 0.0455, "num_input_tokens_seen": 358535460, "step": 3928 }, { "epoch": 16.366666666666667, "loss": 0.04032261669635773, "loss_ce": 0.00014622484741266817, "loss_iou": 0.2314453125, "loss_num": 0.00799560546875, "loss_xval": 0.040283203125, "num_input_tokens_seen": 358535460, "step": 3928 }, { "epoch": 16.370833333333334, "grad_norm": 2.084142284188999, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 358626564, "step": 3929 }, { "epoch": 16.370833333333334, "loss": 0.04891330376267433, "loss_ce": 0.0002988032065331936, "loss_iou": 0.220703125, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 358626564, "step": 3929 }, { "epoch": 16.375, "grad_norm": 3.5570218841733103, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 358718064, "step": 3930 }, { "epoch": 16.375, "loss": 0.05540819093585014, "loss_ce": 3.5241994282841915e-06, "loss_iou": 0.21875, "loss_num": 0.01104736328125, "loss_xval": 0.055419921875, "num_input_tokens_seen": 358718064, "step": 3930 }, { "epoch": 16.379166666666666, "grad_norm": 2.7408513904624936, "learning_rate": 5e-05, "loss": 0.0917, "num_input_tokens_seen": 358809840, "step": 3931 }, { "epoch": 16.379166666666666, "loss": 0.11305944621562958, "loss_ce": 5.2851781219942495e-05, "loss_iou": 0.314453125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 358809840, "step": 3931 }, { "epoch": 16.383333333333333, "grad_norm": 2.653064682663934, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 358901604, "step": 3932 }, { "epoch": 16.383333333333333, "loss": 0.04911474883556366, "loss_ce": 0.00022558898490387946, "loss_iou": 0.283203125, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 358901604, "step": 3932 }, { "epoch": 16.3875, "grad_norm": 3.1667283570943643, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 358992476, "step": 3933 }, { "epoch": 16.3875, "loss": 0.06207574903964996, "loss_ce": 2.996779585373588e-06, "loss_iou": 0.38671875, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 358992476, "step": 3933 }, { "epoch": 16.391666666666666, "grad_norm": 3.507941496011046, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 359083832, "step": 3934 }, { "epoch": 16.391666666666666, "loss": 0.056043900549411774, "loss_ce": 1.3627225598611403e-05, "loss_iou": 0.2001953125, "loss_num": 0.01116943359375, "loss_xval": 0.05615234375, "num_input_tokens_seen": 359083832, "step": 3934 }, { "epoch": 16.395833333333332, "grad_norm": 3.6830244934152105, "learning_rate": 5e-05, "loss": 0.0789, "num_input_tokens_seen": 359175132, "step": 3935 }, { "epoch": 16.395833333333332, "loss": 0.060316603630781174, "loss_ce": 0.003614944638684392, "loss_iou": 0.341796875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 359175132, "step": 3935 }, { "epoch": 16.4, "grad_norm": 2.6887028100035666, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 359266296, "step": 3936 }, { "epoch": 16.4, "loss": 0.036618150770664215, "loss_ce": 4.2830437450902537e-05, "loss_iou": 0.21484375, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 359266296, "step": 3936 }, { "epoch": 16.404166666666665, "grad_norm": 1.836769364658006, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 359358420, "step": 3937 }, { "epoch": 16.404166666666665, "loss": 0.047061532735824585, "loss_ce": 1.8682949303183705e-05, "loss_iou": 0.1689453125, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 359358420, "step": 3937 }, { "epoch": 16.408333333333335, "grad_norm": 2.861514110892917, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 359449700, "step": 3938 }, { "epoch": 16.408333333333335, "loss": 0.06305009871721268, "loss_ce": 7.820833616278833e-07, "loss_iou": 0.35546875, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 359449700, "step": 3938 }, { "epoch": 16.4125, "grad_norm": 2.783997785575247, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 359541136, "step": 3939 }, { "epoch": 16.4125, "loss": 0.05213546007871628, "loss_ce": 0.0002708357642404735, "loss_iou": 0.3671875, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 359541136, "step": 3939 }, { "epoch": 16.416666666666668, "grad_norm": 3.2877178753912886, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 359632852, "step": 3940 }, { "epoch": 16.416666666666668, "loss": 0.03417900949716568, "loss_ce": 0.00018242768419440836, "loss_iou": 0.310546875, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 359632852, "step": 3940 }, { "epoch": 16.420833333333334, "grad_norm": 2.5094663365036936, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 359724656, "step": 3941 }, { "epoch": 16.420833333333334, "loss": 0.05305403470993042, "loss_ce": 2.9741953767370433e-05, "loss_iou": 0.37109375, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 359724656, "step": 3941 }, { "epoch": 16.425, "grad_norm": 1.9912816973667706, "learning_rate": 5e-05, "loss": 0.0615, "num_input_tokens_seen": 359816076, "step": 3942 }, { "epoch": 16.425, "loss": 0.04908981919288635, "loss_ce": 7.859131437726319e-05, "loss_iou": 0.26953125, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 359816076, "step": 3942 }, { "epoch": 16.429166666666667, "grad_norm": 3.071130093533867, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 359907692, "step": 3943 }, { "epoch": 16.429166666666667, "loss": 0.0289724413305521, "loss_ce": 1.126076949731214e-05, "loss_iou": 0.283203125, "loss_num": 0.00579833984375, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 359907692, "step": 3943 }, { "epoch": 16.433333333333334, "grad_norm": 4.327886328416566, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 359999264, "step": 3944 }, { "epoch": 16.433333333333334, "loss": 0.046083178371191025, "loss_ce": 1.689563214313239e-05, "loss_iou": 0.177734375, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 359999264, "step": 3944 }, { "epoch": 16.4375, "grad_norm": 4.330135050282263, "learning_rate": 5e-05, "loss": 0.1078, "num_input_tokens_seen": 360090256, "step": 3945 }, { "epoch": 16.4375, "loss": 0.10656769573688507, "loss_ce": 3.0872138268023264e-07, "loss_iou": 0.2734375, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 360090256, "step": 3945 }, { "epoch": 16.441666666666666, "grad_norm": 3.3669976976099814, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 360181368, "step": 3946 }, { "epoch": 16.441666666666666, "loss": 0.042137518525123596, "loss_ce": 8.001441528904252e-06, "loss_iou": 0.32421875, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 360181368, "step": 3946 }, { "epoch": 16.445833333333333, "grad_norm": 2.954780209526445, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 360272876, "step": 3947 }, { "epoch": 16.445833333333333, "loss": 0.05872957780957222, "loss_ce": 0.0009140259935520589, "loss_iou": 0.2470703125, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 360272876, "step": 3947 }, { "epoch": 16.45, "grad_norm": 1.6089914984018734, "learning_rate": 5e-05, "loss": 0.0306, "num_input_tokens_seen": 360364988, "step": 3948 }, { "epoch": 16.45, "loss": 0.03769281879067421, "loss_ce": 1.8869384803110734e-05, "loss_iou": 0.1689453125, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 360364988, "step": 3948 }, { "epoch": 16.454166666666666, "grad_norm": 0.7518614230479753, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 360456460, "step": 3949 }, { "epoch": 16.454166666666666, "loss": 0.08800274133682251, "loss_ce": 5.303246325638611e-06, "loss_iou": 0.3203125, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 360456460, "step": 3949 }, { "epoch": 16.458333333333332, "grad_norm": 1.3710043821038695, "learning_rate": 5e-05, "loss": 0.0526, "num_input_tokens_seen": 360547960, "step": 3950 }, { "epoch": 16.458333333333332, "loss": 0.07577581703662872, "loss_ce": 0.00010747826308943331, "loss_iou": 0.177734375, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 360547960, "step": 3950 }, { "epoch": 16.4625, "grad_norm": 1.8866717426357293, "learning_rate": 5e-05, "loss": 0.0429, "num_input_tokens_seen": 360639164, "step": 3951 }, { "epoch": 16.4625, "loss": 0.05358341708779335, "loss_ce": 2.1791793187730946e-06, "loss_iou": 0.29296875, "loss_num": 0.0107421875, "loss_xval": 0.053466796875, "num_input_tokens_seen": 360639164, "step": 3951 }, { "epoch": 16.466666666666665, "grad_norm": 2.5429951388449017, "learning_rate": 5e-05, "loss": 0.0313, "num_input_tokens_seen": 360730344, "step": 3952 }, { "epoch": 16.466666666666665, "loss": 0.028706632554531097, "loss_ce": 4.849062406719895e-06, "loss_iou": 0.283203125, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 360730344, "step": 3952 }, { "epoch": 16.470833333333335, "grad_norm": 3.1225498238347664, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 360821232, "step": 3953 }, { "epoch": 16.470833333333335, "loss": 0.05040149390697479, "loss_ce": 1.710073433969228e-06, "loss_iou": 0.353515625, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 360821232, "step": 3953 }, { "epoch": 16.475, "grad_norm": 2.91013005176825, "learning_rate": 5e-05, "loss": 0.0691, "num_input_tokens_seen": 360912412, "step": 3954 }, { "epoch": 16.475, "loss": 0.03898601979017258, "loss_ce": 3.0330040317494422e-05, "loss_iou": 0.283203125, "loss_num": 0.007781982421875, "loss_xval": 0.0390625, "num_input_tokens_seen": 360912412, "step": 3954 }, { "epoch": 16.479166666666668, "grad_norm": 6.010620113229064, "learning_rate": 5e-05, "loss": 0.04, "num_input_tokens_seen": 361004048, "step": 3955 }, { "epoch": 16.479166666666668, "loss": 0.024341393262147903, "loss_ce": 0.0021398558747023344, "loss_iou": 0.2041015625, "loss_num": 0.00445556640625, "loss_xval": 0.022216796875, "num_input_tokens_seen": 361004048, "step": 3955 }, { "epoch": 16.483333333333334, "grad_norm": 2.832025449318552, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 361095264, "step": 3956 }, { "epoch": 16.483333333333334, "loss": 0.03328515589237213, "loss_ce": 1.3365360246098135e-05, "loss_iou": 0.296875, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 361095264, "step": 3956 }, { "epoch": 16.4875, "grad_norm": 3.1603499478759023, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 361186276, "step": 3957 }, { "epoch": 16.4875, "loss": 0.03434790298342705, "loss_ce": 7.6663403888233e-05, "loss_iou": 0.23828125, "loss_num": 0.006866455078125, "loss_xval": 0.0341796875, "num_input_tokens_seen": 361186276, "step": 3957 }, { "epoch": 16.491666666666667, "grad_norm": 2.6357384326333406, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 361277428, "step": 3958 }, { "epoch": 16.491666666666667, "loss": 0.06537545472383499, "loss_ce": 0.000357751821866259, "loss_iou": 0.2421875, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 361277428, "step": 3958 }, { "epoch": 16.495833333333334, "grad_norm": 2.5433925195431177, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 361368524, "step": 3959 }, { "epoch": 16.495833333333334, "loss": 0.07380108535289764, "loss_ce": 7.061405631247908e-05, "loss_iou": 0.404296875, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 361368524, "step": 3959 }, { "epoch": 16.5, "grad_norm": 2.6606692648061943, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 361459344, "step": 3960 }, { "epoch": 16.5, "loss": 0.033251769840717316, "loss_ce": 9.442002919968218e-05, "loss_iou": 0.2890625, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 361459344, "step": 3960 }, { "epoch": 16.504166666666666, "grad_norm": 3.4631409239854007, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 361550156, "step": 3961 }, { "epoch": 16.504166666666666, "loss": 0.061251938343048096, "loss_ce": 3.1600191050529247e-06, "loss_iou": 0.263671875, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 361550156, "step": 3961 }, { "epoch": 16.508333333333333, "grad_norm": 2.229761925013082, "learning_rate": 5e-05, "loss": 0.0724, "num_input_tokens_seen": 361641820, "step": 3962 }, { "epoch": 16.508333333333333, "loss": 0.049893561750650406, "loss_ce": 5.835596675751731e-05, "loss_iou": 0.26953125, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 361641820, "step": 3962 }, { "epoch": 16.5125, "grad_norm": 3.5200645848253407, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 361733668, "step": 3963 }, { "epoch": 16.5125, "loss": 0.0387338325381279, "loss_ce": 2.2287305910140276e-05, "loss_iou": 0.28125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 361733668, "step": 3963 }, { "epoch": 16.516666666666666, "grad_norm": 3.639270354142469, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 361824876, "step": 3964 }, { "epoch": 16.516666666666666, "loss": 0.05050446093082428, "loss_ce": 6.652936281170696e-05, "loss_iou": 0.20703125, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 361824876, "step": 3964 }, { "epoch": 16.520833333333332, "grad_norm": 3.1210787100609223, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 361916172, "step": 3965 }, { "epoch": 16.520833333333332, "loss": 0.030680567026138306, "loss_ce": 4.0917599108070135e-05, "loss_iou": 0.33203125, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 361916172, "step": 3965 }, { "epoch": 16.525, "grad_norm": 2.742357402552261, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 362007716, "step": 3966 }, { "epoch": 16.525, "loss": 0.029899753630161285, "loss_ce": 2.3044289264362305e-05, "loss_iou": 0.240234375, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 362007716, "step": 3966 }, { "epoch": 16.529166666666665, "grad_norm": 3.4393570074792135, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 362098880, "step": 3967 }, { "epoch": 16.529166666666665, "loss": 0.05629254877567291, "loss_ce": 1.8134795027435757e-05, "loss_iou": 0.099609375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 362098880, "step": 3967 }, { "epoch": 16.533333333333335, "grad_norm": 2.2122674663551476, "learning_rate": 5e-05, "loss": 0.0647, "num_input_tokens_seen": 362190176, "step": 3968 }, { "epoch": 16.533333333333335, "loss": 0.08881276845932007, "loss_ce": 0.0005101521383039653, "loss_iou": 0.349609375, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 362190176, "step": 3968 }, { "epoch": 16.5375, "grad_norm": 1.754024790338209, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 362281400, "step": 3969 }, { "epoch": 16.5375, "loss": 0.030114684253931046, "loss_ce": 9.093943845073227e-06, "loss_iou": 0.130859375, "loss_num": 0.006011962890625, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 362281400, "step": 3969 }, { "epoch": 16.541666666666668, "grad_norm": 1.4125522828745518, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 362372464, "step": 3970 }, { "epoch": 16.541666666666668, "loss": 0.02799748256802559, "loss_ce": 5.100919952383265e-05, "loss_iou": 0.2734375, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 362372464, "step": 3970 }, { "epoch": 16.545833333333334, "grad_norm": 1.9110897683022166, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 362463244, "step": 3971 }, { "epoch": 16.545833333333334, "loss": 0.0439089760184288, "loss_ce": 2.4702756491024047e-05, "loss_iou": 0.193359375, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 362463244, "step": 3971 }, { "epoch": 16.55, "grad_norm": 2.557862939779694, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 362555168, "step": 3972 }, { "epoch": 16.55, "loss": 0.061159897595644, "loss_ce": 2.6716365937318187e-06, "loss_iou": 0.3203125, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 362555168, "step": 3972 }, { "epoch": 16.554166666666667, "grad_norm": 8.832306564244698, "learning_rate": 5e-05, "loss": 0.0432, "num_input_tokens_seen": 362645688, "step": 3973 }, { "epoch": 16.554166666666667, "loss": 0.048586659133434296, "loss_ce": 2.6755474209494423e-06, "loss_iou": 0.314453125, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 362645688, "step": 3973 }, { "epoch": 16.558333333333334, "grad_norm": 2.3787485737412917, "learning_rate": 5e-05, "loss": 0.0891, "num_input_tokens_seen": 362736740, "step": 3974 }, { "epoch": 16.558333333333334, "loss": 0.0625823587179184, "loss_ce": 2.1322119209798984e-05, "loss_iou": 0.26171875, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 362736740, "step": 3974 }, { "epoch": 16.5625, "grad_norm": 3.0644700523015955, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 362828236, "step": 3975 }, { "epoch": 16.5625, "loss": 0.03315906971693039, "loss_ce": 0.0006654754397459328, "loss_iou": 0.12451171875, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 362828236, "step": 3975 }, { "epoch": 16.566666666666666, "grad_norm": 2.9552589031927377, "learning_rate": 5e-05, "loss": 0.043, "num_input_tokens_seen": 362919436, "step": 3976 }, { "epoch": 16.566666666666666, "loss": 0.0296634454280138, "loss_ce": 3.5889854643755825e-07, "loss_iou": 0.3125, "loss_num": 0.00592041015625, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 362919436, "step": 3976 }, { "epoch": 16.570833333333333, "grad_norm": 4.567618578224438, "learning_rate": 5e-05, "loss": 0.0752, "num_input_tokens_seen": 363010696, "step": 3977 }, { "epoch": 16.570833333333333, "loss": 0.052156493067741394, "loss_ce": 1.7207483324455097e-05, "loss_iou": 0.31640625, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 363010696, "step": 3977 }, { "epoch": 16.575, "grad_norm": 2.7837989380544443, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 363101464, "step": 3978 }, { "epoch": 16.575, "loss": 0.030507449060678482, "loss_ce": 0.00011957007518503815, "loss_iou": 0.1796875, "loss_num": 0.006072998046875, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 363101464, "step": 3978 }, { "epoch": 16.579166666666666, "grad_norm": 2.3226219034299707, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 363193092, "step": 3979 }, { "epoch": 16.579166666666666, "loss": 0.052284494042396545, "loss_ce": 7.883674697950482e-06, "loss_iou": 0.1376953125, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 363193092, "step": 3979 }, { "epoch": 16.583333333333332, "grad_norm": 1.8494584473716926, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 363284432, "step": 3980 }, { "epoch": 16.583333333333332, "loss": 0.03750983625650406, "loss_ce": 1.8991537217516452e-05, "loss_iou": 0.349609375, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 363284432, "step": 3980 }, { "epoch": 16.5875, "grad_norm": 1.3009296462354598, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 363375496, "step": 3981 }, { "epoch": 16.5875, "loss": 0.04523888975381851, "loss_ce": 0.0003170108830090612, "loss_iou": 0.2421875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 363375496, "step": 3981 }, { "epoch": 16.591666666666665, "grad_norm": 1.4056978587534819, "learning_rate": 5e-05, "loss": 0.0526, "num_input_tokens_seen": 363466500, "step": 3982 }, { "epoch": 16.591666666666665, "loss": 0.05627135932445526, "loss_ce": 2.746665086306166e-05, "loss_iou": 0.271484375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 363466500, "step": 3982 }, { "epoch": 16.595833333333335, "grad_norm": 2.3504507881273815, "learning_rate": 5e-05, "loss": 0.0302, "num_input_tokens_seen": 363558340, "step": 3983 }, { "epoch": 16.595833333333335, "loss": 0.021505560725927353, "loss_ce": 9.747860167408362e-05, "loss_iou": 0.2060546875, "loss_num": 0.0042724609375, "loss_xval": 0.0213623046875, "num_input_tokens_seen": 363558340, "step": 3983 }, { "epoch": 16.6, "grad_norm": 2.4013451408399433, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 363649460, "step": 3984 }, { "epoch": 16.6, "loss": 0.06826449185609818, "loss_ce": 0.00030947118648327887, "loss_iou": 0.1669921875, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 363649460, "step": 3984 }, { "epoch": 16.604166666666668, "grad_norm": 2.578801995737697, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 363740344, "step": 3985 }, { "epoch": 16.604166666666668, "loss": 0.04665427654981613, "loss_ce": 6.156325980555266e-05, "loss_iou": 0.1767578125, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 363740344, "step": 3985 }, { "epoch": 16.608333333333334, "grad_norm": 3.2172138617040766, "learning_rate": 5e-05, "loss": 0.052, "num_input_tokens_seen": 363831740, "step": 3986 }, { "epoch": 16.608333333333334, "loss": 0.03587819263339043, "loss_ce": 4.778653419634793e-06, "loss_iou": 0.30859375, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 363831740, "step": 3986 }, { "epoch": 16.6125, "grad_norm": 3.212871970724973, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 363922936, "step": 3987 }, { "epoch": 16.6125, "loss": 0.11252367496490479, "loss_ce": 5.368115580495214e-06, "loss_iou": 0.34375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 363922936, "step": 3987 }, { "epoch": 16.616666666666667, "grad_norm": 2.447288223052677, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 364014936, "step": 3988 }, { "epoch": 16.616666666666667, "loss": 0.03526037186384201, "loss_ce": 1.2567723388201557e-05, "loss_iou": 0.2490234375, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 364014936, "step": 3988 }, { "epoch": 16.620833333333334, "grad_norm": 2.315663400316989, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 364105832, "step": 3989 }, { "epoch": 16.620833333333334, "loss": 0.03379864618182182, "loss_ce": 4.292664925742429e-07, "loss_iou": 0.2734375, "loss_num": 0.00677490234375, "loss_xval": 0.03369140625, "num_input_tokens_seen": 364105832, "step": 3989 }, { "epoch": 16.625, "grad_norm": 2.3383386216465563, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 364197140, "step": 3990 }, { "epoch": 16.625, "loss": 0.03636720031499863, "loss_ce": 5.127894110046327e-05, "loss_iou": 0.265625, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 364197140, "step": 3990 }, { "epoch": 16.629166666666666, "grad_norm": 3.0280427062356243, "learning_rate": 5e-05, "loss": 0.1054, "num_input_tokens_seen": 364288356, "step": 3991 }, { "epoch": 16.629166666666666, "loss": 0.0834091454744339, "loss_ce": 4.607763003150467e-06, "loss_iou": 0.33984375, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 364288356, "step": 3991 }, { "epoch": 16.633333333333333, "grad_norm": 2.460740338451805, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 364378880, "step": 3992 }, { "epoch": 16.633333333333333, "loss": 0.039358705282211304, "loss_ce": 6.287586074904539e-06, "loss_iou": 0.263671875, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 364378880, "step": 3992 }, { "epoch": 16.6375, "grad_norm": 4.056336631532033, "learning_rate": 5e-05, "loss": 0.1004, "num_input_tokens_seen": 364469260, "step": 3993 }, { "epoch": 16.6375, "loss": 0.15124809741973877, "loss_ce": 3.350642873556353e-05, "loss_iou": 0.234375, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 364469260, "step": 3993 }, { "epoch": 16.641666666666666, "grad_norm": 5.771803115622093, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 364560676, "step": 3994 }, { "epoch": 16.641666666666666, "loss": 0.03899519890546799, "loss_ce": 0.00010054669110104442, "loss_iou": 0.310546875, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 364560676, "step": 3994 }, { "epoch": 16.645833333333332, "grad_norm": 2.505074864393875, "learning_rate": 5e-05, "loss": 0.0943, "num_input_tokens_seen": 364652532, "step": 3995 }, { "epoch": 16.645833333333332, "loss": 0.14425472915172577, "loss_ce": 7.443044160027057e-05, "loss_iou": 0.267578125, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 364652532, "step": 3995 }, { "epoch": 16.65, "grad_norm": 3.750496925366113, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 364743872, "step": 3996 }, { "epoch": 16.65, "loss": 0.03405202552676201, "loss_ce": 0.00011647972860373557, "loss_iou": 0.255859375, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 364743872, "step": 3996 }, { "epoch": 16.654166666666665, "grad_norm": 2.780715096761446, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 364835340, "step": 3997 }, { "epoch": 16.654166666666665, "loss": 0.037465497851371765, "loss_ce": 5.172362762095872e-06, "loss_iou": 0.251953125, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 364835340, "step": 3997 }, { "epoch": 16.658333333333335, "grad_norm": 4.023870708348408, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 364926784, "step": 3998 }, { "epoch": 16.658333333333335, "loss": 0.06420918554067612, "loss_ce": 4.597852239385247e-05, "loss_iou": 0.255859375, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 364926784, "step": 3998 }, { "epoch": 16.6625, "grad_norm": 2.9223755657733244, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 365018136, "step": 3999 }, { "epoch": 16.6625, "loss": 0.04452334716916084, "loss_ce": 2.8717840905301273e-05, "loss_iou": 0.423828125, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 365018136, "step": 3999 }, { "epoch": 16.666666666666668, "grad_norm": 1.9494567562697138, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 365109720, "step": 4000 }, { "epoch": 16.666666666666668, "eval_seeclick_CIoU": 0.2187977135181427, "eval_seeclick_GIoU": 0.19433742761611938, "eval_seeclick_IoU": 0.32613541185855865, "eval_seeclick_MAE_all": 0.10025185346603394, "eval_seeclick_MAE_h": 0.0734252966940403, "eval_seeclick_MAE_w": 0.21040697395801544, "eval_seeclick_MAE_x_boxes": 0.23823681473731995, "eval_seeclick_MAE_y_boxes": 0.07987504452466965, "eval_seeclick_NUM_probability": 0.9999950528144836, "eval_seeclick_inside_bbox": 0.5397727340459824, "eval_seeclick_loss": 0.6068128347396851, "eval_seeclick_loss_ce": 0.15084724873304367, "eval_seeclick_loss_iou": 0.4635009765625, "eval_seeclick_loss_num": 0.0870208740234375, "eval_seeclick_loss_xval": 0.43511962890625, "eval_seeclick_runtime": 79.9986, "eval_seeclick_samples_per_second": 0.538, "eval_seeclick_steps_per_second": 0.025, "num_input_tokens_seen": 365109720, "step": 4000 }, { "epoch": 16.666666666666668, "eval_icons_CIoU": 0.25196781009435654, "eval_icons_GIoU": 0.24798469990491867, "eval_icons_IoU": 0.3420267254114151, "eval_icons_MAE_all": 0.07624227181077003, "eval_icons_MAE_h": 0.17375393956899643, "eval_icons_MAE_w": 0.1023324653506279, "eval_icons_MAE_x_boxes": 0.10294432565569878, "eval_icons_MAE_y_boxes": 0.17453518509864807, "eval_icons_NUM_probability": 0.9999966323375702, "eval_icons_inside_bbox": 0.5190972238779068, "eval_icons_loss": 0.37557896971702576, "eval_icons_loss_ce": 0.00021423189900815487, "eval_icons_loss_iou": 0.19970703125, "eval_icons_loss_num": 0.0756378173828125, "eval_icons_loss_xval": 0.3778076171875, "eval_icons_runtime": 88.2075, "eval_icons_samples_per_second": 0.567, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 365109720, "step": 4000 }, { "epoch": 16.666666666666668, "eval_screenspot_CIoU": 0.3996092975139618, "eval_screenspot_GIoU": 0.3912068208058675, "eval_screenspot_IoU": 0.47129369775454205, "eval_screenspot_MAE_all": 0.09338084111611049, "eval_screenspot_MAE_h": 0.08267445862293243, "eval_screenspot_MAE_w": 0.20502433677514395, "eval_screenspot_MAE_x_boxes": 0.1848097344239553, "eval_screenspot_MAE_y_boxes": 0.07364016274611156, "eval_screenspot_NUM_probability": 0.9999982317288717, "eval_screenspot_inside_bbox": 0.7041666706403097, "eval_screenspot_loss": 0.4725082814693451, "eval_screenspot_loss_ce": 0.0009411601656192184, "eval_screenspot_loss_iou": 0.3634440104166667, "eval_screenspot_loss_num": 0.095428466796875, "eval_screenspot_loss_xval": 0.47698974609375, "eval_screenspot_runtime": 161.4524, "eval_screenspot_samples_per_second": 0.551, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 365109720, "step": 4000 }, { "epoch": 16.666666666666668, "eval_compot_CIoU": 0.5026089549064636, "eval_compot_GIoU": 0.5021179020404816, "eval_compot_IoU": 0.5728136301040649, "eval_compot_MAE_all": 0.052255457267165184, "eval_compot_MAE_h": 0.06820886395871639, "eval_compot_MAE_w": 0.1275174878537655, "eval_compot_MAE_x_boxes": 0.12862426042556763, "eval_compot_MAE_y_boxes": 0.06744185462594032, "eval_compot_NUM_probability": 0.9999986290931702, "eval_compot_inside_bbox": 0.7673611044883728, "eval_compot_loss": 0.30062806606292725, "eval_compot_loss_ce": 0.04959471523761749, "eval_compot_loss_iou": 0.33392333984375, "eval_compot_loss_num": 0.04592132568359375, "eval_compot_loss_xval": 0.2295684814453125, "eval_compot_runtime": 92.9128, "eval_compot_samples_per_second": 0.538, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 365109720, "step": 4000 }, { "epoch": 16.666666666666668, "loss": 0.278637170791626, "loss_ce": 0.04554390907287598, "loss_iou": 0.361328125, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 365109720, "step": 4000 }, { "epoch": 16.670833333333334, "grad_norm": 1.6410736715136567, "learning_rate": 5e-05, "loss": 0.0633, "num_input_tokens_seen": 365200588, "step": 4001 }, { "epoch": 16.670833333333334, "loss": 0.04317476600408554, "loss_ce": 7.651606210856698e-06, "loss_iou": 0.27734375, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 365200588, "step": 4001 }, { "epoch": 16.675, "grad_norm": 2.832863444144271, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 365292064, "step": 4002 }, { "epoch": 16.675, "loss": 0.03555441275238991, "loss_ce": 7.772813114570454e-05, "loss_iou": 0.2333984375, "loss_num": 0.007080078125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 365292064, "step": 4002 }, { "epoch": 16.679166666666667, "grad_norm": 6.63601226201065, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 365383416, "step": 4003 }, { "epoch": 16.679166666666667, "loss": 0.036875009536743164, "loss_ce": 9.77577565208776e-06, "loss_iou": 0.251953125, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 365383416, "step": 4003 }, { "epoch": 16.683333333333334, "grad_norm": 2.3054296641080487, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 365474616, "step": 4004 }, { "epoch": 16.683333333333334, "loss": 0.03364076465368271, "loss_ce": 1.0395049685030244e-05, "loss_iou": 0.2177734375, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 365474616, "step": 4004 }, { "epoch": 16.6875, "grad_norm": 2.754968795338958, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 365566040, "step": 4005 }, { "epoch": 16.6875, "loss": 0.037810347974300385, "loss_ce": 0.001326585072092712, "loss_iou": 0.26953125, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 365566040, "step": 4005 }, { "epoch": 16.691666666666666, "grad_norm": 2.7741713692211545, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 365657660, "step": 4006 }, { "epoch": 16.691666666666666, "loss": 0.06708613783121109, "loss_ce": 0.00026027217973023653, "loss_iou": 0.26953125, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 365657660, "step": 4006 }, { "epoch": 16.695833333333333, "grad_norm": 2.4848960988166726, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 365748648, "step": 4007 }, { "epoch": 16.695833333333333, "loss": 0.03710198774933815, "loss_ce": 3.076103166677058e-05, "loss_iou": 0.23046875, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 365748648, "step": 4007 }, { "epoch": 16.7, "grad_norm": 2.6255179088156115, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 365839664, "step": 4008 }, { "epoch": 16.7, "loss": 0.041383929550647736, "loss_ce": 2.094629053317476e-06, "loss_iou": 0.259765625, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 365839664, "step": 4008 }, { "epoch": 16.704166666666666, "grad_norm": 1.7964645414736218, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 365931420, "step": 4009 }, { "epoch": 16.704166666666666, "loss": 0.04012474790215492, "loss_ce": 0.00010094503522850573, "loss_iou": 0.314453125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 365931420, "step": 4009 }, { "epoch": 16.708333333333332, "grad_norm": 5.893781572467153, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 366022664, "step": 4010 }, { "epoch": 16.708333333333332, "loss": 0.04142048954963684, "loss_ce": 0.00019124093523714691, "loss_iou": 0.27734375, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 366022664, "step": 4010 }, { "epoch": 16.7125, "grad_norm": 4.09881481698516, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 366113676, "step": 4011 }, { "epoch": 16.7125, "loss": 0.029480930417776108, "loss_ce": 9.494428923062515e-07, "loss_iou": 0.205078125, "loss_num": 0.005889892578125, "loss_xval": 0.029541015625, "num_input_tokens_seen": 366113676, "step": 4011 }, { "epoch": 16.716666666666665, "grad_norm": 2.49927976290558, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 366204924, "step": 4012 }, { "epoch": 16.716666666666665, "loss": 0.06279443204402924, "loss_ce": 0.00014947263116482645, "loss_iou": 0.283203125, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 366204924, "step": 4012 }, { "epoch": 16.720833333333335, "grad_norm": 5.643743771940805, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 366295484, "step": 4013 }, { "epoch": 16.720833333333335, "loss": 0.05839596316218376, "loss_ce": 1.5833831639611162e-05, "loss_iou": 0.32421875, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 366295484, "step": 4013 }, { "epoch": 16.725, "grad_norm": 3.155206503441892, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 366386272, "step": 4014 }, { "epoch": 16.725, "loss": 0.04876965284347534, "loss_ce": 2.559177573857596e-06, "loss_iou": 0.193359375, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 366386272, "step": 4014 }, { "epoch": 16.729166666666668, "grad_norm": 2.0339484177133733, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 366477516, "step": 4015 }, { "epoch": 16.729166666666668, "loss": 0.02809450402855873, "loss_ce": 3.0740268357476452e-06, "loss_iou": 0.1796875, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 366477516, "step": 4015 }, { "epoch": 16.733333333333334, "grad_norm": 3.1178931117337405, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 366568904, "step": 4016 }, { "epoch": 16.733333333333334, "loss": 0.03765561431646347, "loss_ce": 0.0005538675468415022, "loss_iou": 0.359375, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 366568904, "step": 4016 }, { "epoch": 16.7375, "grad_norm": 3.7011023537882366, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 366660568, "step": 4017 }, { "epoch": 16.7375, "loss": 0.06564977020025253, "loss_ce": 0.00011327103129588068, "loss_iou": 0.373046875, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 366660568, "step": 4017 }, { "epoch": 16.741666666666667, "grad_norm": 2.707010113087898, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 366752124, "step": 4018 }, { "epoch": 16.741666666666667, "loss": 0.04419136792421341, "loss_ce": 1.9155565951223252e-06, "loss_iou": 0.28515625, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 366752124, "step": 4018 }, { "epoch": 16.745833333333334, "grad_norm": 2.8384005053193624, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 366843888, "step": 4019 }, { "epoch": 16.745833333333334, "loss": 0.03002469427883625, "loss_ce": 1.0656568520062137e-05, "loss_iou": 0.244140625, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 366843888, "step": 4019 }, { "epoch": 16.75, "grad_norm": 3.1653530277293815, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 366935140, "step": 4020 }, { "epoch": 16.75, "loss": 0.09292182326316833, "loss_ce": 0.0009265905246138573, "loss_iou": 0.34765625, "loss_num": 0.0184326171875, "loss_xval": 0.091796875, "num_input_tokens_seen": 366935140, "step": 4020 }, { "epoch": 16.754166666666666, "grad_norm": 2.122395995444454, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 367026432, "step": 4021 }, { "epoch": 16.754166666666666, "loss": 0.030421065166592598, "loss_ce": 1.0298483175574802e-05, "loss_iou": 0.19140625, "loss_num": 0.006103515625, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 367026432, "step": 4021 }, { "epoch": 16.758333333333333, "grad_norm": 2.562085608406176, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 367118212, "step": 4022 }, { "epoch": 16.758333333333333, "loss": 0.047422319650650024, "loss_ce": 0.00013533404853660613, "loss_iou": 0.26953125, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 367118212, "step": 4022 }, { "epoch": 16.7625, "grad_norm": 2.655294975293356, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 367209388, "step": 4023 }, { "epoch": 16.7625, "loss": 0.04857932776212692, "loss_ce": 1.060061822499847e-05, "loss_iou": 0.1748046875, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 367209388, "step": 4023 }, { "epoch": 16.766666666666666, "grad_norm": 2.447192267899224, "learning_rate": 5e-05, "loss": 0.0516, "num_input_tokens_seen": 367300760, "step": 4024 }, { "epoch": 16.766666666666666, "loss": 0.072332464158535, "loss_ce": 0.0010510298889130354, "loss_iou": 0.177734375, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 367300760, "step": 4024 }, { "epoch": 16.770833333333332, "grad_norm": 2.851437977074312, "learning_rate": 5e-05, "loss": 0.0535, "num_input_tokens_seen": 367392496, "step": 4025 }, { "epoch": 16.770833333333332, "loss": 0.027581773698329926, "loss_ce": 2.4399269022978842e-05, "loss_iou": 0.2158203125, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 367392496, "step": 4025 }, { "epoch": 16.775, "grad_norm": 2.6368920937836626, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 367484288, "step": 4026 }, { "epoch": 16.775, "loss": 0.06597106158733368, "loss_ce": 3.784064028877765e-05, "loss_iou": 0.2421875, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 367484288, "step": 4026 }, { "epoch": 16.779166666666665, "grad_norm": 2.794030801023625, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 367575732, "step": 4027 }, { "epoch": 16.779166666666665, "loss": 0.09035658836364746, "loss_ce": 0.00037551255081780255, "loss_iou": 0.12353515625, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 367575732, "step": 4027 }, { "epoch": 16.783333333333335, "grad_norm": 3.200663608710046, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 367667884, "step": 4028 }, { "epoch": 16.783333333333335, "loss": 0.052529964596033096, "loss_ce": 9.211295946442988e-06, "loss_iou": 0.2412109375, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 367667884, "step": 4028 }, { "epoch": 16.7875, "grad_norm": 3.2428786244901433, "learning_rate": 5e-05, "loss": 0.0382, "num_input_tokens_seen": 367758828, "step": 4029 }, { "epoch": 16.7875, "loss": 0.03407984972000122, "loss_ce": 5.274821523926221e-05, "loss_iou": 0.2470703125, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 367758828, "step": 4029 }, { "epoch": 16.791666666666668, "grad_norm": 5.145981390784509, "learning_rate": 5e-05, "loss": 0.0937, "num_input_tokens_seen": 367850296, "step": 4030 }, { "epoch": 16.791666666666668, "loss": 0.05094943568110466, "loss_ce": 1.559699376230128e-05, "loss_iou": 0.26953125, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 367850296, "step": 4030 }, { "epoch": 16.795833333333334, "grad_norm": 1.1260859347377563, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 367941600, "step": 4031 }, { "epoch": 16.795833333333334, "loss": 0.024731360375881195, "loss_ce": 1.2122669431846589e-05, "loss_iou": 0.267578125, "loss_num": 0.00494384765625, "loss_xval": 0.024658203125, "num_input_tokens_seen": 367941600, "step": 4031 }, { "epoch": 16.8, "grad_norm": 1.794489654910749, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 368033408, "step": 4032 }, { "epoch": 16.8, "loss": 0.05247056856751442, "loss_ce": 0.00022447184892371297, "loss_iou": 0.24609375, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 368033408, "step": 4032 }, { "epoch": 16.804166666666667, "grad_norm": 6.159764645003933, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 368125576, "step": 4033 }, { "epoch": 16.804166666666667, "loss": 0.060446955263614655, "loss_ce": 6.894314083183417e-06, "loss_iou": 0.259765625, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 368125576, "step": 4033 }, { "epoch": 16.808333333333334, "grad_norm": 2.4625028230047485, "learning_rate": 5e-05, "loss": 0.0826, "num_input_tokens_seen": 368216032, "step": 4034 }, { "epoch": 16.808333333333334, "loss": 0.12289955466985703, "loss_ce": 0.0005393251776695251, "loss_iou": 0.2255859375, "loss_num": 0.0244140625, "loss_xval": 0.12255859375, "num_input_tokens_seen": 368216032, "step": 4034 }, { "epoch": 16.8125, "grad_norm": 3.44767132408497, "learning_rate": 5e-05, "loss": 0.0358, "num_input_tokens_seen": 368307628, "step": 4035 }, { "epoch": 16.8125, "loss": 0.03818739950656891, "loss_ce": 9.91188244370278e-06, "loss_iou": 0.375, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 368307628, "step": 4035 }, { "epoch": 16.816666666666666, "grad_norm": 2.5776685347298294, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 368399232, "step": 4036 }, { "epoch": 16.816666666666666, "loss": 0.053744807839393616, "loss_ce": 0.00014068148448131979, "loss_iou": 0.39453125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 368399232, "step": 4036 }, { "epoch": 16.820833333333333, "grad_norm": 2.9819800608908413, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 368490508, "step": 4037 }, { "epoch": 16.820833333333333, "loss": 0.05096079409122467, "loss_ce": 5.747316754423082e-05, "loss_iou": 0.2421875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 368490508, "step": 4037 }, { "epoch": 16.825, "grad_norm": 2.4913500292685553, "learning_rate": 5e-05, "loss": 0.0931, "num_input_tokens_seen": 368581764, "step": 4038 }, { "epoch": 16.825, "loss": 0.09946852922439575, "loss_ce": 4.116043783142231e-06, "loss_iou": 0.28515625, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 368581764, "step": 4038 }, { "epoch": 16.829166666666666, "grad_norm": 3.0700934805413937, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 368672488, "step": 4039 }, { "epoch": 16.829166666666666, "loss": 0.03233366832137108, "loss_ce": 2.9432834480758174e-07, "loss_iou": 0.2578125, "loss_num": 0.0064697265625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 368672488, "step": 4039 }, { "epoch": 16.833333333333332, "grad_norm": 2.8659320252554616, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 368763464, "step": 4040 }, { "epoch": 16.833333333333332, "loss": 0.05825977399945259, "loss_ce": 1.718512294246466e-06, "loss_iou": 0.234375, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 368763464, "step": 4040 }, { "epoch": 16.8375, "grad_norm": 2.717146392782448, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 368854860, "step": 4041 }, { "epoch": 16.8375, "loss": 0.08421778678894043, "loss_ce": 0.0014998923288658261, "loss_iou": 0.26171875, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 368854860, "step": 4041 }, { "epoch": 16.841666666666665, "grad_norm": 1.9585966729625848, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 368946416, "step": 4042 }, { "epoch": 16.841666666666665, "loss": 0.05129852890968323, "loss_ce": 0.00014343684597406536, "loss_iou": 0.216796875, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 368946416, "step": 4042 }, { "epoch": 16.845833333333335, "grad_norm": 2.453415700006763, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 369037700, "step": 4043 }, { "epoch": 16.845833333333335, "loss": 0.053867191076278687, "loss_ce": 3.6648129935201723e-06, "loss_iou": 0.3515625, "loss_num": 0.0107421875, "loss_xval": 0.053955078125, "num_input_tokens_seen": 369037700, "step": 4043 }, { "epoch": 16.85, "grad_norm": 2.545531334753407, "learning_rate": 5e-05, "loss": 0.0485, "num_input_tokens_seen": 369128312, "step": 4044 }, { "epoch": 16.85, "loss": 0.053280819207429886, "loss_ce": 2.7642910936265253e-05, "loss_iou": 0.2275390625, "loss_num": 0.01068115234375, "loss_xval": 0.05322265625, "num_input_tokens_seen": 369128312, "step": 4044 }, { "epoch": 16.854166666666668, "grad_norm": 2.8526151681009715, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 369219084, "step": 4045 }, { "epoch": 16.854166666666668, "loss": 0.0599316768348217, "loss_ce": 2.56677685683826e-05, "loss_iou": 0.1787109375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 369219084, "step": 4045 }, { "epoch": 16.858333333333334, "grad_norm": 2.734789273628204, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 369310688, "step": 4046 }, { "epoch": 16.858333333333334, "loss": 0.04425501078367233, "loss_ce": 3.504356573102996e-05, "loss_iou": 0.33203125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 369310688, "step": 4046 }, { "epoch": 16.8625, "grad_norm": 3.226174012215865, "learning_rate": 5e-05, "loss": 0.0339, "num_input_tokens_seen": 369401544, "step": 4047 }, { "epoch": 16.8625, "loss": 0.03497467562556267, "loss_ce": 1.5316002190957079e-06, "loss_iou": 0.298828125, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 369401544, "step": 4047 }, { "epoch": 16.866666666666667, "grad_norm": 4.093369892681781, "learning_rate": 5e-05, "loss": 0.0694, "num_input_tokens_seen": 369493136, "step": 4048 }, { "epoch": 16.866666666666667, "loss": 0.07229539752006531, "loss_ce": 0.0024711769074201584, "loss_iou": 0.33984375, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 369493136, "step": 4048 }, { "epoch": 16.870833333333334, "grad_norm": 15.300191803326936, "learning_rate": 5e-05, "loss": 0.0975, "num_input_tokens_seen": 369584212, "step": 4049 }, { "epoch": 16.870833333333334, "loss": 0.14228758215904236, "loss_ce": 1.4646837371401489e-05, "loss_iou": 0.34375, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 369584212, "step": 4049 }, { "epoch": 16.875, "grad_norm": 1.9861558181634973, "learning_rate": 5e-05, "loss": 0.0849, "num_input_tokens_seen": 369675404, "step": 4050 }, { "epoch": 16.875, "loss": 0.07799716293811798, "loss_ce": 9.495933227299247e-06, "loss_iou": 0.341796875, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 369675404, "step": 4050 }, { "epoch": 16.879166666666666, "grad_norm": 7.091214839519732, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 369766796, "step": 4051 }, { "epoch": 16.879166666666666, "loss": 0.043971382081508636, "loss_ce": 0.0003846481558866799, "loss_iou": 0.32421875, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 369766796, "step": 4051 }, { "epoch": 16.883333333333333, "grad_norm": 2.0299282941576875, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 369858008, "step": 4052 }, { "epoch": 16.883333333333333, "loss": 0.041609566658735275, "loss_ce": 0.000990670290775597, "loss_iou": 0.2314453125, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 369858008, "step": 4052 }, { "epoch": 16.8875, "grad_norm": 7.830960151468681, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 369948996, "step": 4053 }, { "epoch": 16.8875, "loss": 0.0572042316198349, "loss_ce": 6.6593884184840135e-06, "loss_iou": 0.2314453125, "loss_num": 0.011474609375, "loss_xval": 0.05712890625, "num_input_tokens_seen": 369948996, "step": 4053 }, { "epoch": 16.891666666666666, "grad_norm": 5.273891058134392, "learning_rate": 5e-05, "loss": 0.0913, "num_input_tokens_seen": 370040188, "step": 4054 }, { "epoch": 16.891666666666666, "loss": 0.05034564435482025, "loss_ce": 6.896343620610423e-06, "loss_iou": 0.205078125, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 370040188, "step": 4054 }, { "epoch": 16.895833333333332, "grad_norm": 3.4729443952903285, "learning_rate": 5e-05, "loss": 0.1036, "num_input_tokens_seen": 370130656, "step": 4055 }, { "epoch": 16.895833333333332, "loss": 0.06628492474555969, "loss_ce": 7.389370466626133e-07, "loss_iou": 0.37109375, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 370130656, "step": 4055 }, { "epoch": 16.9, "grad_norm": 2.3412868639267717, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 370222484, "step": 4056 }, { "epoch": 16.9, "loss": 0.08415798842906952, "loss_ce": 0.00017361801292281598, "loss_iou": 0.265625, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 370222484, "step": 4056 }, { "epoch": 16.904166666666665, "grad_norm": 2.3881985763644384, "learning_rate": 5e-05, "loss": 0.1178, "num_input_tokens_seen": 370313636, "step": 4057 }, { "epoch": 16.904166666666665, "loss": 0.12934906780719757, "loss_ce": 0.0005191150703467429, "loss_iou": 0.1796875, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 370313636, "step": 4057 }, { "epoch": 16.908333333333335, "grad_norm": 3.0055102224007664, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 370405208, "step": 4058 }, { "epoch": 16.908333333333335, "loss": 0.04517017677426338, "loss_ce": 4.16390503232833e-06, "loss_iou": 0.298828125, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 370405208, "step": 4058 }, { "epoch": 16.9125, "grad_norm": 2.365789937828196, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 370496724, "step": 4059 }, { "epoch": 16.9125, "loss": 0.053006406873464584, "loss_ce": 0.010983701795339584, "loss_iou": 0.265625, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 370496724, "step": 4059 }, { "epoch": 16.916666666666668, "grad_norm": 3.0113908440107897, "learning_rate": 5e-05, "loss": 0.125, "num_input_tokens_seen": 370588216, "step": 4060 }, { "epoch": 16.916666666666668, "loss": 0.2107238471508026, "loss_ce": 0.00015255525067914277, "loss_iou": 0.2080078125, "loss_num": 0.0419921875, "loss_xval": 0.2109375, "num_input_tokens_seen": 370588216, "step": 4060 }, { "epoch": 16.920833333333334, "grad_norm": 2.0049932346862103, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 370679492, "step": 4061 }, { "epoch": 16.920833333333334, "loss": 0.03363718464970589, "loss_ce": 8.787655679043382e-05, "loss_iou": 0.208984375, "loss_num": 0.0067138671875, "loss_xval": 0.033447265625, "num_input_tokens_seen": 370679492, "step": 4061 }, { "epoch": 16.925, "grad_norm": 3.3662195481642363, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 370770924, "step": 4062 }, { "epoch": 16.925, "loss": 0.05911504104733467, "loss_ce": 2.4945893528638408e-06, "loss_iou": 0.1708984375, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 370770924, "step": 4062 }, { "epoch": 16.929166666666667, "grad_norm": 2.6258405561734888, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 370862820, "step": 4063 }, { "epoch": 16.929166666666667, "loss": 0.06796564161777496, "loss_ce": 2.9951866054034326e-06, "loss_iou": 0.19140625, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 370862820, "step": 4063 }, { "epoch": 16.933333333333334, "grad_norm": 4.403878392486105, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 370953876, "step": 4064 }, { "epoch": 16.933333333333334, "loss": 0.03829577565193176, "loss_ce": 3.847268999379594e-06, "loss_iou": 0.154296875, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 370953876, "step": 4064 }, { "epoch": 16.9375, "grad_norm": 1.9362293574628262, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 371045536, "step": 4065 }, { "epoch": 16.9375, "loss": 0.03752783685922623, "loss_ce": 2.660123755049426e-06, "loss_iou": 0.2451171875, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 371045536, "step": 4065 }, { "epoch": 16.941666666666666, "grad_norm": 2.959179694047592, "learning_rate": 5e-05, "loss": 0.0647, "num_input_tokens_seen": 371136800, "step": 4066 }, { "epoch": 16.941666666666666, "loss": 0.06771986931562424, "loss_ce": 0.00012343730486463755, "loss_iou": 0.23828125, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 371136800, "step": 4066 }, { "epoch": 16.945833333333333, "grad_norm": 2.6218360895053427, "learning_rate": 5e-05, "loss": 0.1005, "num_input_tokens_seen": 371228512, "step": 4067 }, { "epoch": 16.945833333333333, "loss": 0.12994059920310974, "loss_ce": 2.7274709282210097e-05, "loss_iou": 0.1630859375, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 371228512, "step": 4067 }, { "epoch": 16.95, "grad_norm": 3.357540956809214, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 371320428, "step": 4068 }, { "epoch": 16.95, "loss": 0.03651657700538635, "loss_ce": 0.00015488412464037538, "loss_iou": 0.1787109375, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 371320428, "step": 4068 }, { "epoch": 16.954166666666666, "grad_norm": 2.794785336945426, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 371410556, "step": 4069 }, { "epoch": 16.954166666666666, "loss": 0.08668608218431473, "loss_ce": 9.055524969880935e-07, "loss_iou": 0.32421875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 371410556, "step": 4069 }, { "epoch": 16.958333333333332, "grad_norm": 3.6340431629838528, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 371502528, "step": 4070 }, { "epoch": 16.958333333333332, "loss": 0.03125636279582977, "loss_ce": 0.0002886476868297905, "loss_iou": 0.314453125, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 371502528, "step": 4070 }, { "epoch": 16.9625, "grad_norm": 3.603111563405317, "learning_rate": 5e-05, "loss": 0.0723, "num_input_tokens_seen": 371593780, "step": 4071 }, { "epoch": 16.9625, "loss": 0.0696285292506218, "loss_ce": 1.7936108633875847e-05, "loss_iou": 0.1591796875, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 371593780, "step": 4071 }, { "epoch": 16.966666666666665, "grad_norm": 3.9638524560359367, "learning_rate": 5e-05, "loss": 0.0528, "num_input_tokens_seen": 371685136, "step": 4072 }, { "epoch": 16.966666666666665, "loss": 0.07412572205066681, "loss_ce": 0.0007156896172091365, "loss_iou": 0.26953125, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 371685136, "step": 4072 }, { "epoch": 16.970833333333335, "grad_norm": 2.6761592787839263, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 371776312, "step": 4073 }, { "epoch": 16.970833333333335, "loss": 0.032429054379463196, "loss_ce": 1.9384531697141938e-05, "loss_iou": 0.208984375, "loss_num": 0.0064697265625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 371776312, "step": 4073 }, { "epoch": 16.975, "grad_norm": 10.428568635001916, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 371867092, "step": 4074 }, { "epoch": 16.975, "loss": 0.045186370611190796, "loss_ce": 5.093787876830902e-06, "loss_iou": 0.240234375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 371867092, "step": 4074 }, { "epoch": 16.979166666666668, "grad_norm": 16.999770390677803, "learning_rate": 5e-05, "loss": 0.0867, "num_input_tokens_seen": 371957968, "step": 4075 }, { "epoch": 16.979166666666668, "loss": 0.14276185631752014, "loss_ce": 6.23431390067708e-07, "loss_iou": 0.314453125, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 371957968, "step": 4075 }, { "epoch": 16.983333333333334, "grad_norm": 2.6052418985851724, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 372048676, "step": 4076 }, { "epoch": 16.983333333333334, "loss": 0.04159224405884743, "loss_ce": 2.7303876777295955e-05, "loss_iou": 0.2392578125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 372048676, "step": 4076 }, { "epoch": 16.9875, "grad_norm": 3.1679506494094456, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 372140012, "step": 4077 }, { "epoch": 16.9875, "loss": 0.028769517317414284, "loss_ce": 0.0001440290652681142, "loss_iou": 0.30859375, "loss_num": 0.0057373046875, "loss_xval": 0.028564453125, "num_input_tokens_seen": 372140012, "step": 4077 }, { "epoch": 16.991666666666667, "grad_norm": 4.048569369498823, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 372231528, "step": 4078 }, { "epoch": 16.991666666666667, "loss": 0.04449920356273651, "loss_ce": 1.98340458155144e-05, "loss_iou": 0.3203125, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 372231528, "step": 4078 }, { "epoch": 16.995833333333334, "grad_norm": 36.34557991333924, "learning_rate": 5e-05, "loss": 0.0881, "num_input_tokens_seen": 372322872, "step": 4079 }, { "epoch": 16.995833333333334, "loss": 0.07955171167850494, "loss_ce": 6.867582851555198e-05, "loss_iou": 0.36328125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 372322872, "step": 4079 }, { "epoch": 17.0, "grad_norm": 4.769963548994012, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 372414160, "step": 4080 }, { "epoch": 17.0, "loss": 0.051101259887218475, "loss_ce": 4.5351465814746916e-05, "loss_iou": 0.298828125, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 372414160, "step": 4080 }, { "epoch": 17.004166666666666, "grad_norm": 2.662306389973745, "learning_rate": 5e-05, "loss": 0.0737, "num_input_tokens_seen": 372505456, "step": 4081 }, { "epoch": 17.004166666666666, "loss": 0.10184650123119354, "loss_ce": 9.338312338513788e-06, "loss_iou": 0.259765625, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 372505456, "step": 4081 }, { "epoch": 17.008333333333333, "grad_norm": 2.9907076141286497, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 372596884, "step": 4082 }, { "epoch": 17.008333333333333, "loss": 0.03885851427912712, "loss_ce": 2.4898748961277306e-05, "loss_iou": 0.2734375, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 372596884, "step": 4082 }, { "epoch": 17.0125, "grad_norm": 3.301548128088256, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 372687932, "step": 4083 }, { "epoch": 17.0125, "loss": 0.05405256524682045, "loss_ce": 1.3564389519160613e-05, "loss_iou": 0.15234375, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 372687932, "step": 4083 }, { "epoch": 17.016666666666666, "grad_norm": 3.7730795965773454, "learning_rate": 5e-05, "loss": 0.07, "num_input_tokens_seen": 372778696, "step": 4084 }, { "epoch": 17.016666666666666, "loss": 0.08227451145648956, "loss_ce": 2.9634154998348095e-05, "loss_iou": 0.361328125, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 372778696, "step": 4084 }, { "epoch": 17.020833333333332, "grad_norm": 2.2226393759552336, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 372870048, "step": 4085 }, { "epoch": 17.020833333333332, "loss": 0.04948572814464569, "loss_ce": 1.4736596085640485e-06, "loss_iou": 0.41015625, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 372870048, "step": 4085 }, { "epoch": 17.025, "grad_norm": 3.994770241494494, "learning_rate": 5e-05, "loss": 0.0907, "num_input_tokens_seen": 372961008, "step": 4086 }, { "epoch": 17.025, "loss": 0.10503510385751724, "loss_ce": 1.6490666894242167e-05, "loss_iou": 0.1748046875, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 372961008, "step": 4086 }, { "epoch": 17.029166666666665, "grad_norm": 1.298938361841086, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 373052104, "step": 4087 }, { "epoch": 17.029166666666665, "loss": 0.03302188217639923, "loss_ce": 1.8632302953847102e-06, "loss_iou": 0.134765625, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 373052104, "step": 4087 }, { "epoch": 17.033333333333335, "grad_norm": 1.9625396497308418, "learning_rate": 5e-05, "loss": 0.0755, "num_input_tokens_seen": 373143348, "step": 4088 }, { "epoch": 17.033333333333335, "loss": 0.11454355716705322, "loss_ce": 3.4617858091223752e-06, "loss_iou": 0.302734375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 373143348, "step": 4088 }, { "epoch": 17.0375, "grad_norm": 3.986045902684438, "learning_rate": 5e-05, "loss": 0.0487, "num_input_tokens_seen": 373234840, "step": 4089 }, { "epoch": 17.0375, "loss": 0.054869748651981354, "loss_ce": 6.017951454850845e-05, "loss_iou": 0.16796875, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 373234840, "step": 4089 }, { "epoch": 17.041666666666668, "grad_norm": 2.2693008135127943, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 373326132, "step": 4090 }, { "epoch": 17.041666666666668, "loss": 0.04873867332935333, "loss_ce": 0.0002080958365695551, "loss_iou": 0.337890625, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 373326132, "step": 4090 }, { "epoch": 17.045833333333334, "grad_norm": 1.701540327520391, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 373417112, "step": 4091 }, { "epoch": 17.045833333333334, "loss": 0.03578822314739227, "loss_ce": 2.1621737687382847e-05, "loss_iou": 0.279296875, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 373417112, "step": 4091 }, { "epoch": 17.05, "grad_norm": 4.281085206791119, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 373508804, "step": 4092 }, { "epoch": 17.05, "loss": 0.04737226292490959, "loss_ce": 1.3522824247047538e-06, "loss_iou": 0.25, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 373508804, "step": 4092 }, { "epoch": 17.054166666666667, "grad_norm": 2.945342924581056, "learning_rate": 5e-05, "loss": 0.0743, "num_input_tokens_seen": 373600388, "step": 4093 }, { "epoch": 17.054166666666667, "loss": 0.04209952801465988, "loss_ce": 0.00018363283015787601, "loss_iou": 0.302734375, "loss_num": 0.00836181640625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 373600388, "step": 4093 }, { "epoch": 17.058333333333334, "grad_norm": 2.70778543173169, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 373692000, "step": 4094 }, { "epoch": 17.058333333333334, "loss": 0.04033127427101135, "loss_ce": 3.28097194142174e-05, "loss_iou": 0.35546875, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 373692000, "step": 4094 }, { "epoch": 17.0625, "grad_norm": 3.236834280345472, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 373783412, "step": 4095 }, { "epoch": 17.0625, "loss": 0.03508774936199188, "loss_ce": 3.0683379009133205e-05, "loss_iou": 0.279296875, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 373783412, "step": 4095 }, { "epoch": 17.066666666666666, "grad_norm": 3.560904092381228, "learning_rate": 5e-05, "loss": 0.043, "num_input_tokens_seen": 373875084, "step": 4096 }, { "epoch": 17.066666666666666, "loss": 0.02976866066455841, "loss_ce": 9.031443187268451e-05, "loss_iou": 0.328125, "loss_num": 0.00592041015625, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 373875084, "step": 4096 }, { "epoch": 17.070833333333333, "grad_norm": 2.7391476161511314, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 373966272, "step": 4097 }, { "epoch": 17.070833333333333, "loss": 0.0847180038690567, "loss_ce": 1.6464753571199253e-05, "loss_iou": 0.3046875, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 373966272, "step": 4097 }, { "epoch": 17.075, "grad_norm": 1.4101527221877117, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 374057068, "step": 4098 }, { "epoch": 17.075, "loss": 0.03125577047467232, "loss_ce": 5.771456471848069e-06, "loss_iou": 0.166015625, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 374057068, "step": 4098 }, { "epoch": 17.079166666666666, "grad_norm": 2.191619742883779, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 374148516, "step": 4099 }, { "epoch": 17.079166666666666, "loss": 0.09695076942443848, "loss_ce": 2.693852729862556e-05, "loss_iou": 0.19140625, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 374148516, "step": 4099 }, { "epoch": 17.083333333333332, "grad_norm": 3.593796620958026, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 374239580, "step": 4100 }, { "epoch": 17.083333333333332, "loss": 0.07341619580984116, "loss_ce": 6.16206853010226e-06, "loss_iou": 0.302734375, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 374239580, "step": 4100 }, { "epoch": 17.0875, "grad_norm": 3.377762992988838, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 374330488, "step": 4101 }, { "epoch": 17.0875, "loss": 0.04819894954562187, "loss_ce": 0.00013376145216170698, "loss_iou": 0.291015625, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 374330488, "step": 4101 }, { "epoch": 17.091666666666665, "grad_norm": 3.5510953998518926, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 374421696, "step": 4102 }, { "epoch": 17.091666666666665, "loss": 0.05382417142391205, "loss_ce": 6.4256664700224064e-06, "loss_iou": 0.37890625, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 374421696, "step": 4102 }, { "epoch": 17.095833333333335, "grad_norm": 2.3490478729597593, "learning_rate": 5e-05, "loss": 0.076, "num_input_tokens_seen": 374513196, "step": 4103 }, { "epoch": 17.095833333333335, "loss": 0.03897378221154213, "loss_ce": 2.8363986075419234e-06, "loss_iou": 0.30859375, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 374513196, "step": 4103 }, { "epoch": 17.1, "grad_norm": 2.139081143620778, "learning_rate": 5e-05, "loss": 0.0371, "num_input_tokens_seen": 374604392, "step": 4104 }, { "epoch": 17.1, "loss": 0.04249031841754913, "loss_ce": 9.846298780757934e-06, "loss_iou": 0.322265625, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 374604392, "step": 4104 }, { "epoch": 17.104166666666668, "grad_norm": 3.1062600113588323, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 374696044, "step": 4105 }, { "epoch": 17.104166666666668, "loss": 0.06857717782258987, "loss_ce": 4.18104900745675e-06, "loss_iou": 0.047607421875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 374696044, "step": 4105 }, { "epoch": 17.108333333333334, "grad_norm": 3.907289254950295, "learning_rate": 5e-05, "loss": 0.0317, "num_input_tokens_seen": 374787464, "step": 4106 }, { "epoch": 17.108333333333334, "loss": 0.035776399075984955, "loss_ce": 2.167586899304297e-06, "loss_iou": 0.2890625, "loss_num": 0.00714111328125, "loss_xval": 0.035888671875, "num_input_tokens_seen": 374787464, "step": 4106 }, { "epoch": 17.1125, "grad_norm": 1.5061928543804801, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 374878312, "step": 4107 }, { "epoch": 17.1125, "loss": 0.029756616801023483, "loss_ce": 1.978759428311605e-06, "loss_iou": 0.220703125, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 374878312, "step": 4107 }, { "epoch": 17.116666666666667, "grad_norm": 1.4717582325581207, "learning_rate": 5e-05, "loss": 0.0715, "num_input_tokens_seen": 374969656, "step": 4108 }, { "epoch": 17.116666666666667, "loss": 0.08082762360572815, "loss_ce": 9.448397577216383e-06, "loss_iou": 0.15625, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 374969656, "step": 4108 }, { "epoch": 17.120833333333334, "grad_norm": 1.2700583951215059, "learning_rate": 5e-05, "loss": 0.0367, "num_input_tokens_seen": 375061020, "step": 4109 }, { "epoch": 17.120833333333334, "loss": 0.033961232751607895, "loss_ce": 8.932576065490139e-07, "loss_iou": 0.2236328125, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 375061020, "step": 4109 }, { "epoch": 17.125, "grad_norm": 0.9037241210993366, "learning_rate": 5e-05, "loss": 0.0322, "num_input_tokens_seen": 375152520, "step": 4110 }, { "epoch": 17.125, "loss": 0.024991333484649658, "loss_ce": 0.00114184629637748, "loss_iou": 0.337890625, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 375152520, "step": 4110 }, { "epoch": 17.129166666666666, "grad_norm": 1.5900244598953917, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 375243896, "step": 4111 }, { "epoch": 17.129166666666666, "loss": 0.034204646944999695, "loss_ce": 2.495873559382744e-05, "loss_iou": 0.2177734375, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 375243896, "step": 4111 }, { "epoch": 17.133333333333333, "grad_norm": 2.2261162090891533, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 375335536, "step": 4112 }, { "epoch": 17.133333333333333, "loss": 0.03536083921790123, "loss_ce": 6.22510469838744e-06, "loss_iou": 0.2578125, "loss_num": 0.007080078125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 375335536, "step": 4112 }, { "epoch": 17.1375, "grad_norm": 1.4003842951662424, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 375426984, "step": 4113 }, { "epoch": 17.1375, "loss": 0.05160084366798401, "loss_ce": 1.0880850823014043e-05, "loss_iou": 0.08935546875, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 375426984, "step": 4113 }, { "epoch": 17.141666666666666, "grad_norm": 1.288984151087767, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 375517832, "step": 4114 }, { "epoch": 17.141666666666666, "loss": 0.04785723611712456, "loss_ce": 5.67401957596303e-06, "loss_iou": 0.275390625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 375517832, "step": 4114 }, { "epoch": 17.145833333333332, "grad_norm": 1.4372619479754545, "learning_rate": 5e-05, "loss": 0.0469, "num_input_tokens_seen": 375608728, "step": 4115 }, { "epoch": 17.145833333333332, "loss": 0.052819229662418365, "loss_ce": 8.561160939279944e-06, "loss_iou": 0.2392578125, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 375608728, "step": 4115 }, { "epoch": 17.15, "grad_norm": 2.6959178219886644, "learning_rate": 5e-05, "loss": 0.061, "num_input_tokens_seen": 375700688, "step": 4116 }, { "epoch": 17.15, "loss": 0.04783296585083008, "loss_ce": 4.293494384910446e-06, "loss_iou": 0.255859375, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 375700688, "step": 4116 }, { "epoch": 17.154166666666665, "grad_norm": 8.95136108189134, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 375792212, "step": 4117 }, { "epoch": 17.154166666666665, "loss": 0.07952168583869934, "loss_ce": 0.000160721450811252, "loss_iou": 0.251953125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 375792212, "step": 4117 }, { "epoch": 17.158333333333335, "grad_norm": 4.873580710701036, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 375883520, "step": 4118 }, { "epoch": 17.158333333333335, "loss": 0.09023542702198029, "loss_ce": 1.0212111192231532e-05, "loss_iou": 0.3203125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 375883520, "step": 4118 }, { "epoch": 17.1625, "grad_norm": 2.642787145601659, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 375975216, "step": 4119 }, { "epoch": 17.1625, "loss": 0.026552706956863403, "loss_ce": 2.530133497202769e-05, "loss_iou": 0.267578125, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 375975216, "step": 4119 }, { "epoch": 17.166666666666668, "grad_norm": 2.403293357052846, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 376066804, "step": 4120 }, { "epoch": 17.166666666666668, "loss": 0.05794864892959595, "loss_ce": 1.102811802411452e-05, "loss_iou": 0.291015625, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 376066804, "step": 4120 }, { "epoch": 17.170833333333334, "grad_norm": 2.187880455047866, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 376158144, "step": 4121 }, { "epoch": 17.170833333333334, "loss": 0.042102497071027756, "loss_ce": 3.4964855331054423e-06, "loss_iou": 0.267578125, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 376158144, "step": 4121 }, { "epoch": 17.175, "grad_norm": 3.2858897350605276, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 376249596, "step": 4122 }, { "epoch": 17.175, "loss": 0.058754559606313705, "loss_ce": 8.451620669802651e-05, "loss_iou": 0.23046875, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 376249596, "step": 4122 }, { "epoch": 17.179166666666667, "grad_norm": 2.077742822143257, "learning_rate": 5e-05, "loss": 0.044, "num_input_tokens_seen": 376340460, "step": 4123 }, { "epoch": 17.179166666666667, "loss": 0.039612989872694016, "loss_ce": 1.1738272860384313e-06, "loss_iou": 0.255859375, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 376340460, "step": 4123 }, { "epoch": 17.183333333333334, "grad_norm": 2.5078670252009854, "learning_rate": 5e-05, "loss": 0.0858, "num_input_tokens_seen": 376431304, "step": 4124 }, { "epoch": 17.183333333333334, "loss": 0.07966112345457077, "loss_ce": 1.7874119293992408e-05, "loss_iou": 0.248046875, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 376431304, "step": 4124 }, { "epoch": 17.1875, "grad_norm": 2.5328257559712233, "learning_rate": 5e-05, "loss": 0.0267, "num_input_tokens_seen": 376522512, "step": 4125 }, { "epoch": 17.1875, "loss": 0.022409534081816673, "loss_ce": 0.0002537722757551819, "loss_iou": 0.20703125, "loss_num": 0.004425048828125, "loss_xval": 0.022216796875, "num_input_tokens_seen": 376522512, "step": 4125 }, { "epoch": 17.191666666666666, "grad_norm": 4.78932799513451, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 376613316, "step": 4126 }, { "epoch": 17.191666666666666, "loss": 0.07543014734983444, "loss_ce": 0.002737276954576373, "loss_iou": 0.1982421875, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 376613316, "step": 4126 }, { "epoch": 17.195833333333333, "grad_norm": 3.4560754545560988, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 376704784, "step": 4127 }, { "epoch": 17.195833333333333, "loss": 0.053305864334106445, "loss_ce": 1.4543708857672755e-05, "loss_iou": 0.21875, "loss_num": 0.01068115234375, "loss_xval": 0.05322265625, "num_input_tokens_seen": 376704784, "step": 4127 }, { "epoch": 17.2, "grad_norm": 2.979212702062069, "learning_rate": 5e-05, "loss": 0.0893, "num_input_tokens_seen": 376796540, "step": 4128 }, { "epoch": 17.2, "loss": 0.1086147353053093, "loss_ce": 2.6740895009425003e-06, "loss_iou": 0.296875, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 376796540, "step": 4128 }, { "epoch": 17.204166666666666, "grad_norm": 2.9795527868387097, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 376887624, "step": 4129 }, { "epoch": 17.204166666666666, "loss": 0.031053537502884865, "loss_ce": 1.901970676954079e-06, "loss_iou": 0.2578125, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 376887624, "step": 4129 }, { "epoch": 17.208333333333332, "grad_norm": 3.1958465536868865, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 376978240, "step": 4130 }, { "epoch": 17.208333333333332, "loss": 0.040278829634189606, "loss_ce": 3.377207394805737e-05, "loss_iou": 0.244140625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 376978240, "step": 4130 }, { "epoch": 17.2125, "grad_norm": 2.7260891925276276, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 377069312, "step": 4131 }, { "epoch": 17.2125, "loss": 0.04666922241449356, "loss_ce": 7.84874373493949e-06, "loss_iou": 0.259765625, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 377069312, "step": 4131 }, { "epoch": 17.216666666666665, "grad_norm": 2.651070129394983, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 377161164, "step": 4132 }, { "epoch": 17.216666666666665, "loss": 0.07825395464897156, "loss_ce": 6.8832137003482785e-06, "loss_iou": 0.4140625, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 377161164, "step": 4132 }, { "epoch": 17.220833333333335, "grad_norm": 2.2683718350970232, "learning_rate": 5e-05, "loss": 0.1661, "num_input_tokens_seen": 377251592, "step": 4133 }, { "epoch": 17.220833333333335, "loss": 0.1615155190229416, "loss_ce": 1.2367737554086489e-06, "loss_iou": 0.10791015625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 377251592, "step": 4133 }, { "epoch": 17.225, "grad_norm": 2.6366418603858346, "learning_rate": 5e-05, "loss": 0.0953, "num_input_tokens_seen": 377342520, "step": 4134 }, { "epoch": 17.225, "loss": 0.1185772716999054, "loss_ce": 1.2214568414492533e-06, "loss_iou": 0.216796875, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 377342520, "step": 4134 }, { "epoch": 17.229166666666668, "grad_norm": 3.5264559338621893, "learning_rate": 5e-05, "loss": 0.043, "num_input_tokens_seen": 377434136, "step": 4135 }, { "epoch": 17.229166666666668, "loss": 0.054434407502412796, "loss_ce": 6.306642717390787e-06, "loss_iou": 0.21484375, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 377434136, "step": 4135 }, { "epoch": 17.233333333333334, "grad_norm": 3.1055052262870766, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 377525448, "step": 4136 }, { "epoch": 17.233333333333334, "loss": 0.043866340070962906, "loss_ce": 0.00011939093383261934, "loss_iou": 0.1708984375, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 377525448, "step": 4136 }, { "epoch": 17.2375, "grad_norm": 2.208501603893322, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 377616740, "step": 4137 }, { "epoch": 17.2375, "loss": 0.06535966694355011, "loss_ce": 6.272749942581868e-06, "loss_iou": 0.255859375, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 377616740, "step": 4137 }, { "epoch": 17.241666666666667, "grad_norm": 2.0908215843581397, "learning_rate": 5e-05, "loss": 0.0515, "num_input_tokens_seen": 377707964, "step": 4138 }, { "epoch": 17.241666666666667, "loss": 0.04869557544589043, "loss_ce": 4.780476956511848e-06, "loss_iou": 0.212890625, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 377707964, "step": 4138 }, { "epoch": 17.245833333333334, "grad_norm": 3.4463473343026565, "learning_rate": 5e-05, "loss": 0.0763, "num_input_tokens_seen": 377799740, "step": 4139 }, { "epoch": 17.245833333333334, "loss": 0.1159019023180008, "loss_ce": 0.0008964104927144945, "loss_iou": 0.2578125, "loss_num": 0.02294921875, "loss_xval": 0.115234375, "num_input_tokens_seen": 377799740, "step": 4139 }, { "epoch": 17.25, "grad_norm": 4.382896012197364, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 377891368, "step": 4140 }, { "epoch": 17.25, "loss": 0.04679463803768158, "loss_ce": 0.00010274462692905217, "loss_iou": 0.2138671875, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 377891368, "step": 4140 }, { "epoch": 17.254166666666666, "grad_norm": 3.0559161879412344, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 377982636, "step": 4141 }, { "epoch": 17.254166666666666, "loss": 0.06926178932189941, "loss_ce": 2.143523033737438e-06, "loss_iou": 0.203125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 377982636, "step": 4141 }, { "epoch": 17.258333333333333, "grad_norm": 1.0516071756116985, "learning_rate": 5e-05, "loss": 0.0891, "num_input_tokens_seen": 378073500, "step": 4142 }, { "epoch": 17.258333333333333, "loss": 0.07638757675886154, "loss_ce": 9.710823178465944e-06, "loss_iou": 0.1484375, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 378073500, "step": 4142 }, { "epoch": 17.2625, "grad_norm": 1.5393423311904353, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 378164848, "step": 4143 }, { "epoch": 17.2625, "loss": 0.05576720088720322, "loss_ce": 1.1585127140278928e-05, "loss_iou": 0.126953125, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 378164848, "step": 4143 }, { "epoch": 17.266666666666666, "grad_norm": 2.2986887079216682, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 378256496, "step": 4144 }, { "epoch": 17.266666666666666, "loss": 0.05642838403582573, "loss_ce": 3.1898129236651585e-05, "loss_iou": 0.212890625, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 378256496, "step": 4144 }, { "epoch": 17.270833333333332, "grad_norm": 3.637160341144884, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 378348148, "step": 4145 }, { "epoch": 17.270833333333332, "loss": 0.06469674408435822, "loss_ce": 0.00013680808478966355, "loss_iou": 0.337890625, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 378348148, "step": 4145 }, { "epoch": 17.275, "grad_norm": 2.17254788913164, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 378439328, "step": 4146 }, { "epoch": 17.275, "loss": 0.03240314871072769, "loss_ce": 8.741089004615787e-06, "loss_iou": 0.33203125, "loss_num": 0.0064697265625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 378439328, "step": 4146 }, { "epoch": 17.279166666666665, "grad_norm": 3.348143426755929, "learning_rate": 5e-05, "loss": 0.0463, "num_input_tokens_seen": 378530776, "step": 4147 }, { "epoch": 17.279166666666665, "loss": 0.0521889366209507, "loss_ce": 3.8771340769017115e-06, "loss_iou": 0.275390625, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 378530776, "step": 4147 }, { "epoch": 17.283333333333335, "grad_norm": 4.117528658266027, "learning_rate": 5e-05, "loss": 0.0859, "num_input_tokens_seen": 378621812, "step": 4148 }, { "epoch": 17.283333333333335, "loss": 0.09836510568857193, "loss_ce": 6.947469046281185e-06, "loss_iou": 0.2294921875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 378621812, "step": 4148 }, { "epoch": 17.2875, "grad_norm": 2.0040948660561937, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 378713528, "step": 4149 }, { "epoch": 17.2875, "loss": 0.047562677413225174, "loss_ce": 1.030675889523991e-06, "loss_iou": 0.267578125, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 378713528, "step": 4149 }, { "epoch": 17.291666666666668, "grad_norm": 2.9819458574398814, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 378804916, "step": 4150 }, { "epoch": 17.291666666666668, "loss": 0.03186263144016266, "loss_ce": 2.277978410347714e-06, "loss_iou": 0.0849609375, "loss_num": 0.006378173828125, "loss_xval": 0.03173828125, "num_input_tokens_seen": 378804916, "step": 4150 }, { "epoch": 17.295833333333334, "grad_norm": 2.409193723532535, "learning_rate": 5e-05, "loss": 0.0403, "num_input_tokens_seen": 378896124, "step": 4151 }, { "epoch": 17.295833333333334, "loss": 0.04503517970442772, "loss_ce": 0.00014382405788637698, "loss_iou": 0.171875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 378896124, "step": 4151 }, { "epoch": 17.3, "grad_norm": 1.5030586417918494, "learning_rate": 5e-05, "loss": 0.0809, "num_input_tokens_seen": 378987300, "step": 4152 }, { "epoch": 17.3, "loss": 0.12397777289152145, "loss_ce": 4.588945739669725e-05, "loss_iou": 0.154296875, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 378987300, "step": 4152 }, { "epoch": 17.304166666666667, "grad_norm": 4.31789317113666, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 379078612, "step": 4153 }, { "epoch": 17.304166666666667, "loss": 0.03499021381139755, "loss_ce": 1.8140335669158958e-06, "loss_iou": 0.201171875, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 379078612, "step": 4153 }, { "epoch": 17.308333333333334, "grad_norm": 2.107743511654392, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 379169924, "step": 4154 }, { "epoch": 17.308333333333334, "loss": 0.049952924251556396, "loss_ce": 1.0907536307058763e-05, "loss_iou": 0.1943359375, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 379169924, "step": 4154 }, { "epoch": 17.3125, "grad_norm": 2.507618094018235, "learning_rate": 5e-05, "loss": 0.0643, "num_input_tokens_seen": 379261204, "step": 4155 }, { "epoch": 17.3125, "loss": 0.047419168055057526, "loss_ce": 4.0629114664625376e-05, "loss_iou": 0.357421875, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 379261204, "step": 4155 }, { "epoch": 17.316666666666666, "grad_norm": 2.961298842268306, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 379352556, "step": 4156 }, { "epoch": 17.316666666666666, "loss": 0.039200618863105774, "loss_ce": 7.923441671664477e-07, "loss_iou": 0.2392578125, "loss_num": 0.0078125, "loss_xval": 0.039306640625, "num_input_tokens_seen": 379352556, "step": 4156 }, { "epoch": 17.320833333333333, "grad_norm": 3.599819057412839, "learning_rate": 5e-05, "loss": 0.0293, "num_input_tokens_seen": 379444232, "step": 4157 }, { "epoch": 17.320833333333333, "loss": 0.028475811704993248, "loss_ce": 0.0016508603002876043, "loss_iou": 0.28125, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 379444232, "step": 4157 }, { "epoch": 17.325, "grad_norm": 4.796508671220804, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 379535720, "step": 4158 }, { "epoch": 17.325, "loss": 0.06499719619750977, "loss_ce": 1.0012766324507538e-05, "loss_iou": 0.36328125, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 379535720, "step": 4158 }, { "epoch": 17.329166666666666, "grad_norm": 3.0562988991039353, "learning_rate": 5e-05, "loss": 0.0862, "num_input_tokens_seen": 379627084, "step": 4159 }, { "epoch": 17.329166666666666, "loss": 0.1044941172003746, "loss_ce": 1.9318108570587356e-06, "loss_iou": 0.26171875, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 379627084, "step": 4159 }, { "epoch": 17.333333333333332, "grad_norm": 2.30199537755403, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 379718140, "step": 4160 }, { "epoch": 17.333333333333332, "loss": 0.040326207876205444, "loss_ce": 1.2487752428569365e-05, "loss_iou": 0.1865234375, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 379718140, "step": 4160 }, { "epoch": 17.3375, "grad_norm": 17.81326134141207, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 379809208, "step": 4161 }, { "epoch": 17.3375, "loss": 0.05243492126464844, "loss_ce": 5.7217112043872476e-06, "loss_iou": 0.29296875, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 379809208, "step": 4161 }, { "epoch": 17.341666666666665, "grad_norm": 1.422884101591852, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 379901088, "step": 4162 }, { "epoch": 17.341666666666665, "loss": 0.023801235482096672, "loss_ce": 8.907601295504719e-05, "loss_iou": 0.11279296875, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 379901088, "step": 4162 }, { "epoch": 17.345833333333335, "grad_norm": 2.9900858693580643, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 379992896, "step": 4163 }, { "epoch": 17.345833333333335, "loss": 0.028920790180563927, "loss_ce": 5.385740678320872e-06, "loss_iou": 0.291015625, "loss_num": 0.005767822265625, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 379992896, "step": 4163 }, { "epoch": 17.35, "grad_norm": 5.004765317149916, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 380083900, "step": 4164 }, { "epoch": 17.35, "loss": 0.09242373704910278, "loss_ce": 1.2471190302676405e-06, "loss_iou": 0.24609375, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 380083900, "step": 4164 }, { "epoch": 17.354166666666668, "grad_norm": 3.5720222777192694, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 380175476, "step": 4165 }, { "epoch": 17.354166666666668, "loss": 0.0531640350818634, "loss_ce": 2.41236944020784e-06, "loss_iou": 0.35546875, "loss_num": 0.0106201171875, "loss_xval": 0.05322265625, "num_input_tokens_seen": 380175476, "step": 4165 }, { "epoch": 17.358333333333334, "grad_norm": 2.918456205575103, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 380266748, "step": 4166 }, { "epoch": 17.358333333333334, "loss": 0.04650936275720596, "loss_ce": 5.731297960664961e-07, "loss_iou": 0.251953125, "loss_num": 0.00933837890625, "loss_xval": 0.04638671875, "num_input_tokens_seen": 380266748, "step": 4166 }, { "epoch": 17.3625, "grad_norm": 4.120291909874971, "learning_rate": 5e-05, "loss": 0.0542, "num_input_tokens_seen": 380358840, "step": 4167 }, { "epoch": 17.3625, "loss": 0.06932120025157928, "loss_ce": 0.00012258999049663544, "loss_iou": 0.1708984375, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 380358840, "step": 4167 }, { "epoch": 17.366666666666667, "grad_norm": 17.50068370015219, "learning_rate": 5e-05, "loss": 0.0654, "num_input_tokens_seen": 380450272, "step": 4168 }, { "epoch": 17.366666666666667, "loss": 0.06793436408042908, "loss_ce": 1.748909562593326e-05, "loss_iou": 0.294921875, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 380450272, "step": 4168 }, { "epoch": 17.370833333333334, "grad_norm": 2.6697212933765195, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 380541972, "step": 4169 }, { "epoch": 17.370833333333334, "loss": 0.05074727535247803, "loss_ce": 1.1801877917605452e-05, "loss_iou": 0.396484375, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 380541972, "step": 4169 }, { "epoch": 17.375, "grad_norm": 1.3486808798727778, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 380633324, "step": 4170 }, { "epoch": 17.375, "loss": 0.06902758777141571, "loss_ce": 7.31236141291447e-05, "loss_iou": 0.294921875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 380633324, "step": 4170 }, { "epoch": 17.379166666666666, "grad_norm": 5.02642605399187, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 380724316, "step": 4171 }, { "epoch": 17.379166666666666, "loss": 0.028513111174106598, "loss_ce": 2.4952072635642253e-05, "loss_iou": 0.173828125, "loss_num": 0.005706787109375, "loss_xval": 0.0284423828125, "num_input_tokens_seen": 380724316, "step": 4171 }, { "epoch": 17.383333333333333, "grad_norm": 5.776909080785385, "learning_rate": 5e-05, "loss": 0.0658, "num_input_tokens_seen": 380815592, "step": 4172 }, { "epoch": 17.383333333333333, "loss": 0.030104611068964005, "loss_ce": 1.4279077731771395e-05, "loss_iou": 0.244140625, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 380815592, "step": 4172 }, { "epoch": 17.3875, "grad_norm": 3.3291427531728686, "learning_rate": 5e-05, "loss": 0.0543, "num_input_tokens_seen": 380906692, "step": 4173 }, { "epoch": 17.3875, "loss": 0.026032838970422745, "loss_ce": 1.660524503677152e-05, "loss_iou": 0.21875, "loss_num": 0.00518798828125, "loss_xval": 0.0260009765625, "num_input_tokens_seen": 380906692, "step": 4173 }, { "epoch": 17.391666666666666, "grad_norm": 2.04136096032851, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 380998264, "step": 4174 }, { "epoch": 17.391666666666666, "loss": 0.054833292961120605, "loss_ce": 0.0003136429295409471, "loss_iou": 0.125, "loss_num": 0.01092529296875, "loss_xval": 0.054443359375, "num_input_tokens_seen": 380998264, "step": 4174 }, { "epoch": 17.395833333333332, "grad_norm": 5.595745041098005, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 381089624, "step": 4175 }, { "epoch": 17.395833333333332, "loss": 0.07274062186479568, "loss_ce": 3.2486663258168846e-05, "loss_iou": 0.2451171875, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 381089624, "step": 4175 }, { "epoch": 17.4, "grad_norm": 2.462672174230439, "learning_rate": 5e-05, "loss": 0.1027, "num_input_tokens_seen": 381181008, "step": 4176 }, { "epoch": 17.4, "loss": 0.1237187534570694, "loss_ce": 4.994043365513789e-07, "loss_iou": 0.1640625, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 381181008, "step": 4176 }, { "epoch": 17.404166666666665, "grad_norm": 2.048999090525931, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 381272552, "step": 4177 }, { "epoch": 17.404166666666665, "loss": 0.061331361532211304, "loss_ce": 6.28693032922456e-06, "loss_iou": 0.189453125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 381272552, "step": 4177 }, { "epoch": 17.408333333333335, "grad_norm": 1.6223244040540357, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 381362932, "step": 4178 }, { "epoch": 17.408333333333335, "loss": 0.045600827783346176, "loss_ce": 7.567994089185959e-06, "loss_iou": 0.1416015625, "loss_num": 0.00909423828125, "loss_xval": 0.045654296875, "num_input_tokens_seen": 381362932, "step": 4178 }, { "epoch": 17.4125, "grad_norm": 2.025302773676103, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 381454308, "step": 4179 }, { "epoch": 17.4125, "loss": 0.07269463688135147, "loss_ce": 1.7684169506537728e-06, "loss_iou": 0.2734375, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 381454308, "step": 4179 }, { "epoch": 17.416666666666668, "grad_norm": 2.5372328217531015, "learning_rate": 5e-05, "loss": 0.0526, "num_input_tokens_seen": 381545752, "step": 4180 }, { "epoch": 17.416666666666668, "loss": 0.04797760024666786, "loss_ce": 1.922258707054425e-05, "loss_iou": 0.296875, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 381545752, "step": 4180 }, { "epoch": 17.420833333333334, "grad_norm": 5.241445928314043, "learning_rate": 5e-05, "loss": 0.0457, "num_input_tokens_seen": 381637208, "step": 4181 }, { "epoch": 17.420833333333334, "loss": 0.0530615970492363, "loss_ce": 6.7899372879765e-06, "loss_iou": 0.27734375, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 381637208, "step": 4181 }, { "epoch": 17.425, "grad_norm": 3.2854474328181857, "learning_rate": 5e-05, "loss": 0.0825, "num_input_tokens_seen": 381728292, "step": 4182 }, { "epoch": 17.425, "loss": 0.08642810583114624, "loss_ce": 2.322962927792105e-06, "loss_iou": 0.298828125, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 381728292, "step": 4182 }, { "epoch": 17.429166666666667, "grad_norm": 1.9952205771229876, "learning_rate": 5e-05, "loss": 0.0391, "num_input_tokens_seen": 381820024, "step": 4183 }, { "epoch": 17.429166666666667, "loss": 0.03883390873670578, "loss_ce": 1.55478592205327e-05, "loss_iou": 0.3125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 381820024, "step": 4183 }, { "epoch": 17.433333333333334, "grad_norm": 2.067975560498226, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 381911112, "step": 4184 }, { "epoch": 17.433333333333334, "loss": 0.035965446382761, "loss_ce": 4.811201961274492e-07, "loss_iou": 0.1982421875, "loss_num": 0.0072021484375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 381911112, "step": 4184 }, { "epoch": 17.4375, "grad_norm": 2.074459227907449, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 382002684, "step": 4185 }, { "epoch": 17.4375, "loss": 0.05124114826321602, "loss_ce": 7.843073399271816e-05, "loss_iou": 0.244140625, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 382002684, "step": 4185 }, { "epoch": 17.441666666666666, "grad_norm": 2.6020157273330744, "learning_rate": 5e-05, "loss": 0.042, "num_input_tokens_seen": 382094208, "step": 4186 }, { "epoch": 17.441666666666666, "loss": 0.048831477761268616, "loss_ce": 3.352569137859973e-06, "loss_iou": 0.30859375, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 382094208, "step": 4186 }, { "epoch": 17.445833333333333, "grad_norm": 4.115432540513496, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 382185420, "step": 4187 }, { "epoch": 17.445833333333333, "loss": 0.07571595907211304, "loss_ce": 0.00012392218923196197, "loss_iou": 0.20703125, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 382185420, "step": 4187 }, { "epoch": 17.45, "grad_norm": 2.021583164885262, "learning_rate": 5e-05, "loss": 0.0837, "num_input_tokens_seen": 382276752, "step": 4188 }, { "epoch": 17.45, "loss": 0.07489342987537384, "loss_ce": 7.1080962698033545e-06, "loss_iou": 0.294921875, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 382276752, "step": 4188 }, { "epoch": 17.454166666666666, "grad_norm": 4.566095149534234, "learning_rate": 5e-05, "loss": 0.0612, "num_input_tokens_seen": 382368236, "step": 4189 }, { "epoch": 17.454166666666666, "loss": 0.05665755644440651, "loss_ce": 1.6720227904443163e-06, "loss_iou": 0.251953125, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 382368236, "step": 4189 }, { "epoch": 17.458333333333332, "grad_norm": 2.8811610544544695, "learning_rate": 5e-05, "loss": 0.0736, "num_input_tokens_seen": 382460640, "step": 4190 }, { "epoch": 17.458333333333332, "loss": 0.041686300188302994, "loss_ce": 3.743516936083324e-05, "loss_iou": 0.294921875, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 382460640, "step": 4190 }, { "epoch": 17.4625, "grad_norm": 2.83894264255325, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 382552036, "step": 4191 }, { "epoch": 17.4625, "loss": 0.051210999488830566, "loss_ce": 2.5061237920454005e-06, "loss_iou": 0.251953125, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 382552036, "step": 4191 }, { "epoch": 17.466666666666665, "grad_norm": 2.913447387352696, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 382643988, "step": 4192 }, { "epoch": 17.466666666666665, "loss": 0.04521823674440384, "loss_ce": 5.2219100325601175e-05, "loss_iou": 0.28515625, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 382643988, "step": 4192 }, { "epoch": 17.470833333333335, "grad_norm": 2.455147735870591, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 382735388, "step": 4193 }, { "epoch": 17.470833333333335, "loss": 0.024509306997060776, "loss_ce": 3.691537131089717e-06, "loss_iou": 0.1328125, "loss_num": 0.004913330078125, "loss_xval": 0.0245361328125, "num_input_tokens_seen": 382735388, "step": 4193 }, { "epoch": 17.475, "grad_norm": 3.309451299202754, "learning_rate": 5e-05, "loss": 0.084, "num_input_tokens_seen": 382826768, "step": 4194 }, { "epoch": 17.475, "loss": 0.0785035565495491, "loss_ce": 4.716331659437856e-06, "loss_iou": 0.1826171875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 382826768, "step": 4194 }, { "epoch": 17.479166666666668, "grad_norm": 3.4075041239144768, "learning_rate": 5e-05, "loss": 0.0674, "num_input_tokens_seen": 382917520, "step": 4195 }, { "epoch": 17.479166666666668, "loss": 0.09112516045570374, "loss_ce": 1.4928999917174224e-05, "loss_iou": 0.2392578125, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 382917520, "step": 4195 }, { "epoch": 17.483333333333334, "grad_norm": 1.562039716782248, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 383009252, "step": 4196 }, { "epoch": 17.483333333333334, "loss": 0.04242390766739845, "loss_ce": 4.4736902964359615e-06, "loss_iou": 0.2333984375, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 383009252, "step": 4196 }, { "epoch": 17.4875, "grad_norm": 4.952842041521385, "learning_rate": 5e-05, "loss": 0.0961, "num_input_tokens_seen": 383100828, "step": 4197 }, { "epoch": 17.4875, "loss": 0.15947763621807098, "loss_ce": 8.03215880296193e-06, "loss_iou": 0.302734375, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 383100828, "step": 4197 }, { "epoch": 17.491666666666667, "grad_norm": 2.777819714122239, "learning_rate": 5e-05, "loss": 0.0539, "num_input_tokens_seen": 383192324, "step": 4198 }, { "epoch": 17.491666666666667, "loss": 0.0738849937915802, "loss_ce": 1.9351950868440326e-06, "loss_iou": 0.2421875, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 383192324, "step": 4198 }, { "epoch": 17.495833333333334, "grad_norm": 3.2626916345833705, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 383284020, "step": 4199 }, { "epoch": 17.495833333333334, "loss": 0.03140312433242798, "loss_ce": 5.335076593837584e-07, "loss_iou": 0.240234375, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 383284020, "step": 4199 }, { "epoch": 17.5, "grad_norm": 2.6749036434240514, "learning_rate": 5e-05, "loss": 0.0327, "num_input_tokens_seen": 383376196, "step": 4200 }, { "epoch": 17.5, "loss": 0.03776795417070389, "loss_ce": 0.00018555522547103465, "loss_iou": 0.3984375, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 383376196, "step": 4200 }, { "epoch": 17.504166666666666, "grad_norm": 5.499155934500834, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 383467144, "step": 4201 }, { "epoch": 17.504166666666666, "loss": 0.06288354843854904, "loss_ce": 2.0762161057064077e-06, "loss_iou": 0.34765625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 383467144, "step": 4201 }, { "epoch": 17.508333333333333, "grad_norm": 2.013655166783423, "learning_rate": 5e-05, "loss": 0.0783, "num_input_tokens_seen": 383557920, "step": 4202 }, { "epoch": 17.508333333333333, "loss": 0.08707220107316971, "loss_ce": 5.555620191444177e-06, "loss_iou": 0.29296875, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 383557920, "step": 4202 }, { "epoch": 17.5125, "grad_norm": 2.3711594009871253, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 383649372, "step": 4203 }, { "epoch": 17.5125, "loss": 0.03189709410071373, "loss_ce": 0.00018933152023237199, "loss_iou": 0.25, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 383649372, "step": 4203 }, { "epoch": 17.516666666666666, "grad_norm": 1.6791362866056327, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 383741344, "step": 4204 }, { "epoch": 17.516666666666666, "loss": 0.0628369152545929, "loss_ce": 1.2169030014774762e-06, "loss_iou": 0.30078125, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 383741344, "step": 4204 }, { "epoch": 17.520833333333332, "grad_norm": 1.8910707791570547, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 383832172, "step": 4205 }, { "epoch": 17.520833333333332, "loss": 0.03927692770957947, "loss_ce": 8.04272076493362e-07, "loss_iou": 0.2236328125, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 383832172, "step": 4205 }, { "epoch": 17.525, "grad_norm": 2.6070877254757354, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 383923912, "step": 4206 }, { "epoch": 17.525, "loss": 0.03360820189118385, "loss_ce": 2.3605247406521812e-05, "loss_iou": 0.31640625, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 383923912, "step": 4206 }, { "epoch": 17.529166666666665, "grad_norm": 2.1064854829699424, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 384015528, "step": 4207 }, { "epoch": 17.529166666666665, "loss": 0.05154259502887726, "loss_ce": 0.0019362723687663674, "loss_iou": 0.224609375, "loss_num": 0.00994873046875, "loss_xval": 0.049560546875, "num_input_tokens_seen": 384015528, "step": 4207 }, { "epoch": 17.533333333333335, "grad_norm": 2.3138079290066504, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 384106768, "step": 4208 }, { "epoch": 17.533333333333335, "loss": 0.029217317700386047, "loss_ce": 2.7251966457697563e-05, "loss_iou": 0.31640625, "loss_num": 0.005828857421875, "loss_xval": 0.0291748046875, "num_input_tokens_seen": 384106768, "step": 4208 }, { "epoch": 17.5375, "grad_norm": 2.4430234781066864, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 384197860, "step": 4209 }, { "epoch": 17.5375, "loss": 0.0784795880317688, "loss_ce": 0.000995452981442213, "loss_iou": 0.2216796875, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 384197860, "step": 4209 }, { "epoch": 17.541666666666668, "grad_norm": 2.737309471067965, "learning_rate": 5e-05, "loss": 0.1394, "num_input_tokens_seen": 384289052, "step": 4210 }, { "epoch": 17.541666666666668, "loss": 0.13933785259723663, "loss_ce": 2.511477578082122e-05, "loss_iou": 0.34375, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 384289052, "step": 4210 }, { "epoch": 17.545833333333334, "grad_norm": 2.7699618866317466, "learning_rate": 5e-05, "loss": 0.0615, "num_input_tokens_seen": 384380580, "step": 4211 }, { "epoch": 17.545833333333334, "loss": 0.06901293992996216, "loss_ce": 1.2692656127910595e-05, "loss_iou": 0.3125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 384380580, "step": 4211 }, { "epoch": 17.55, "grad_norm": 2.752521438874284, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 384471860, "step": 4212 }, { "epoch": 17.55, "loss": 0.033134959638118744, "loss_ce": 8.129944035317749e-06, "loss_iou": 0.2216796875, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 384471860, "step": 4212 }, { "epoch": 17.554166666666667, "grad_norm": 2.5678958327972503, "learning_rate": 5e-05, "loss": 0.0539, "num_input_tokens_seen": 384562960, "step": 4213 }, { "epoch": 17.554166666666667, "loss": 0.038819510489702225, "loss_ce": 1.1500437722133938e-06, "loss_iou": 0.23046875, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 384562960, "step": 4213 }, { "epoch": 17.558333333333334, "grad_norm": 1.7329617000610125, "learning_rate": 5e-05, "loss": 0.0272, "num_input_tokens_seen": 384654876, "step": 4214 }, { "epoch": 17.558333333333334, "loss": 0.032617341727018356, "loss_ce": 5.494547622220125e-06, "loss_iou": 0.26171875, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 384654876, "step": 4214 }, { "epoch": 17.5625, "grad_norm": 7.2811736273901175, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 384746264, "step": 4215 }, { "epoch": 17.5625, "loss": 0.08529709279537201, "loss_ce": 0.00012253341265022755, "loss_iou": 0.267578125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 384746264, "step": 4215 }, { "epoch": 17.566666666666666, "grad_norm": 1.4273657568497262, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 384837028, "step": 4216 }, { "epoch": 17.566666666666666, "loss": 0.05279720202088356, "loss_ce": 1.7941896430784254e-06, "loss_iou": 0.162109375, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 384837028, "step": 4216 }, { "epoch": 17.570833333333333, "grad_norm": 2.6049713329445083, "learning_rate": 5e-05, "loss": 0.0662, "num_input_tokens_seen": 384928004, "step": 4217 }, { "epoch": 17.570833333333333, "loss": 0.07219231128692627, "loss_ce": 0.00038445499376393855, "loss_iou": 0.376953125, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 384928004, "step": 4217 }, { "epoch": 17.575, "grad_norm": 1.6813061374465061, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 385019184, "step": 4218 }, { "epoch": 17.575, "loss": 0.041826337575912476, "loss_ce": 1.9954518393205944e-06, "loss_iou": 0.314453125, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 385019184, "step": 4218 }, { "epoch": 17.579166666666666, "grad_norm": 2.3886752163061593, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 385110276, "step": 4219 }, { "epoch": 17.579166666666666, "loss": 0.025012066587805748, "loss_ce": 2.911604269684176e-06, "loss_iou": 0.16015625, "loss_num": 0.0050048828125, "loss_xval": 0.0250244140625, "num_input_tokens_seen": 385110276, "step": 4219 }, { "epoch": 17.583333333333332, "grad_norm": 2.7330086944365672, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 385201992, "step": 4220 }, { "epoch": 17.583333333333332, "loss": 0.08029569685459137, "loss_ce": 1.9216951841372065e-05, "loss_iou": 0.2294921875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 385201992, "step": 4220 }, { "epoch": 17.5875, "grad_norm": 2.422918394086421, "learning_rate": 5e-05, "loss": 0.0432, "num_input_tokens_seen": 385293352, "step": 4221 }, { "epoch": 17.5875, "loss": 0.04279577359557152, "loss_ce": 2.4986568405438447e-06, "loss_iou": 0.1572265625, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 385293352, "step": 4221 }, { "epoch": 17.591666666666665, "grad_norm": 2.777699149945361, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 385384168, "step": 4222 }, { "epoch": 17.591666666666665, "loss": 0.03143524378538132, "loss_ce": 1.7396700059180148e-05, "loss_iou": 0.193359375, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 385384168, "step": 4222 }, { "epoch": 17.595833333333335, "grad_norm": 2.69353567615644, "learning_rate": 5e-05, "loss": 0.0677, "num_input_tokens_seen": 385475292, "step": 4223 }, { "epoch": 17.595833333333335, "loss": 0.04598440229892731, "loss_ce": 9.66764309850987e-06, "loss_iou": 0.326171875, "loss_num": 0.00921630859375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 385475292, "step": 4223 }, { "epoch": 17.6, "grad_norm": 4.163966587950836, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 385567056, "step": 4224 }, { "epoch": 17.6, "loss": 0.04680160805583, "loss_ce": 1.8161270418204367e-05, "loss_iou": 0.296875, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 385567056, "step": 4224 }, { "epoch": 17.604166666666668, "grad_norm": 2.2482295399288286, "learning_rate": 5e-05, "loss": 0.0492, "num_input_tokens_seen": 385658644, "step": 4225 }, { "epoch": 17.604166666666668, "loss": 0.03979950025677681, "loss_ce": 4.5782053348375484e-06, "loss_iou": 0.15234375, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 385658644, "step": 4225 }, { "epoch": 17.608333333333334, "grad_norm": 2.7217679813302937, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 385749844, "step": 4226 }, { "epoch": 17.608333333333334, "loss": 0.05861446261405945, "loss_ce": 2.0709698219434358e-05, "loss_iou": 0.296875, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 385749844, "step": 4226 }, { "epoch": 17.6125, "grad_norm": 3.3383609570266652, "learning_rate": 5e-05, "loss": 0.0653, "num_input_tokens_seen": 385840696, "step": 4227 }, { "epoch": 17.6125, "loss": 0.08750728517770767, "loss_ce": 0.001165428082458675, "loss_iou": 0.201171875, "loss_num": 0.0172119140625, "loss_xval": 0.08642578125, "num_input_tokens_seen": 385840696, "step": 4227 }, { "epoch": 17.616666666666667, "grad_norm": 2.738495218076334, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 385932628, "step": 4228 }, { "epoch": 17.616666666666667, "loss": 0.030733108520507812, "loss_ce": 1.9056218434343464e-06, "loss_iou": 0.375, "loss_num": 0.006134033203125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 385932628, "step": 4228 }, { "epoch": 17.620833333333334, "grad_norm": 3.0741839980590737, "learning_rate": 5e-05, "loss": 0.0506, "num_input_tokens_seen": 386023320, "step": 4229 }, { "epoch": 17.620833333333334, "loss": 0.06695705652236938, "loss_ce": 1.493851868872298e-06, "loss_iou": 0.259765625, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 386023320, "step": 4229 }, { "epoch": 17.625, "grad_norm": 2.208632856814461, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 386115504, "step": 4230 }, { "epoch": 17.625, "loss": 0.04812372103333473, "loss_ce": 0.00011957136302953586, "loss_iou": 0.279296875, "loss_num": 0.00958251953125, "loss_xval": 0.048095703125, "num_input_tokens_seen": 386115504, "step": 4230 }, { "epoch": 17.629166666666666, "grad_norm": 3.453935918291197, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 386206904, "step": 4231 }, { "epoch": 17.629166666666666, "loss": 0.04082659259438515, "loss_ce": 9.333534762845375e-06, "loss_iou": 0.2041015625, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 386206904, "step": 4231 }, { "epoch": 17.633333333333333, "grad_norm": 4.551641943329968, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 386298048, "step": 4232 }, { "epoch": 17.633333333333333, "loss": 0.04810848459601402, "loss_ce": 4.329779767431319e-05, "loss_iou": 0.302734375, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 386298048, "step": 4232 }, { "epoch": 17.6375, "grad_norm": 2.5608155879021144, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 386389596, "step": 4233 }, { "epoch": 17.6375, "loss": 0.03954213857650757, "loss_ce": 6.616890914301621e-06, "loss_iou": 0.2177734375, "loss_num": 0.00787353515625, "loss_xval": 0.03955078125, "num_input_tokens_seen": 386389596, "step": 4233 }, { "epoch": 17.641666666666666, "grad_norm": 3.5352760234306144, "learning_rate": 5e-05, "loss": 0.0595, "num_input_tokens_seen": 386481540, "step": 4234 }, { "epoch": 17.641666666666666, "loss": 0.053279146552085876, "loss_ce": 0.00034640979720279574, "loss_iou": 0.1806640625, "loss_num": 0.01055908203125, "loss_xval": 0.052978515625, "num_input_tokens_seen": 386481540, "step": 4234 }, { "epoch": 17.645833333333332, "grad_norm": 2.4756241576337223, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 386573248, "step": 4235 }, { "epoch": 17.645833333333332, "loss": 0.03585919737815857, "loss_ce": 1.630270890018437e-05, "loss_iou": 0.333984375, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 386573248, "step": 4235 }, { "epoch": 17.65, "grad_norm": 2.016025969596603, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 386664364, "step": 4236 }, { "epoch": 17.65, "loss": 0.03518152981996536, "loss_ce": 2.393299155301065e-06, "loss_iou": 0.236328125, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 386664364, "step": 4236 }, { "epoch": 17.654166666666665, "grad_norm": 3.3414144838506767, "learning_rate": 5e-05, "loss": 0.0814, "num_input_tokens_seen": 386754752, "step": 4237 }, { "epoch": 17.654166666666665, "loss": 0.04250407963991165, "loss_ce": 0.00035930349258705974, "loss_iou": 0.203125, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 386754752, "step": 4237 }, { "epoch": 17.658333333333335, "grad_norm": 2.1200143531779467, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 386846024, "step": 4238 }, { "epoch": 17.658333333333335, "loss": 0.04939752444624901, "loss_ce": 0.0008593180100433528, "loss_iou": 0.25390625, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 386846024, "step": 4238 }, { "epoch": 17.6625, "grad_norm": 1.5079927276364646, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 386937504, "step": 4239 }, { "epoch": 17.6625, "loss": 0.03806938976049423, "loss_ce": 6.342200322251301e-06, "loss_iou": 0.2041015625, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 386937504, "step": 4239 }, { "epoch": 17.666666666666668, "grad_norm": 2.0841912017626467, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 387028972, "step": 4240 }, { "epoch": 17.666666666666668, "loss": 0.05647444352507591, "loss_ce": 1.666220214247005e-06, "loss_iou": 0.189453125, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 387028972, "step": 4240 }, { "epoch": 17.670833333333334, "grad_norm": 4.928727740122159, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 387120320, "step": 4241 }, { "epoch": 17.670833333333334, "loss": 0.03015184961259365, "loss_ce": 4.818359684577445e-07, "loss_iou": 0.29296875, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 387120320, "step": 4241 }, { "epoch": 17.675, "grad_norm": 3.8113956592274336, "learning_rate": 5e-05, "loss": 0.0465, "num_input_tokens_seen": 387211544, "step": 4242 }, { "epoch": 17.675, "loss": 0.048737533390522, "loss_ce": 9.625723578210454e-07, "loss_iou": 0.1572265625, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 387211544, "step": 4242 }, { "epoch": 17.679166666666667, "grad_norm": 5.128999783647298, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 387301464, "step": 4243 }, { "epoch": 17.679166666666667, "loss": 0.055925507098436356, "loss_ce": 6.308028969215229e-05, "loss_iou": 0.1982421875, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 387301464, "step": 4243 }, { "epoch": 17.683333333333334, "grad_norm": 2.89438733334762, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 387393004, "step": 4244 }, { "epoch": 17.683333333333334, "loss": 0.05619873106479645, "loss_ce": 6.100194696045946e-07, "loss_iou": 0.21875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 387393004, "step": 4244 }, { "epoch": 17.6875, "grad_norm": 2.610717063238736, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 387484404, "step": 4245 }, { "epoch": 17.6875, "loss": 0.03247163072228432, "loss_ce": 9.265061748919834e-07, "loss_iou": 0.298828125, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 387484404, "step": 4245 }, { "epoch": 17.691666666666666, "grad_norm": 2.263550389067424, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 387575592, "step": 4246 }, { "epoch": 17.691666666666666, "loss": 0.039128877222537994, "loss_ce": 0.00020370917627587914, "loss_iou": 0.16015625, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 387575592, "step": 4246 }, { "epoch": 17.695833333333333, "grad_norm": 7.143087195639301, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 387667004, "step": 4247 }, { "epoch": 17.695833333333333, "loss": 0.060753967612981796, "loss_ce": 1.0965741239488125e-06, "loss_iou": 0.390625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 387667004, "step": 4247 }, { "epoch": 17.7, "grad_norm": 5.350068515423604, "learning_rate": 5e-05, "loss": 0.0674, "num_input_tokens_seen": 387757924, "step": 4248 }, { "epoch": 17.7, "loss": 0.07637852430343628, "loss_ce": 6.557953611263656e-07, "loss_iou": 0.1875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 387757924, "step": 4248 }, { "epoch": 17.704166666666666, "grad_norm": 1.8448489004454245, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 387849144, "step": 4249 }, { "epoch": 17.704166666666666, "loss": 0.07519252598285675, "loss_ce": 4.8400288505945355e-06, "loss_iou": 0.189453125, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 387849144, "step": 4249 }, { "epoch": 17.708333333333332, "grad_norm": 1.2075256402713481, "learning_rate": 5e-05, "loss": 0.0628, "num_input_tokens_seen": 387940940, "step": 4250 }, { "epoch": 17.708333333333332, "eval_seeclick_CIoU": 0.25203998386859894, "eval_seeclick_GIoU": 0.23426888138055801, "eval_seeclick_IoU": 0.3567681908607483, "eval_seeclick_MAE_all": 0.10411766916513443, "eval_seeclick_MAE_h": 0.07500777393579483, "eval_seeclick_MAE_w": 0.235479474067688, "eval_seeclick_MAE_x_boxes": 0.2311866730451584, "eval_seeclick_MAE_y_boxes": 0.08258943632245064, "eval_seeclick_NUM_probability": 0.9999988377094269, "eval_seeclick_inside_bbox": 0.5397727340459824, "eval_seeclick_loss": 0.5964598655700684, "eval_seeclick_loss_ce": 0.14800745993852615, "eval_seeclick_loss_iou": 0.458740234375, "eval_seeclick_loss_num": 0.085205078125, "eval_seeclick_loss_xval": 0.42608642578125, "eval_seeclick_runtime": 80.6498, "eval_seeclick_samples_per_second": 0.533, "eval_seeclick_steps_per_second": 0.025, "num_input_tokens_seen": 387940940, "step": 4250 }, { "epoch": 17.708333333333332, "eval_icons_CIoU": 0.29239118099212646, "eval_icons_GIoU": 0.2768501341342926, "eval_icons_IoU": 0.38413749635219574, "eval_icons_MAE_all": 0.07174773141741753, "eval_icons_MAE_h": 0.1544894203543663, "eval_icons_MAE_w": 0.10443814098834991, "eval_icons_MAE_x_boxes": 0.10570014268159866, "eval_icons_MAE_y_boxes": 0.1550183743238449, "eval_icons_NUM_probability": 0.9999992549419403, "eval_icons_inside_bbox": 0.53125, "eval_icons_loss": 0.35228726267814636, "eval_icons_loss_ce": 2.2205644199857488e-05, "eval_icons_loss_iou": 0.24761962890625, "eval_icons_loss_num": 0.0735015869140625, "eval_icons_loss_xval": 0.3673095703125, "eval_icons_runtime": 94.4201, "eval_icons_samples_per_second": 0.53, "eval_icons_steps_per_second": 0.021, "num_input_tokens_seen": 387940940, "step": 4250 }, { "epoch": 17.708333333333332, "eval_screenspot_CIoU": 0.3815338710943858, "eval_screenspot_GIoU": 0.371933509906133, "eval_screenspot_IoU": 0.45208731293678284, "eval_screenspot_MAE_all": 0.09845635046561559, "eval_screenspot_MAE_h": 0.08088805278142293, "eval_screenspot_MAE_w": 0.194745272397995, "eval_screenspot_MAE_x_boxes": 0.18775259951750436, "eval_screenspot_MAE_y_boxes": 0.07606856028238933, "eval_screenspot_NUM_probability": 0.9999962250391642, "eval_screenspot_inside_bbox": 0.6833333373069763, "eval_screenspot_loss": 0.4975851774215698, "eval_screenspot_loss_ce": 0.002237203670508355, "eval_screenspot_loss_iou": 0.40283203125, "eval_screenspot_loss_num": 0.0996551513671875, "eval_screenspot_loss_xval": 0.498291015625, "eval_screenspot_runtime": 159.3398, "eval_screenspot_samples_per_second": 0.559, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 387940940, "step": 4250 }, { "epoch": 17.708333333333332, "eval_compot_CIoU": 0.45285023748874664, "eval_compot_GIoU": 0.4470098465681076, "eval_compot_IoU": 0.5330235660076141, "eval_compot_MAE_all": 0.0571780689060688, "eval_compot_MAE_h": 0.06364855542778969, "eval_compot_MAE_w": 0.1531415358185768, "eval_compot_MAE_x_boxes": 0.15447616577148438, "eval_compot_MAE_y_boxes": 0.06288901343941689, "eval_compot_NUM_probability": 0.9999961256980896, "eval_compot_inside_bbox": 0.7638888955116272, "eval_compot_loss": 0.35075217485427856, "eval_compot_loss_ce": 0.07240623980760574, "eval_compot_loss_iou": 0.302978515625, "eval_compot_loss_num": 0.050525665283203125, "eval_compot_loss_xval": 0.2526092529296875, "eval_compot_runtime": 94.7307, "eval_compot_samples_per_second": 0.528, "eval_compot_steps_per_second": 0.021, "num_input_tokens_seen": 387940940, "step": 4250 }, { "epoch": 17.708333333333332, "loss": 0.31521183252334595, "loss_ce": 0.06936222314834595, "loss_iou": 0.2890625, "loss_num": 0.049072265625, "loss_xval": 0.24609375, "num_input_tokens_seen": 387940940, "step": 4250 }, { "epoch": 17.7125, "grad_norm": 1.4582218440071648, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 388032684, "step": 4251 }, { "epoch": 17.7125, "loss": 0.0526927188038826, "loss_ce": 0.002224275842308998, "loss_iou": 0.1806640625, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 388032684, "step": 4251 }, { "epoch": 17.716666666666665, "grad_norm": 9.773943287946695, "learning_rate": 5e-05, "loss": 0.0657, "num_input_tokens_seen": 388124348, "step": 4252 }, { "epoch": 17.716666666666665, "loss": 0.030305448919534683, "loss_ce": 9.122079063672572e-06, "loss_iou": 0.275390625, "loss_num": 0.00604248046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 388124348, "step": 4252 }, { "epoch": 17.720833333333335, "grad_norm": 4.423771502817672, "learning_rate": 5e-05, "loss": 0.0868, "num_input_tokens_seen": 388215432, "step": 4253 }, { "epoch": 17.720833333333335, "loss": 0.10943731665611267, "loss_ce": 0.0024503269232809544, "loss_iou": 0.2119140625, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 388215432, "step": 4253 }, { "epoch": 17.725, "grad_norm": 1.3108049913583504, "learning_rate": 5e-05, "loss": 0.0321, "num_input_tokens_seen": 388307252, "step": 4254 }, { "epoch": 17.725, "loss": 0.034032803028821945, "loss_ce": 5.704933755623642e-06, "loss_iou": 0.0810546875, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 388307252, "step": 4254 }, { "epoch": 17.729166666666668, "grad_norm": 1.6607325795474122, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 388398568, "step": 4255 }, { "epoch": 17.729166666666668, "loss": 0.035407889634370804, "loss_ce": 9.14240226848051e-05, "loss_iou": 0.2890625, "loss_num": 0.007049560546875, "loss_xval": 0.035400390625, "num_input_tokens_seen": 388398568, "step": 4255 }, { "epoch": 17.733333333333334, "grad_norm": 1.8768786210329038, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 388489580, "step": 4256 }, { "epoch": 17.733333333333334, "loss": 0.04233090206980705, "loss_ce": 3.0214434900699416e-06, "loss_iou": 0.27734375, "loss_num": 0.00848388671875, "loss_xval": 0.042236328125, "num_input_tokens_seen": 388489580, "step": 4256 }, { "epoch": 17.7375, "grad_norm": 5.100596257151382, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 388580464, "step": 4257 }, { "epoch": 17.7375, "loss": 0.06953492760658264, "loss_ce": 6.269452228480077e-07, "loss_iou": 0.2255859375, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 388580464, "step": 4257 }, { "epoch": 17.741666666666667, "grad_norm": 4.620224294231164, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 388671656, "step": 4258 }, { "epoch": 17.741666666666667, "loss": 0.0440262109041214, "loss_ce": 4.604961759469006e-06, "loss_iou": 0.298828125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 388671656, "step": 4258 }, { "epoch": 17.745833333333334, "grad_norm": 3.1621588907824587, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 388763544, "step": 4259 }, { "epoch": 17.745833333333334, "loss": 0.03645715117454529, "loss_ce": 0.00011834719771286473, "loss_iou": 0.2099609375, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 388763544, "step": 4259 }, { "epoch": 17.75, "grad_norm": 3.0997334239058745, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 388854532, "step": 4260 }, { "epoch": 17.75, "loss": 0.06612833589315414, "loss_ce": 0.002865397371351719, "loss_iou": 0.330078125, "loss_num": 0.01263427734375, "loss_xval": 0.0634765625, "num_input_tokens_seen": 388854532, "step": 4260 }, { "epoch": 17.754166666666666, "grad_norm": 3.878376093167518, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 388945092, "step": 4261 }, { "epoch": 17.754166666666666, "loss": 0.05230996012687683, "loss_ce": 2.8329745873634238e-06, "loss_iou": 0.283203125, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 388945092, "step": 4261 }, { "epoch": 17.758333333333333, "grad_norm": 5.19090268810429, "learning_rate": 5e-05, "loss": 0.0371, "num_input_tokens_seen": 389036112, "step": 4262 }, { "epoch": 17.758333333333333, "loss": 0.03373678773641586, "loss_ce": 7.233080850710394e-06, "loss_iou": 0.296875, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 389036112, "step": 4262 }, { "epoch": 17.7625, "grad_norm": 3.236849772082388, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 389127232, "step": 4263 }, { "epoch": 17.7625, "loss": 0.0690011978149414, "loss_ce": 9.548220987198874e-07, "loss_iou": 0.427734375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 389127232, "step": 4263 }, { "epoch": 17.766666666666666, "grad_norm": 2.869496523940095, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 389218456, "step": 4264 }, { "epoch": 17.766666666666666, "loss": 0.060494571924209595, "loss_ce": 0.00031390992808155715, "loss_iou": 0.1767578125, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 389218456, "step": 4264 }, { "epoch": 17.770833333333332, "grad_norm": 2.7422667525481383, "learning_rate": 5e-05, "loss": 0.0463, "num_input_tokens_seen": 389310060, "step": 4265 }, { "epoch": 17.770833333333332, "loss": 0.03156965970993042, "loss_ce": 6.855726951471297e-06, "loss_iou": 0.27734375, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 389310060, "step": 4265 }, { "epoch": 17.775, "grad_norm": 2.249769790802501, "learning_rate": 5e-05, "loss": 0.0577, "num_input_tokens_seen": 389401876, "step": 4266 }, { "epoch": 17.775, "loss": 0.08179079741239548, "loss_ce": 6.47258129902184e-05, "loss_iou": 0.34375, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 389401876, "step": 4266 }, { "epoch": 17.779166666666665, "grad_norm": 2.2773374954746934, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 389493344, "step": 4267 }, { "epoch": 17.779166666666665, "loss": 0.04999423027038574, "loss_ce": 6.435068826249335e-06, "loss_iou": 0.20703125, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 389493344, "step": 4267 }, { "epoch": 17.783333333333335, "grad_norm": 3.208639100920676, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 389584708, "step": 4268 }, { "epoch": 17.783333333333335, "loss": 0.04798169434070587, "loss_ce": 8.06142998044379e-06, "loss_iou": 0.2216796875, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 389584708, "step": 4268 }, { "epoch": 17.7875, "grad_norm": 3.1089691432350524, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 389676300, "step": 4269 }, { "epoch": 17.7875, "loss": 0.03627746179699898, "loss_ce": 6.835483509348705e-05, "loss_iou": 0.248046875, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 389676300, "step": 4269 }, { "epoch": 17.791666666666668, "grad_norm": 2.6802187206716517, "learning_rate": 5e-05, "loss": 0.0463, "num_input_tokens_seen": 389767748, "step": 4270 }, { "epoch": 17.791666666666668, "loss": 0.037262558937072754, "loss_ce": 5.946749297436327e-07, "loss_iou": 0.28125, "loss_num": 0.0074462890625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 389767748, "step": 4270 }, { "epoch": 17.795833333333334, "grad_norm": 3.279966907675864, "learning_rate": 5e-05, "loss": 0.025, "num_input_tokens_seen": 389859076, "step": 4271 }, { "epoch": 17.795833333333334, "loss": 0.023572321981191635, "loss_ce": 5.122803031554213e-06, "loss_iou": 0.27734375, "loss_num": 0.00469970703125, "loss_xval": 0.0235595703125, "num_input_tokens_seen": 389859076, "step": 4271 }, { "epoch": 17.8, "grad_norm": 2.912014259002682, "learning_rate": 5e-05, "loss": 0.0958, "num_input_tokens_seen": 389950156, "step": 4272 }, { "epoch": 17.8, "loss": 0.1355554312467575, "loss_ce": 1.1604141946008895e-05, "loss_iou": 0.158203125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 389950156, "step": 4272 }, { "epoch": 17.804166666666667, "grad_norm": 2.8477409993278586, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 390041020, "step": 4273 }, { "epoch": 17.804166666666667, "loss": 0.03160158917307854, "loss_ce": 6.368102276610443e-07, "loss_iou": 0.2373046875, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 390041020, "step": 4273 }, { "epoch": 17.808333333333334, "grad_norm": 3.1060506067007077, "learning_rate": 5e-05, "loss": 0.0746, "num_input_tokens_seen": 390132496, "step": 4274 }, { "epoch": 17.808333333333334, "loss": 0.09680266678333282, "loss_ce": 9.127247153628559e-07, "loss_iou": 0.314453125, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 390132496, "step": 4274 }, { "epoch": 17.8125, "grad_norm": 2.5458659203048075, "learning_rate": 5e-05, "loss": 0.0792, "num_input_tokens_seen": 390223444, "step": 4275 }, { "epoch": 17.8125, "loss": 0.0719880759716034, "loss_ce": 2.7624866561382078e-05, "loss_iou": 0.294921875, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 390223444, "step": 4275 }, { "epoch": 17.816666666666666, "grad_norm": 2.5042217476993076, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 390315716, "step": 4276 }, { "epoch": 17.816666666666666, "loss": 0.04687733203172684, "loss_ce": 9.960051102098078e-06, "loss_iou": 0.216796875, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 390315716, "step": 4276 }, { "epoch": 17.820833333333333, "grad_norm": 2.199339368642041, "learning_rate": 5e-05, "loss": 0.0738, "num_input_tokens_seen": 390406628, "step": 4277 }, { "epoch": 17.820833333333333, "loss": 0.07241851091384888, "loss_ce": 1.5560059182462282e-05, "loss_iou": 0.1845703125, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 390406628, "step": 4277 }, { "epoch": 17.825, "grad_norm": 2.845952078746692, "learning_rate": 5e-05, "loss": 0.0622, "num_input_tokens_seen": 390497156, "step": 4278 }, { "epoch": 17.825, "loss": 0.04003407433629036, "loss_ce": 0.00020863440295215696, "loss_iou": 0.361328125, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 390497156, "step": 4278 }, { "epoch": 17.829166666666666, "grad_norm": 3.380502408546681, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 390588864, "step": 4279 }, { "epoch": 17.829166666666666, "loss": 0.055008068680763245, "loss_ce": 3.064938937313855e-05, "loss_iou": 0.3203125, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 390588864, "step": 4279 }, { "epoch": 17.833333333333332, "grad_norm": 3.216941064128063, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 390679656, "step": 4280 }, { "epoch": 17.833333333333332, "loss": 0.046098776161670685, "loss_ce": 1.9742674339795485e-06, "loss_iou": 0.2451171875, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 390679656, "step": 4280 }, { "epoch": 17.8375, "grad_norm": 2.2422021450978504, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 390771036, "step": 4281 }, { "epoch": 17.8375, "loss": 0.05915020406246185, "loss_ce": 8.343104855157435e-05, "loss_iou": 0.1943359375, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 390771036, "step": 4281 }, { "epoch": 17.841666666666665, "grad_norm": 2.3078711383220663, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 390862020, "step": 4282 }, { "epoch": 17.841666666666665, "loss": 0.060885027050971985, "loss_ce": 2.455707999615697e-06, "loss_iou": 0.28515625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 390862020, "step": 4282 }, { "epoch": 17.845833333333335, "grad_norm": 3.894351865296879, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 390953620, "step": 4283 }, { "epoch": 17.845833333333335, "loss": 0.04284176975488663, "loss_ce": 1.0351399396313354e-05, "loss_iou": 0.28125, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 390953620, "step": 4283 }, { "epoch": 17.85, "grad_norm": 3.5549017376036667, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 391044964, "step": 4284 }, { "epoch": 17.85, "loss": 0.0379575677216053, "loss_ce": 5.473750934470445e-05, "loss_iou": 0.333984375, "loss_num": 0.007568359375, "loss_xval": 0.037841796875, "num_input_tokens_seen": 391044964, "step": 4284 }, { "epoch": 17.854166666666668, "grad_norm": 3.868964183207432, "learning_rate": 5e-05, "loss": 0.1196, "num_input_tokens_seen": 391136172, "step": 4285 }, { "epoch": 17.854166666666668, "loss": 0.1410410851240158, "loss_ce": 4.089220965397544e-06, "loss_iou": 0.26171875, "loss_num": 0.0283203125, "loss_xval": 0.140625, "num_input_tokens_seen": 391136172, "step": 4285 }, { "epoch": 17.858333333333334, "grad_norm": 3.646749375191905, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 391227456, "step": 4286 }, { "epoch": 17.858333333333334, "loss": 0.09271462261676788, "loss_ce": 2.2152019027998904e-06, "loss_iou": 0.388671875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 391227456, "step": 4286 }, { "epoch": 17.8625, "grad_norm": 2.4422908842745312, "learning_rate": 5e-05, "loss": 0.0948, "num_input_tokens_seen": 391318812, "step": 4287 }, { "epoch": 17.8625, "loss": 0.051978304982185364, "loss_ce": 2.9758124583167955e-05, "loss_iou": 0.287109375, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 391318812, "step": 4287 }, { "epoch": 17.866666666666667, "grad_norm": 7.856508421494934, "learning_rate": 5e-05, "loss": 0.0614, "num_input_tokens_seen": 391410136, "step": 4288 }, { "epoch": 17.866666666666667, "loss": 0.08443892002105713, "loss_ce": 0.0003019559953827411, "loss_iou": 0.375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 391410136, "step": 4288 }, { "epoch": 17.870833333333334, "grad_norm": 3.2314063400427, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 391501260, "step": 4289 }, { "epoch": 17.870833333333334, "loss": 0.0704357773065567, "loss_ce": 8.843479008646682e-06, "loss_iou": 0.32421875, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 391501260, "step": 4289 }, { "epoch": 17.875, "grad_norm": 3.040312817410731, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 391592232, "step": 4290 }, { "epoch": 17.875, "loss": 0.053047988563776016, "loss_ce": 8.084026035248826e-07, "loss_iou": 0.287109375, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 391592232, "step": 4290 }, { "epoch": 17.879166666666666, "grad_norm": 4.770148941741689, "learning_rate": 5e-05, "loss": 0.0382, "num_input_tokens_seen": 391683592, "step": 4291 }, { "epoch": 17.879166666666666, "loss": 0.03159454092383385, "loss_ce": 1.075425825547427e-05, "loss_iou": 0.203125, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 391683592, "step": 4291 }, { "epoch": 17.883333333333333, "grad_norm": 2.5823819704017885, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 391774764, "step": 4292 }, { "epoch": 17.883333333333333, "loss": 0.03879944980144501, "loss_ce": 3.982367161370348e-06, "loss_iou": 0.2294921875, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 391774764, "step": 4292 }, { "epoch": 17.8875, "grad_norm": 1.699316394700182, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 391866520, "step": 4293 }, { "epoch": 17.8875, "loss": 0.04242832958698273, "loss_ce": 6.99307129252702e-05, "loss_iou": 0.17578125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 391866520, "step": 4293 }, { "epoch": 17.891666666666666, "grad_norm": 2.1108131520166973, "learning_rate": 5e-05, "loss": 0.0367, "num_input_tokens_seen": 391957344, "step": 4294 }, { "epoch": 17.891666666666666, "loss": 0.04325953498482704, "loss_ce": 8.688614343554946e-07, "loss_iou": 0.294921875, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 391957344, "step": 4294 }, { "epoch": 17.895833333333332, "grad_norm": 2.206906552531031, "learning_rate": 5e-05, "loss": 0.0535, "num_input_tokens_seen": 392048664, "step": 4295 }, { "epoch": 17.895833333333332, "loss": 0.058287523686885834, "loss_ce": 7.524287502747029e-05, "loss_iou": 0.2734375, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 392048664, "step": 4295 }, { "epoch": 17.9, "grad_norm": 2.146888954383353, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 392140128, "step": 4296 }, { "epoch": 17.9, "loss": 0.0633581280708313, "loss_ce": 3.6382100461196387e-06, "loss_iou": 0.27734375, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 392140128, "step": 4296 }, { "epoch": 17.904166666666665, "grad_norm": 2.5035802799334212, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 392231976, "step": 4297 }, { "epoch": 17.904166666666665, "loss": 0.06069495528936386, "loss_ce": 3.36383527610451e-05, "loss_iou": 0.1826171875, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 392231976, "step": 4297 }, { "epoch": 17.908333333333335, "grad_norm": 2.688228632179773, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 392323720, "step": 4298 }, { "epoch": 17.908333333333335, "loss": 0.0538039356470108, "loss_ce": 1.4436795936489943e-06, "loss_iou": 0.189453125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 392323720, "step": 4298 }, { "epoch": 17.9125, "grad_norm": 2.4486532513400023, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 392415156, "step": 4299 }, { "epoch": 17.9125, "loss": 0.03337834030389786, "loss_ce": 6.840488640591502e-05, "loss_iou": 0.19921875, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 392415156, "step": 4299 }, { "epoch": 17.916666666666668, "grad_norm": 3.0446213179202104, "learning_rate": 5e-05, "loss": 0.0517, "num_input_tokens_seen": 392506352, "step": 4300 }, { "epoch": 17.916666666666668, "loss": 0.0623907670378685, "loss_ce": 2.8097461836296134e-05, "loss_iou": 0.201171875, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 392506352, "step": 4300 }, { "epoch": 17.920833333333334, "grad_norm": 12.97766674779127, "learning_rate": 5e-05, "loss": 0.081, "num_input_tokens_seen": 392597360, "step": 4301 }, { "epoch": 17.920833333333334, "loss": 0.10218116641044617, "loss_ce": 8.312406862387434e-06, "loss_iou": 0.28515625, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 392597360, "step": 4301 }, { "epoch": 17.925, "grad_norm": 4.701325634383643, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 392688784, "step": 4302 }, { "epoch": 17.925, "loss": 0.06068724766373634, "loss_ce": 3.047289055757574e-06, "loss_iou": 0.294921875, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 392688784, "step": 4302 }, { "epoch": 17.929166666666667, "grad_norm": 3.9597801825447636, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 392780144, "step": 4303 }, { "epoch": 17.929166666666667, "loss": 0.04145951569080353, "loss_ce": 1.387702468491625e-06, "loss_iou": 0.298828125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 392780144, "step": 4303 }, { "epoch": 17.933333333333334, "grad_norm": 16.030772287954104, "learning_rate": 5e-05, "loss": 0.0752, "num_input_tokens_seen": 392871692, "step": 4304 }, { "epoch": 17.933333333333334, "loss": 0.08494758605957031, "loss_ce": 0.0015430464409291744, "loss_iou": 0.337890625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 392871692, "step": 4304 }, { "epoch": 17.9375, "grad_norm": 2.044012042465525, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 392963336, "step": 4305 }, { "epoch": 17.9375, "loss": 0.07563067972660065, "loss_ce": 0.0002598950522951782, "loss_iou": 0.11083984375, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 392963336, "step": 4305 }, { "epoch": 17.941666666666666, "grad_norm": 2.4403896730304293, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 393054780, "step": 4306 }, { "epoch": 17.941666666666666, "loss": 0.05522942170500755, "loss_ce": 0.0011827910784631968, "loss_iou": 0.18359375, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 393054780, "step": 4306 }, { "epoch": 17.945833333333333, "grad_norm": 1.957076259446295, "learning_rate": 5e-05, "loss": 0.06, "num_input_tokens_seen": 393145220, "step": 4307 }, { "epoch": 17.945833333333333, "loss": 0.051417890936136246, "loss_ce": 0.00016362001770175993, "loss_iou": 0.189453125, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 393145220, "step": 4307 }, { "epoch": 17.95, "grad_norm": 2.9056474378085673, "learning_rate": 5e-05, "loss": 0.098, "num_input_tokens_seen": 393236968, "step": 4308 }, { "epoch": 17.95, "loss": 0.13753195106983185, "loss_ce": 4.49333720098366e-06, "loss_iou": 0.25390625, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 393236968, "step": 4308 }, { "epoch": 17.954166666666666, "grad_norm": 2.9390624829772265, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 393328088, "step": 4309 }, { "epoch": 17.954166666666666, "loss": 0.05638705566525459, "loss_ce": 0.0005322614451870322, "loss_iou": 0.27734375, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 393328088, "step": 4309 }, { "epoch": 17.958333333333332, "grad_norm": 2.3808902559453875, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 393419056, "step": 4310 }, { "epoch": 17.958333333333332, "loss": 0.05448612570762634, "loss_ce": 2.7511428925208747e-05, "loss_iou": 0.30078125, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 393419056, "step": 4310 }, { "epoch": 17.9625, "grad_norm": 11.42592444582553, "learning_rate": 5e-05, "loss": 0.053, "num_input_tokens_seen": 393509936, "step": 4311 }, { "epoch": 17.9625, "loss": 0.03451695665717125, "loss_ce": 1.5761654594825814e-06, "loss_iou": 0.2431640625, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 393509936, "step": 4311 }, { "epoch": 17.966666666666665, "grad_norm": 2.026598228509948, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 393601652, "step": 4312 }, { "epoch": 17.966666666666665, "loss": 0.026221077889204025, "loss_ce": 6.477197530330159e-06, "loss_iou": 0.1796875, "loss_num": 0.0052490234375, "loss_xval": 0.0262451171875, "num_input_tokens_seen": 393601652, "step": 4312 }, { "epoch": 17.970833333333335, "grad_norm": 2.545206358777459, "learning_rate": 5e-05, "loss": 0.0377, "num_input_tokens_seen": 393692832, "step": 4313 }, { "epoch": 17.970833333333335, "loss": 0.03803935647010803, "loss_ce": 0.00018992825062014163, "loss_iou": 0.2470703125, "loss_num": 0.007568359375, "loss_xval": 0.037841796875, "num_input_tokens_seen": 393692832, "step": 4313 }, { "epoch": 17.975, "grad_norm": 2.5300715189162353, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 393784424, "step": 4314 }, { "epoch": 17.975, "loss": 0.0279478058218956, "loss_ce": 1.3326807675184682e-06, "loss_iou": 0.263671875, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 393784424, "step": 4314 }, { "epoch": 17.979166666666668, "grad_norm": 3.1352700422238295, "learning_rate": 5e-05, "loss": 0.048, "num_input_tokens_seen": 393875060, "step": 4315 }, { "epoch": 17.979166666666668, "loss": 0.0344746857881546, "loss_ce": 5.083212727186037e-06, "loss_iou": 0.1748046875, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 393875060, "step": 4315 }, { "epoch": 17.983333333333334, "grad_norm": 2.1763173304718, "learning_rate": 5e-05, "loss": 0.0618, "num_input_tokens_seen": 393966176, "step": 4316 }, { "epoch": 17.983333333333334, "loss": 0.03647957369685173, "loss_ce": 3.4403892641421407e-06, "loss_iou": 0.2080078125, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 393966176, "step": 4316 }, { "epoch": 17.9875, "grad_norm": 2.6116494285117655, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 394057324, "step": 4317 }, { "epoch": 17.9875, "loss": 0.04278308153152466, "loss_ce": 2.7956442863796838e-05, "loss_iou": 0.1826171875, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 394057324, "step": 4317 }, { "epoch": 17.991666666666667, "grad_norm": 3.24737325938247, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 394148996, "step": 4318 }, { "epoch": 17.991666666666667, "loss": 0.037456609308719635, "loss_ce": 0.00020227550703566521, "loss_iou": 0.16015625, "loss_num": 0.0074462890625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 394148996, "step": 4318 }, { "epoch": 17.995833333333334, "grad_norm": 2.2539145328902404, "learning_rate": 5e-05, "loss": 0.0969, "num_input_tokens_seen": 394240284, "step": 4319 }, { "epoch": 17.995833333333334, "loss": 0.13667461276054382, "loss_ce": 1.6307496935041854e-06, "loss_iou": 0.2265625, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 394240284, "step": 4319 }, { "epoch": 18.0, "grad_norm": 2.718931662003016, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 394331148, "step": 4320 }, { "epoch": 18.0, "loss": 0.06142626702785492, "loss_ce": 2.0113529899390414e-06, "loss_iou": 0.2490234375, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 394331148, "step": 4320 }, { "epoch": 18.004166666666666, "grad_norm": 3.195365105851593, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 394420940, "step": 4321 }, { "epoch": 18.004166666666666, "loss": 0.0539390966296196, "loss_ce": 1.4537547031068243e-05, "loss_iou": 0.251953125, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 394420940, "step": 4321 }, { "epoch": 18.008333333333333, "grad_norm": 3.298762019359425, "learning_rate": 5e-05, "loss": 0.0733, "num_input_tokens_seen": 394511684, "step": 4322 }, { "epoch": 18.008333333333333, "loss": 0.10038083791732788, "loss_ce": 8.525988960172981e-06, "loss_iou": 0.27734375, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 394511684, "step": 4322 }, { "epoch": 18.0125, "grad_norm": 3.905792510038796, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 394603636, "step": 4323 }, { "epoch": 18.0125, "loss": 0.046825211495161057, "loss_ce": 1.124912978411885e-05, "loss_iou": 0.298828125, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 394603636, "step": 4323 }, { "epoch": 18.016666666666666, "grad_norm": 1.8703517037834887, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 394694048, "step": 4324 }, { "epoch": 18.016666666666666, "loss": 0.027627240866422653, "loss_ce": 1.203496367452317e-06, "loss_iou": 0.203125, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 394694048, "step": 4324 }, { "epoch": 18.020833333333332, "grad_norm": 1.514189991927366, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 394785636, "step": 4325 }, { "epoch": 18.020833333333332, "loss": 0.04702939838171005, "loss_ce": 1.8090759112965316e-06, "loss_iou": 0.1787109375, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 394785636, "step": 4325 }, { "epoch": 18.025, "grad_norm": 1.0784396605015283, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 394876396, "step": 4326 }, { "epoch": 18.025, "loss": 0.04121756553649902, "loss_ce": 3.5790712900052313e-06, "loss_iou": 0.322265625, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 394876396, "step": 4326 }, { "epoch": 18.029166666666665, "grad_norm": 0.862503992716872, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 394966044, "step": 4327 }, { "epoch": 18.029166666666665, "loss": 0.048645853996276855, "loss_ce": 8.345961077793618e-07, "loss_iou": 0.2353515625, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 394966044, "step": 4327 }, { "epoch": 18.033333333333335, "grad_norm": 1.8267676354610045, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 395057856, "step": 4328 }, { "epoch": 18.033333333333335, "loss": 0.026156704872846603, "loss_ce": 3.141112074445118e-06, "loss_iou": 0.275390625, "loss_num": 0.005218505859375, "loss_xval": 0.026123046875, "num_input_tokens_seen": 395057856, "step": 4328 }, { "epoch": 18.0375, "grad_norm": 1.1381372255017146, "learning_rate": 5e-05, "loss": 0.0339, "num_input_tokens_seen": 395149312, "step": 4329 }, { "epoch": 18.0375, "loss": 0.03376764431595802, "loss_ce": 3.8090514863142744e-05, "loss_iou": 0.296875, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 395149312, "step": 4329 }, { "epoch": 18.041666666666668, "grad_norm": 1.785275220468087, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 395240220, "step": 4330 }, { "epoch": 18.041666666666668, "loss": 0.07155978679656982, "loss_ce": 1.8952950995299034e-05, "loss_iou": 0.234375, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 395240220, "step": 4330 }, { "epoch": 18.045833333333334, "grad_norm": 1.9882529377175069, "learning_rate": 5e-05, "loss": 0.0445, "num_input_tokens_seen": 395331656, "step": 4331 }, { "epoch": 18.045833333333334, "loss": 0.042426083236932755, "loss_ce": 6.651344392594183e-06, "loss_iou": 0.3125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 395331656, "step": 4331 }, { "epoch": 18.05, "grad_norm": 2.344335366295014, "learning_rate": 5e-05, "loss": 0.0724, "num_input_tokens_seen": 395423220, "step": 4332 }, { "epoch": 18.05, "loss": 0.04951345548033714, "loss_ce": 0.009115813300013542, "loss_iou": 0.0478515625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 395423220, "step": 4332 }, { "epoch": 18.054166666666667, "grad_norm": 2.4558276168301756, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 395514576, "step": 4333 }, { "epoch": 18.054166666666667, "loss": 0.03360137343406677, "loss_ce": 1.5182785091383266e-06, "loss_iou": 0.33203125, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 395514576, "step": 4333 }, { "epoch": 18.058333333333334, "grad_norm": 2.0992927584266283, "learning_rate": 5e-05, "loss": 0.0657, "num_input_tokens_seen": 395606148, "step": 4334 }, { "epoch": 18.058333333333334, "loss": 0.047148894518613815, "loss_ce": 1.44934074342018e-05, "loss_iou": 0.314453125, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 395606148, "step": 4334 }, { "epoch": 18.0625, "grad_norm": 1.6144222585513672, "learning_rate": 5e-05, "loss": 0.0685, "num_input_tokens_seen": 395697324, "step": 4335 }, { "epoch": 18.0625, "loss": 0.03702807053923607, "loss_ce": 2.617456175357802e-06, "loss_iou": 0.1982421875, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 395697324, "step": 4335 }, { "epoch": 18.066666666666666, "grad_norm": 2.6503863866823036, "learning_rate": 5e-05, "loss": 0.0531, "num_input_tokens_seen": 395789076, "step": 4336 }, { "epoch": 18.066666666666666, "loss": 0.04408771172165871, "loss_ce": 2.0329125618445687e-05, "loss_iou": 0.3125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 395789076, "step": 4336 }, { "epoch": 18.070833333333333, "grad_norm": 2.6723716054679945, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 395880504, "step": 4337 }, { "epoch": 18.070833333333333, "loss": 0.07788537442684174, "loss_ce": 4.5142041926737875e-06, "loss_iou": 0.16015625, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 395880504, "step": 4337 }, { "epoch": 18.075, "grad_norm": 2.225487763718285, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 395972012, "step": 4338 }, { "epoch": 18.075, "loss": 0.030444277450442314, "loss_ce": 1.0621975889080204e-05, "loss_iou": 0.040283203125, "loss_num": 0.006103515625, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 395972012, "step": 4338 }, { "epoch": 18.079166666666666, "grad_norm": 2.464868968170747, "learning_rate": 5e-05, "loss": 0.0282, "num_input_tokens_seen": 396063152, "step": 4339 }, { "epoch": 18.079166666666666, "loss": 0.03203795477747917, "loss_ce": 2.1279520296957344e-06, "loss_iou": 0.1796875, "loss_num": 0.00640869140625, "loss_xval": 0.031982421875, "num_input_tokens_seen": 396063152, "step": 4339 }, { "epoch": 18.083333333333332, "grad_norm": 3.451951339092108, "learning_rate": 5e-05, "loss": 0.0816, "num_input_tokens_seen": 396154468, "step": 4340 }, { "epoch": 18.083333333333332, "loss": 0.13343852758407593, "loss_ce": 4.1616868884375435e-07, "loss_iou": 0.353515625, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 396154468, "step": 4340 }, { "epoch": 18.0875, "grad_norm": 2.902394444532942, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 396245768, "step": 4341 }, { "epoch": 18.0875, "loss": 0.04989688843488693, "loss_ce": 6.509374088636832e-07, "loss_iou": 0.236328125, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 396245768, "step": 4341 }, { "epoch": 18.091666666666665, "grad_norm": 2.314349492678863, "learning_rate": 5e-05, "loss": 0.0823, "num_input_tokens_seen": 396336292, "step": 4342 }, { "epoch": 18.091666666666665, "loss": 0.1165471151471138, "loss_ce": 4.858984539168887e-07, "loss_iou": 0.275390625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 396336292, "step": 4342 }, { "epoch": 18.095833333333335, "grad_norm": 11.428303916656995, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 396427380, "step": 4343 }, { "epoch": 18.095833333333335, "loss": 0.05671941116452217, "loss_ce": 2.4900548396544764e-06, "loss_iou": 0.240234375, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 396427380, "step": 4343 }, { "epoch": 18.1, "grad_norm": 2.4921457118510366, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 396519188, "step": 4344 }, { "epoch": 18.1, "loss": 0.045594222843647, "loss_ce": 8.590166544308886e-06, "loss_iou": 0.146484375, "loss_num": 0.00909423828125, "loss_xval": 0.045654296875, "num_input_tokens_seen": 396519188, "step": 4344 }, { "epoch": 18.104166666666668, "grad_norm": 2.9869058534307946, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 396610296, "step": 4345 }, { "epoch": 18.104166666666668, "loss": 0.044300176203250885, "loss_ce": 3.910792202077573e-06, "loss_iou": 0.32421875, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 396610296, "step": 4345 }, { "epoch": 18.108333333333334, "grad_norm": 2.334854811432145, "learning_rate": 5e-05, "loss": 0.0443, "num_input_tokens_seen": 396701692, "step": 4346 }, { "epoch": 18.108333333333334, "loss": 0.04280184209346771, "loss_ce": 9.392442734679207e-07, "loss_iou": 0.054443359375, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 396701692, "step": 4346 }, { "epoch": 18.1125, "grad_norm": 3.2656062480042594, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 396792740, "step": 4347 }, { "epoch": 18.1125, "loss": 0.025705434381961823, "loss_ce": 1.7265369024244137e-05, "loss_iou": 0.1513671875, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 396792740, "step": 4347 }, { "epoch": 18.116666666666667, "grad_norm": 1.782148683118789, "learning_rate": 5e-05, "loss": 0.0685, "num_input_tokens_seen": 396884024, "step": 4348 }, { "epoch": 18.116666666666667, "loss": 0.04716215282678604, "loss_ce": 0.0009890544461086392, "loss_iou": 0.259765625, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 396884024, "step": 4348 }, { "epoch": 18.120833333333334, "grad_norm": 2.713392053335857, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 396975032, "step": 4349 }, { "epoch": 18.120833333333334, "loss": 0.0341886505484581, "loss_ce": 2.4221051717177033e-05, "loss_iou": 0.2080078125, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 396975032, "step": 4349 }, { "epoch": 18.125, "grad_norm": 1.4380388319248392, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 397065464, "step": 4350 }, { "epoch": 18.125, "loss": 0.04002754017710686, "loss_ce": 3.737887709576171e-06, "loss_iou": 0.146484375, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 397065464, "step": 4350 }, { "epoch": 18.129166666666666, "grad_norm": 1.385735047535627, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 397156184, "step": 4351 }, { "epoch": 18.129166666666666, "loss": 0.0432908833026886, "loss_ce": 1.6999715626297984e-06, "loss_iou": 0.1513671875, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 397156184, "step": 4351 }, { "epoch": 18.133333333333333, "grad_norm": 1.539877158566921, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 397247468, "step": 4352 }, { "epoch": 18.133333333333333, "loss": 0.09733171761035919, "loss_ce": 3.5312802992848447e-06, "loss_iou": 0.220703125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 397247468, "step": 4352 }, { "epoch": 18.1375, "grad_norm": 2.5284698248198536, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 397338636, "step": 4353 }, { "epoch": 18.1375, "loss": 0.038271788507699966, "loss_ce": 1.0375433703302406e-05, "loss_iou": 0.287109375, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 397338636, "step": 4353 }, { "epoch": 18.141666666666666, "grad_norm": 2.5679338017934827, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 397428860, "step": 4354 }, { "epoch": 18.141666666666666, "loss": 0.04413112998008728, "loss_ce": 0.002199977170675993, "loss_iou": 0.404296875, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 397428860, "step": 4354 }, { "epoch": 18.145833333333332, "grad_norm": 3.5753304198707, "learning_rate": 5e-05, "loss": 0.0861, "num_input_tokens_seen": 397520156, "step": 4355 }, { "epoch": 18.145833333333332, "loss": 0.12257055938243866, "loss_ce": 1.1972469110332895e-05, "loss_iou": 0.162109375, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 397520156, "step": 4355 }, { "epoch": 18.15, "grad_norm": 2.6806511987389325, "learning_rate": 5e-05, "loss": 0.0678, "num_input_tokens_seen": 397611860, "step": 4356 }, { "epoch": 18.15, "loss": 0.07975918054580688, "loss_ce": 3.20112521876581e-05, "loss_iou": 0.2314453125, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 397611860, "step": 4356 }, { "epoch": 18.154166666666665, "grad_norm": 2.273739343091957, "learning_rate": 5e-05, "loss": 0.0978, "num_input_tokens_seen": 397702408, "step": 4357 }, { "epoch": 18.154166666666665, "loss": 0.13732938468456268, "loss_ce": 2.8608226898541034e-07, "loss_iou": 0.33203125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 397702408, "step": 4357 }, { "epoch": 18.158333333333335, "grad_norm": 6.861383555688523, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 397793940, "step": 4358 }, { "epoch": 18.158333333333335, "loss": 0.03570987284183502, "loss_ce": 4.307841663830914e-06, "loss_iou": 0.306640625, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 397793940, "step": 4358 }, { "epoch": 18.1625, "grad_norm": 1.3036399508182497, "learning_rate": 5e-05, "loss": 0.0256, "num_input_tokens_seen": 397884480, "step": 4359 }, { "epoch": 18.1625, "loss": 0.022728780284523964, "loss_ce": 8.135599500747048e-07, "loss_iou": 0.267578125, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 397884480, "step": 4359 }, { "epoch": 18.166666666666668, "grad_norm": 1.7651094670492964, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 397975704, "step": 4360 }, { "epoch": 18.166666666666668, "loss": 0.03605952113866806, "loss_ce": 3.0001363029441563e-06, "loss_iou": 0.1787109375, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 397975704, "step": 4360 }, { "epoch": 18.170833333333334, "grad_norm": 2.5718917594701285, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 398067552, "step": 4361 }, { "epoch": 18.170833333333334, "loss": 0.03395594656467438, "loss_ce": 5.143494945514249e-06, "loss_iou": 0.244140625, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 398067552, "step": 4361 }, { "epoch": 18.175, "grad_norm": 2.618190785133452, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 398159520, "step": 4362 }, { "epoch": 18.175, "loss": 0.04618554934859276, "loss_ce": 9.637584298616275e-05, "loss_iou": 0.26171875, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 398159520, "step": 4362 }, { "epoch": 18.179166666666667, "grad_norm": 2.9221136017576477, "learning_rate": 5e-05, "loss": 0.0572, "num_input_tokens_seen": 398251144, "step": 4363 }, { "epoch": 18.179166666666667, "loss": 0.0848899558186531, "loss_ce": 5.3128806030144915e-06, "loss_iou": 0.3125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 398251144, "step": 4363 }, { "epoch": 18.183333333333334, "grad_norm": 2.9717430372577516, "learning_rate": 5e-05, "loss": 0.076, "num_input_tokens_seen": 398342744, "step": 4364 }, { "epoch": 18.183333333333334, "loss": 0.09848225116729736, "loss_ce": 2.021944965235889e-06, "loss_iou": 0.328125, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 398342744, "step": 4364 }, { "epoch": 18.1875, "grad_norm": 3.109744655408144, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 398434288, "step": 4365 }, { "epoch": 18.1875, "loss": 0.05099605768918991, "loss_ce": 1.1823774457297986e-06, "loss_iou": 0.353515625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 398434288, "step": 4365 }, { "epoch": 18.191666666666666, "grad_norm": 2.159842625416849, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 398525456, "step": 4366 }, { "epoch": 18.191666666666666, "loss": 0.04072629660367966, "loss_ce": 5.907006084271416e-07, "loss_iou": 0.2490234375, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 398525456, "step": 4366 }, { "epoch": 18.195833333333333, "grad_norm": 8.228009704876078, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 398616876, "step": 4367 }, { "epoch": 18.195833333333333, "loss": 0.05673077702522278, "loss_ce": 1.3857466910849325e-05, "loss_iou": 0.2294921875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 398616876, "step": 4367 }, { "epoch": 18.2, "grad_norm": 2.012030966733271, "learning_rate": 5e-05, "loss": 0.0701, "num_input_tokens_seen": 398708284, "step": 4368 }, { "epoch": 18.2, "loss": 0.047302864491939545, "loss_ce": 6.211755589902168e-07, "loss_iou": 0.32421875, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 398708284, "step": 4368 }, { "epoch": 18.204166666666666, "grad_norm": 8.039653286483375, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 398800056, "step": 4369 }, { "epoch": 18.204166666666666, "loss": 0.06590539216995239, "loss_ce": 2.680851594050182e-06, "loss_iou": 0.2236328125, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 398800056, "step": 4369 }, { "epoch": 18.208333333333332, "grad_norm": 2.2663464506267648, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 398891224, "step": 4370 }, { "epoch": 18.208333333333332, "loss": 0.04339677095413208, "loss_ce": 7.760647804389009e-07, "loss_iou": 0.26171875, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 398891224, "step": 4370 }, { "epoch": 18.2125, "grad_norm": 9.314328389208157, "learning_rate": 5e-05, "loss": 0.0345, "num_input_tokens_seen": 398983332, "step": 4371 }, { "epoch": 18.2125, "loss": 0.04535383731126785, "loss_ce": 4.71842167826253e-06, "loss_iou": 0.29296875, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 398983332, "step": 4371 }, { "epoch": 18.216666666666665, "grad_norm": 3.217224616633414, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 399074764, "step": 4372 }, { "epoch": 18.216666666666665, "loss": 0.0589253231883049, "loss_ce": 5.691545084118843e-05, "loss_iou": 0.3046875, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 399074764, "step": 4372 }, { "epoch": 18.220833333333335, "grad_norm": 2.730079477146947, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 399165704, "step": 4373 }, { "epoch": 18.220833333333335, "loss": 0.042066872119903564, "loss_ce": 0.004362402018159628, "loss_iou": 0.2275390625, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 399165704, "step": 4373 }, { "epoch": 18.225, "grad_norm": 2.0687422921064287, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 399257212, "step": 4374 }, { "epoch": 18.225, "loss": 0.09087371826171875, "loss_ce": 7.631599146407098e-06, "loss_iou": 0.29296875, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 399257212, "step": 4374 }, { "epoch": 18.229166666666668, "grad_norm": 1.6668812988733708, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 399347372, "step": 4375 }, { "epoch": 18.229166666666668, "loss": 0.046204306185245514, "loss_ce": 0.0001075049804057926, "loss_iou": 0.06689453125, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 399347372, "step": 4375 }, { "epoch": 18.233333333333334, "grad_norm": 2.0372467556218967, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 399438828, "step": 4376 }, { "epoch": 18.233333333333334, "loss": 0.07019403576850891, "loss_ce": 3.6076583000976825e-06, "loss_iou": 0.271484375, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 399438828, "step": 4376 }, { "epoch": 18.2375, "grad_norm": 3.055557767338919, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 399530060, "step": 4377 }, { "epoch": 18.2375, "loss": 0.06096126139163971, "loss_ce": 2.3983773189684143e-06, "loss_iou": 0.27734375, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 399530060, "step": 4377 }, { "epoch": 18.241666666666667, "grad_norm": 2.99983240473986, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 399621304, "step": 4378 }, { "epoch": 18.241666666666667, "loss": 0.0324772372841835, "loss_ce": 0.00018963986076414585, "loss_iou": 0.328125, "loss_num": 0.0064697265625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 399621304, "step": 4378 }, { "epoch": 18.245833333333334, "grad_norm": 2.5880110630261663, "learning_rate": 5e-05, "loss": 0.0561, "num_input_tokens_seen": 399712828, "step": 4379 }, { "epoch": 18.245833333333334, "loss": 0.06883368641138077, "loss_ce": 1.2874111234850716e-06, "loss_iou": 0.2001953125, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 399712828, "step": 4379 }, { "epoch": 18.25, "grad_norm": 3.0573426753050614, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 399804176, "step": 4380 }, { "epoch": 18.25, "loss": 0.06015327572822571, "loss_ce": 9.468204370932654e-05, "loss_iou": 0.34765625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 399804176, "step": 4380 }, { "epoch": 18.254166666666666, "grad_norm": 2.6736121122866146, "learning_rate": 5e-05, "loss": 0.0804, "num_input_tokens_seen": 399895460, "step": 4381 }, { "epoch": 18.254166666666666, "loss": 0.0865228921175003, "loss_ce": 5.557565145863919e-06, "loss_iou": 0.2490234375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 399895460, "step": 4381 }, { "epoch": 18.258333333333333, "grad_norm": 4.602344446400555, "learning_rate": 5e-05, "loss": 0.0707, "num_input_tokens_seen": 399987048, "step": 4382 }, { "epoch": 18.258333333333333, "loss": 0.10242481529712677, "loss_ce": 7.821878170943819e-06, "loss_iou": 0.12451171875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 399987048, "step": 4382 }, { "epoch": 18.2625, "grad_norm": 1.8709337571104105, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 400078308, "step": 4383 }, { "epoch": 18.2625, "loss": 0.043266646564006805, "loss_ce": 3.4821675853891065e-07, "loss_iou": 0.23046875, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 400078308, "step": 4383 }, { "epoch": 18.266666666666666, "grad_norm": 2.7737642445213506, "learning_rate": 5e-05, "loss": 0.1023, "num_input_tokens_seen": 400169996, "step": 4384 }, { "epoch": 18.266666666666666, "loss": 0.15319259464740753, "loss_ce": 1.9784554297075374e-06, "loss_iou": 0.3203125, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 400169996, "step": 4384 }, { "epoch": 18.270833333333332, "grad_norm": 2.9567732403055467, "learning_rate": 5e-05, "loss": 0.0391, "num_input_tokens_seen": 400261664, "step": 4385 }, { "epoch": 18.270833333333332, "loss": 0.037430521100759506, "loss_ce": 7.110501201168518e-07, "loss_iou": 0.23828125, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 400261664, "step": 4385 }, { "epoch": 18.275, "grad_norm": 2.929703617373839, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 400353208, "step": 4386 }, { "epoch": 18.275, "loss": 0.08940213918685913, "loss_ce": 3.1412902899319306e-05, "loss_iou": 0.287109375, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 400353208, "step": 4386 }, { "epoch": 18.279166666666665, "grad_norm": 4.740133168669614, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 400444956, "step": 4387 }, { "epoch": 18.279166666666665, "loss": 0.05769924074411392, "loss_ce": 0.00014308842946775258, "loss_iou": 0.240234375, "loss_num": 0.011474609375, "loss_xval": 0.0576171875, "num_input_tokens_seen": 400444956, "step": 4387 }, { "epoch": 18.283333333333335, "grad_norm": 2.5047182403696144, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 400536092, "step": 4388 }, { "epoch": 18.283333333333335, "loss": 0.0511624850332737, "loss_ce": 1.5020019418443553e-05, "loss_iou": 0.2109375, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 400536092, "step": 4388 }, { "epoch": 18.2875, "grad_norm": 3.5861856966319756, "learning_rate": 5e-05, "loss": 0.13, "num_input_tokens_seen": 400626832, "step": 4389 }, { "epoch": 18.2875, "loss": 0.21249884366989136, "loss_ce": 4.950234142597765e-06, "loss_iou": 0.26171875, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 400626832, "step": 4389 }, { "epoch": 18.291666666666668, "grad_norm": 2.484994269711342, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 400718168, "step": 4390 }, { "epoch": 18.291666666666668, "loss": 0.04919488728046417, "loss_ce": 4.633032949641347e-05, "loss_iou": 0.26953125, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 400718168, "step": 4390 }, { "epoch": 18.295833333333334, "grad_norm": 3.143673198657142, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 400808980, "step": 4391 }, { "epoch": 18.295833333333334, "loss": 0.06095048785209656, "loss_ce": 3.740333704627119e-05, "loss_iou": 0.1796875, "loss_num": 0.01214599609375, "loss_xval": 0.06103515625, "num_input_tokens_seen": 400808980, "step": 4391 }, { "epoch": 18.3, "grad_norm": 2.916985689361712, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 400900012, "step": 4392 }, { "epoch": 18.3, "loss": 0.032847288995981216, "loss_ce": 2.74403805633483e-06, "loss_iou": 0.1767578125, "loss_num": 0.006561279296875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 400900012, "step": 4392 }, { "epoch": 18.304166666666667, "grad_norm": 3.421911908192797, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 400991240, "step": 4393 }, { "epoch": 18.304166666666667, "loss": 0.054063111543655396, "loss_ce": 1.2219868494867114e-06, "loss_iou": 0.330078125, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 400991240, "step": 4393 }, { "epoch": 18.308333333333334, "grad_norm": 2.4731271097465477, "learning_rate": 5e-05, "loss": 0.091, "num_input_tokens_seen": 401082824, "step": 4394 }, { "epoch": 18.308333333333334, "loss": 0.0979473888874054, "loss_ce": 1.222972855430271e-06, "loss_iou": 0.259765625, "loss_num": 0.01953125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 401082824, "step": 4394 }, { "epoch": 18.3125, "grad_norm": 2.725491616054956, "learning_rate": 5e-05, "loss": 0.0643, "num_input_tokens_seen": 401174452, "step": 4395 }, { "epoch": 18.3125, "loss": 0.040101826190948486, "loss_ce": 3.224733518436551e-05, "loss_iou": 0.298828125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 401174452, "step": 4395 }, { "epoch": 18.316666666666666, "grad_norm": 2.2491274306415954, "learning_rate": 5e-05, "loss": 0.0591, "num_input_tokens_seen": 401266076, "step": 4396 }, { "epoch": 18.316666666666666, "loss": 0.06935058534145355, "loss_ce": 1.4649865079263691e-05, "loss_iou": 0.224609375, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 401266076, "step": 4396 }, { "epoch": 18.320833333333333, "grad_norm": 1.9773627042160795, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 401357832, "step": 4397 }, { "epoch": 18.320833333333333, "loss": 0.03888298198580742, "loss_ce": 3.5863831726601347e-06, "loss_iou": 0.2578125, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 401357832, "step": 4397 }, { "epoch": 18.325, "grad_norm": 4.594254689727872, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 401449124, "step": 4398 }, { "epoch": 18.325, "loss": 0.0331290028989315, "loss_ce": 2.1720934455515817e-06, "loss_iou": 0.1171875, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 401449124, "step": 4398 }, { "epoch": 18.329166666666666, "grad_norm": 1.8354731404458435, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 401540020, "step": 4399 }, { "epoch": 18.329166666666666, "loss": 0.036660533398389816, "loss_ce": 5.1063821047137026e-06, "loss_iou": 0.169921875, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 401540020, "step": 4399 }, { "epoch": 18.333333333333332, "grad_norm": 0.8685643974307135, "learning_rate": 5e-05, "loss": 0.0278, "num_input_tokens_seen": 401631740, "step": 4400 }, { "epoch": 18.333333333333332, "loss": 0.030869200825691223, "loss_ce": 6.681293598376215e-07, "loss_iou": 0.09375, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 401631740, "step": 4400 }, { "epoch": 18.3375, "grad_norm": 1.619917861504503, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 401722968, "step": 4401 }, { "epoch": 18.3375, "loss": 0.05206376314163208, "loss_ce": 4.2737061448860914e-05, "loss_iou": 0.1748046875, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 401722968, "step": 4401 }, { "epoch": 18.341666666666665, "grad_norm": 2.9477429486798785, "learning_rate": 5e-05, "loss": 0.1039, "num_input_tokens_seen": 401814388, "step": 4402 }, { "epoch": 18.341666666666665, "loss": 0.07863037288188934, "loss_ce": 0.00030701086507178843, "loss_iou": 0.15625, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 401814388, "step": 4402 }, { "epoch": 18.345833333333335, "grad_norm": 3.1687821627968518, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 401905272, "step": 4403 }, { "epoch": 18.345833333333335, "loss": 0.05850303918123245, "loss_ce": 8.430246225543669e-07, "loss_iou": 0.35546875, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 401905272, "step": 4403 }, { "epoch": 18.35, "grad_norm": 2.245507306740881, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 401996540, "step": 4404 }, { "epoch": 18.35, "loss": 0.034206412732601166, "loss_ce": 3.837065378320403e-06, "loss_iou": 0.123046875, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 401996540, "step": 4404 }, { "epoch": 18.354166666666668, "grad_norm": 2.1863927493050928, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 402088296, "step": 4405 }, { "epoch": 18.354166666666668, "loss": 0.049772344529628754, "loss_ce": 5.807106390420813e-06, "loss_iou": 0.275390625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 402088296, "step": 4405 }, { "epoch": 18.358333333333334, "grad_norm": 2.371649199490193, "learning_rate": 5e-05, "loss": 0.0338, "num_input_tokens_seen": 402179204, "step": 4406 }, { "epoch": 18.358333333333334, "loss": 0.03646884858608246, "loss_ce": 3.4508883572925697e-07, "loss_iou": 0.193359375, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 402179204, "step": 4406 }, { "epoch": 18.3625, "grad_norm": 1.8672995620837733, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 402270828, "step": 4407 }, { "epoch": 18.3625, "loss": 0.03133529797196388, "loss_ce": 1.376296381749853e-06, "loss_iou": 0.173828125, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 402270828, "step": 4407 }, { "epoch": 18.366666666666667, "grad_norm": 2.168913292443573, "learning_rate": 5e-05, "loss": 0.0647, "num_input_tokens_seen": 402361824, "step": 4408 }, { "epoch": 18.366666666666667, "loss": 0.05605369806289673, "loss_ce": 8.166640327544883e-06, "loss_iou": 0.13671875, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 402361824, "step": 4408 }, { "epoch": 18.370833333333334, "grad_norm": 1.7590564446359138, "learning_rate": 5e-05, "loss": 0.0618, "num_input_tokens_seen": 402453848, "step": 4409 }, { "epoch": 18.370833333333334, "loss": 0.048607781529426575, "loss_ce": 9.035489938469254e-07, "loss_iou": 0.326171875, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 402453848, "step": 4409 }, { "epoch": 18.375, "grad_norm": 1.5070976292271845, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 402543956, "step": 4410 }, { "epoch": 18.375, "loss": 0.11715473979711533, "loss_ce": 0.00024190156545955688, "loss_iou": 0.080078125, "loss_num": 0.0234375, "loss_xval": 0.11669921875, "num_input_tokens_seen": 402543956, "step": 4410 }, { "epoch": 18.379166666666666, "grad_norm": 0.8670167969666163, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 402635120, "step": 4411 }, { "epoch": 18.379166666666666, "loss": 0.04164545238018036, "loss_ce": 2.710960507101845e-05, "loss_iou": 0.228515625, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 402635120, "step": 4411 }, { "epoch": 18.383333333333333, "grad_norm": 2.7655246359324224, "learning_rate": 5e-05, "loss": 0.0293, "num_input_tokens_seen": 402726100, "step": 4412 }, { "epoch": 18.383333333333333, "loss": 0.02951713465154171, "loss_ce": 2.1895530153415166e-05, "loss_iou": 0.171875, "loss_num": 0.005889892578125, "loss_xval": 0.029541015625, "num_input_tokens_seen": 402726100, "step": 4412 }, { "epoch": 18.3875, "grad_norm": 5.6264590678765085, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 402817124, "step": 4413 }, { "epoch": 18.3875, "loss": 0.034448638558387756, "loss_ce": 0.00016213968046940863, "loss_iou": 0.29296875, "loss_num": 0.006866455078125, "loss_xval": 0.0341796875, "num_input_tokens_seen": 402817124, "step": 4413 }, { "epoch": 18.391666666666666, "grad_norm": 3.711848747015358, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 402908288, "step": 4414 }, { "epoch": 18.391666666666666, "loss": 0.02724681980907917, "loss_ce": 8.617481944384053e-05, "loss_iou": 0.212890625, "loss_num": 0.00543212890625, "loss_xval": 0.027099609375, "num_input_tokens_seen": 402908288, "step": 4414 }, { "epoch": 18.395833333333332, "grad_norm": 6.133768116652266, "learning_rate": 5e-05, "loss": 0.0456, "num_input_tokens_seen": 402999680, "step": 4415 }, { "epoch": 18.395833333333332, "loss": 0.05777106434106827, "loss_ce": 1.2924733709951397e-06, "loss_iou": 0.26171875, "loss_num": 0.01153564453125, "loss_xval": 0.057861328125, "num_input_tokens_seen": 402999680, "step": 4415 }, { "epoch": 18.4, "grad_norm": 5.143096250225353, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 403089660, "step": 4416 }, { "epoch": 18.4, "loss": 0.0555812232196331, "loss_ce": 1.0821604519151151e-06, "loss_iou": 0.337890625, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 403089660, "step": 4416 }, { "epoch": 18.404166666666665, "grad_norm": 3.328742934850162, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 403181092, "step": 4417 }, { "epoch": 18.404166666666665, "loss": 0.03622487559914589, "loss_ce": 5.093725690130668e-07, "loss_iou": 0.259765625, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 403181092, "step": 4417 }, { "epoch": 18.408333333333335, "grad_norm": 3.3260340702202895, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 403270924, "step": 4418 }, { "epoch": 18.408333333333335, "loss": 0.04696984589099884, "loss_ce": 0.0005526099121198058, "loss_iou": 0.255859375, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 403270924, "step": 4418 }, { "epoch": 18.4125, "grad_norm": 2.8574730100360175, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 403362356, "step": 4419 }, { "epoch": 18.4125, "loss": 0.05734871327877045, "loss_ce": 6.184901849337621e-06, "loss_iou": 0.408203125, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 403362356, "step": 4419 }, { "epoch": 18.416666666666668, "grad_norm": 2.5362755439790603, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 403453812, "step": 4420 }, { "epoch": 18.416666666666668, "loss": 0.0601063147187233, "loss_ce": 1.948015324160224e-06, "loss_iou": 0.1728515625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 403453812, "step": 4420 }, { "epoch": 18.420833333333334, "grad_norm": 2.1787337321820788, "learning_rate": 5e-05, "loss": 0.0442, "num_input_tokens_seen": 403543832, "step": 4421 }, { "epoch": 18.420833333333334, "loss": 0.04106995835900307, "loss_ce": 0.0009317125659435987, "loss_iou": 0.271484375, "loss_num": 0.008056640625, "loss_xval": 0.0400390625, "num_input_tokens_seen": 403543832, "step": 4421 }, { "epoch": 18.425, "grad_norm": 5.489285242620302, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 403634960, "step": 4422 }, { "epoch": 18.425, "loss": 0.041658949106931686, "loss_ce": 2.4570922505517956e-06, "loss_iou": 0.228515625, "loss_num": 0.00830078125, "loss_xval": 0.041748046875, "num_input_tokens_seen": 403634960, "step": 4422 }, { "epoch": 18.429166666666667, "grad_norm": 1.7580315074816295, "learning_rate": 5e-05, "loss": 0.0572, "num_input_tokens_seen": 403725460, "step": 4423 }, { "epoch": 18.429166666666667, "loss": 0.058549076318740845, "loss_ce": 2.3993075956241228e-05, "loss_iou": 0.24609375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 403725460, "step": 4423 }, { "epoch": 18.433333333333334, "grad_norm": 4.338079935951398, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 403816960, "step": 4424 }, { "epoch": 18.433333333333334, "loss": 0.03503740578889847, "loss_ce": 0.0014222942991182208, "loss_iou": 0.0966796875, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 403816960, "step": 4424 }, { "epoch": 18.4375, "grad_norm": 4.382683754585659, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 403908220, "step": 4425 }, { "epoch": 18.4375, "loss": 0.08374170958995819, "loss_ce": 1.4761058082513046e-06, "loss_iou": 0.265625, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 403908220, "step": 4425 }, { "epoch": 18.441666666666666, "grad_norm": 1.652297345716666, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 403999452, "step": 4426 }, { "epoch": 18.441666666666666, "loss": 0.03246862068772316, "loss_ce": 7.420943438773975e-05, "loss_iou": 0.224609375, "loss_num": 0.0064697265625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 403999452, "step": 4426 }, { "epoch": 18.445833333333333, "grad_norm": 2.0580041388298214, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 404091052, "step": 4427 }, { "epoch": 18.445833333333333, "loss": 0.04866918921470642, "loss_ce": 1.2797968338418286e-06, "loss_iou": 0.169921875, "loss_num": 0.009765625, "loss_xval": 0.048583984375, "num_input_tokens_seen": 404091052, "step": 4427 }, { "epoch": 18.45, "grad_norm": 5.904451027653659, "learning_rate": 5e-05, "loss": 0.0429, "num_input_tokens_seen": 404181768, "step": 4428 }, { "epoch": 18.45, "loss": 0.03854472562670708, "loss_ce": 1.0242162034046487e-06, "loss_iou": 0.3125, "loss_num": 0.0076904296875, "loss_xval": 0.03857421875, "num_input_tokens_seen": 404181768, "step": 4428 }, { "epoch": 18.454166666666666, "grad_norm": 2.055584232293814, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 404273320, "step": 4429 }, { "epoch": 18.454166666666666, "loss": 0.040410809218883514, "loss_ce": 5.537119704968063e-06, "loss_iou": 0.1611328125, "loss_num": 0.008056640625, "loss_xval": 0.04052734375, "num_input_tokens_seen": 404273320, "step": 4429 }, { "epoch": 18.458333333333332, "grad_norm": 2.711378918960716, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 404364380, "step": 4430 }, { "epoch": 18.458333333333332, "loss": 0.04379798844456673, "loss_ce": 0.0011038967641070485, "loss_iou": 0.2412109375, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 404364380, "step": 4430 }, { "epoch": 18.4625, "grad_norm": 2.3864020110086632, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 404455832, "step": 4431 }, { "epoch": 18.4625, "loss": 0.03300423175096512, "loss_ce": 6.0506183217512444e-05, "loss_iou": 0.25, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 404455832, "step": 4431 }, { "epoch": 18.466666666666665, "grad_norm": 3.756837527836502, "learning_rate": 5e-05, "loss": 0.0442, "num_input_tokens_seen": 404546108, "step": 4432 }, { "epoch": 18.466666666666665, "loss": 0.055632077157497406, "loss_ce": 6.161809324112255e-06, "loss_iou": 0.189453125, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 404546108, "step": 4432 }, { "epoch": 18.470833333333335, "grad_norm": 2.249420188952619, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 404636968, "step": 4433 }, { "epoch": 18.470833333333335, "loss": 0.06816327571868896, "loss_ce": 2.2612073280470213e-06, "loss_iou": 0.146484375, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 404636968, "step": 4433 }, { "epoch": 18.475, "grad_norm": 1.7131657942600311, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 404728108, "step": 4434 }, { "epoch": 18.475, "loss": 0.07907183468341827, "loss_ce": 7.842800187063403e-07, "loss_iou": 0.1708984375, "loss_num": 0.0157470703125, "loss_xval": 0.0791015625, "num_input_tokens_seen": 404728108, "step": 4434 }, { "epoch": 18.479166666666668, "grad_norm": 1.8772650748721937, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 404819668, "step": 4435 }, { "epoch": 18.479166666666668, "loss": 0.04216513782739639, "loss_ce": 2.0358822439447977e-05, "loss_iou": 0.189453125, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 404819668, "step": 4435 }, { "epoch": 18.483333333333334, "grad_norm": 3.3095559754291344, "learning_rate": 5e-05, "loss": 0.0934, "num_input_tokens_seen": 404910868, "step": 4436 }, { "epoch": 18.483333333333334, "loss": 0.12461771070957184, "loss_ce": 1.443784549337579e-05, "loss_iou": 0.2255859375, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 404910868, "step": 4436 }, { "epoch": 18.4875, "grad_norm": 2.51422835105707, "learning_rate": 5e-05, "loss": 0.0423, "num_input_tokens_seen": 405001984, "step": 4437 }, { "epoch": 18.4875, "loss": 0.05847553163766861, "loss_ce": 0.0019722371362149715, "loss_iou": 0.16796875, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 405001984, "step": 4437 }, { "epoch": 18.491666666666667, "grad_norm": 2.3362978882356513, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 405092804, "step": 4438 }, { "epoch": 18.491666666666667, "loss": 0.06804253160953522, "loss_ce": 3.593964265746763e-06, "loss_iou": 0.287109375, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 405092804, "step": 4438 }, { "epoch": 18.495833333333334, "grad_norm": 2.837301728982645, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 405184724, "step": 4439 }, { "epoch": 18.495833333333334, "loss": 0.03965744748711586, "loss_ce": 0.0007780530722811818, "loss_iou": 0.34765625, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 405184724, "step": 4439 }, { "epoch": 18.5, "grad_norm": 2.1634899230189095, "learning_rate": 5e-05, "loss": 0.108, "num_input_tokens_seen": 405276324, "step": 4440 }, { "epoch": 18.5, "loss": 0.14463144540786743, "loss_ce": 0.00022226519649848342, "loss_iou": 0.22265625, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 405276324, "step": 4440 }, { "epoch": 18.504166666666666, "grad_norm": 1.7895128782846779, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 405368004, "step": 4441 }, { "epoch": 18.504166666666666, "loss": 0.04148627072572708, "loss_ce": 5.254165898804786e-06, "loss_iou": 0.1494140625, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 405368004, "step": 4441 }, { "epoch": 18.508333333333333, "grad_norm": 2.8820974379135524, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 405459140, "step": 4442 }, { "epoch": 18.508333333333333, "loss": 0.04661928489804268, "loss_ce": 3.685253886942519e-06, "loss_iou": 0.267578125, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 405459140, "step": 4442 }, { "epoch": 18.5125, "grad_norm": 2.7480017872293847, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 405550360, "step": 4443 }, { "epoch": 18.5125, "loss": 0.08555868268013, "loss_ce": 2.650637043188908e-06, "loss_iou": 0.2060546875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 405550360, "step": 4443 }, { "epoch": 18.516666666666666, "grad_norm": 1.9883482848739207, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 405641536, "step": 4444 }, { "epoch": 18.516666666666666, "loss": 0.042246539145708084, "loss_ce": 2.583612285889103e-06, "loss_iou": 0.1982421875, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 405641536, "step": 4444 }, { "epoch": 18.520833333333332, "grad_norm": 2.1394162140134063, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 405732288, "step": 4445 }, { "epoch": 18.520833333333332, "loss": 0.07797446101903915, "loss_ce": 2.0443444554985035e-06, "loss_iou": 0.1708984375, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 405732288, "step": 4445 }, { "epoch": 18.525, "grad_norm": 2.8044908787997693, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 405822968, "step": 4446 }, { "epoch": 18.525, "loss": 0.04367266595363617, "loss_ce": 2.011754531849874e-06, "loss_iou": 0.32421875, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 405822968, "step": 4446 }, { "epoch": 18.529166666666665, "grad_norm": 3.0426163524559593, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 405913680, "step": 4447 }, { "epoch": 18.529166666666665, "loss": 0.04268840700387955, "loss_ce": 4.0092592826113105e-05, "loss_iou": 0.236328125, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 405913680, "step": 4447 }, { "epoch": 18.533333333333335, "grad_norm": 3.4613499678428172, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 406005064, "step": 4448 }, { "epoch": 18.533333333333335, "loss": 0.05986738204956055, "loss_ce": 2.2407934011425823e-05, "loss_iou": 0.27734375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 406005064, "step": 4448 }, { "epoch": 18.5375, "grad_norm": 2.700220807710911, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 406096892, "step": 4449 }, { "epoch": 18.5375, "loss": 0.05822140723466873, "loss_ce": 3.9642905903747305e-05, "loss_iou": 0.3203125, "loss_num": 0.01165771484375, "loss_xval": 0.05810546875, "num_input_tokens_seen": 406096892, "step": 4449 }, { "epoch": 18.541666666666668, "grad_norm": 4.063404233508145, "learning_rate": 5e-05, "loss": 0.0839, "num_input_tokens_seen": 406188952, "step": 4450 }, { "epoch": 18.541666666666668, "loss": 0.10297872126102448, "loss_ce": 5.8184596127830446e-05, "loss_iou": 0.23046875, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 406188952, "step": 4450 }, { "epoch": 18.545833333333334, "grad_norm": 1.4196393264059073, "learning_rate": 5e-05, "loss": 0.0273, "num_input_tokens_seen": 406279820, "step": 4451 }, { "epoch": 18.545833333333334, "loss": 0.023423004895448685, "loss_ce": 7.639623049726652e-07, "loss_iou": 0.2734375, "loss_num": 0.00469970703125, "loss_xval": 0.0234375, "num_input_tokens_seen": 406279820, "step": 4451 }, { "epoch": 18.55, "grad_norm": 2.155221835305125, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 406371184, "step": 4452 }, { "epoch": 18.55, "loss": 0.027498047798871994, "loss_ce": 1.710833430479397e-06, "loss_iou": 0.09423828125, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 406371184, "step": 4452 }, { "epoch": 18.554166666666667, "grad_norm": 2.208679417966855, "learning_rate": 5e-05, "loss": 0.0345, "num_input_tokens_seen": 406462272, "step": 4453 }, { "epoch": 18.554166666666667, "loss": 0.03785201162099838, "loss_ce": 2.587808467069408e-06, "loss_iou": 0.2734375, "loss_num": 0.007568359375, "loss_xval": 0.037841796875, "num_input_tokens_seen": 406462272, "step": 4453 }, { "epoch": 18.558333333333334, "grad_norm": 2.517048340877465, "learning_rate": 5e-05, "loss": 0.0417, "num_input_tokens_seen": 406553752, "step": 4454 }, { "epoch": 18.558333333333334, "loss": 0.03823276609182358, "loss_ce": 1.8682237623579567e-06, "loss_iou": 0.267578125, "loss_num": 0.00762939453125, "loss_xval": 0.038330078125, "num_input_tokens_seen": 406553752, "step": 4454 }, { "epoch": 18.5625, "grad_norm": 2.1451262472298533, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 406644692, "step": 4455 }, { "epoch": 18.5625, "loss": 0.044374290853738785, "loss_ce": 1.730572193991975e-06, "loss_iou": 0.275390625, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 406644692, "step": 4455 }, { "epoch": 18.566666666666666, "grad_norm": 2.750847890813243, "learning_rate": 5e-05, "loss": 0.0482, "num_input_tokens_seen": 406735980, "step": 4456 }, { "epoch": 18.566666666666666, "loss": 0.03298502415418625, "loss_ce": 0.0001023312215693295, "loss_iou": 0.400390625, "loss_num": 0.006561279296875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 406735980, "step": 4456 }, { "epoch": 18.570833333333333, "grad_norm": 3.1762164731004154, "learning_rate": 5e-05, "loss": 0.0535, "num_input_tokens_seen": 406827296, "step": 4457 }, { "epoch": 18.570833333333333, "loss": 0.047379009425640106, "loss_ce": 4.7071023345779395e-07, "loss_iou": 0.22265625, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 406827296, "step": 4457 }, { "epoch": 18.575, "grad_norm": 3.022836034055383, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 406918792, "step": 4458 }, { "epoch": 18.575, "loss": 0.04400699585676193, "loss_ce": 1.590611100255046e-05, "loss_iou": 0.291015625, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 406918792, "step": 4458 }, { "epoch": 18.579166666666666, "grad_norm": 3.344276702917487, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 407009924, "step": 4459 }, { "epoch": 18.579166666666666, "loss": 0.032357875257730484, "loss_ce": 1.6871223124326207e-05, "loss_iou": 0.26953125, "loss_num": 0.0064697265625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 407009924, "step": 4459 }, { "epoch": 18.583333333333332, "grad_norm": 8.865744014817219, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 407101744, "step": 4460 }, { "epoch": 18.583333333333332, "loss": 0.05924474075436592, "loss_ce": 7.115265907486901e-05, "loss_iou": 0.326171875, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 407101744, "step": 4460 }, { "epoch": 18.5875, "grad_norm": 3.0703449184646323, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 407193012, "step": 4461 }, { "epoch": 18.5875, "loss": 0.030109165236353874, "loss_ce": 3.576171820895979e-06, "loss_iou": 0.212890625, "loss_num": 0.006011962890625, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 407193012, "step": 4461 }, { "epoch": 18.591666666666665, "grad_norm": 2.323271357780661, "learning_rate": 5e-05, "loss": 0.0788, "num_input_tokens_seen": 407284424, "step": 4462 }, { "epoch": 18.591666666666665, "loss": 0.04477598890662193, "loss_ce": 0.00034239343949593604, "loss_iou": 0.142578125, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 407284424, "step": 4462 }, { "epoch": 18.595833333333335, "grad_norm": 1.7998223791646442, "learning_rate": 5e-05, "loss": 0.0306, "num_input_tokens_seen": 407376020, "step": 4463 }, { "epoch": 18.595833333333335, "loss": 0.020944489166140556, "loss_ce": 9.43049235502258e-06, "loss_iou": 0.25390625, "loss_num": 0.004180908203125, "loss_xval": 0.02099609375, "num_input_tokens_seen": 407376020, "step": 4463 }, { "epoch": 18.6, "grad_norm": 1.7772887415471503, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 407467812, "step": 4464 }, { "epoch": 18.6, "loss": 0.05665751174092293, "loss_ce": 1.6278679595416179e-06, "loss_iou": 0.32421875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 407467812, "step": 4464 }, { "epoch": 18.604166666666668, "grad_norm": 1.2942589953418278, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 407559336, "step": 4465 }, { "epoch": 18.604166666666668, "loss": 0.018004463985562325, "loss_ce": 0.0007467729155905545, "loss_iou": 0.2109375, "loss_num": 0.003448486328125, "loss_xval": 0.0172119140625, "num_input_tokens_seen": 407559336, "step": 4465 }, { "epoch": 18.608333333333334, "grad_norm": 2.436094168196786, "learning_rate": 5e-05, "loss": 0.1048, "num_input_tokens_seen": 407650544, "step": 4466 }, { "epoch": 18.608333333333334, "loss": 0.12345411628484726, "loss_ce": 1.0510191714274697e-05, "loss_iou": 0.279296875, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 407650544, "step": 4466 }, { "epoch": 18.6125, "grad_norm": 2.6223977454659404, "learning_rate": 5e-05, "loss": 0.0474, "num_input_tokens_seen": 407742424, "step": 4467 }, { "epoch": 18.6125, "loss": 0.05559170991182327, "loss_ce": 3.939579983125441e-06, "loss_iou": 0.1806640625, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 407742424, "step": 4467 }, { "epoch": 18.616666666666667, "grad_norm": 1.3664409769636283, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 407834412, "step": 4468 }, { "epoch": 18.616666666666667, "loss": 0.031441450119018555, "loss_ce": 0.0006034359685145319, "loss_iou": 0.2421875, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 407834412, "step": 4468 }, { "epoch": 18.620833333333334, "grad_norm": 2.023822163166781, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 407924272, "step": 4469 }, { "epoch": 18.620833333333334, "loss": 0.030810587108135223, "loss_ce": 3.360964183229953e-05, "loss_iou": 0.2158203125, "loss_num": 0.00616455078125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 407924272, "step": 4469 }, { "epoch": 18.625, "grad_norm": 2.7439402399615047, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 408015508, "step": 4470 }, { "epoch": 18.625, "loss": 0.03986073285341263, "loss_ce": 4.775926299771527e-06, "loss_iou": 0.1806640625, "loss_num": 0.00799560546875, "loss_xval": 0.039794921875, "num_input_tokens_seen": 408015508, "step": 4470 }, { "epoch": 18.629166666666666, "grad_norm": 2.9815488516198254, "learning_rate": 5e-05, "loss": 0.0717, "num_input_tokens_seen": 408105620, "step": 4471 }, { "epoch": 18.629166666666666, "loss": 0.09448256343603134, "loss_ce": 4.59203110949602e-05, "loss_iou": 0.396484375, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 408105620, "step": 4471 }, { "epoch": 18.633333333333333, "grad_norm": 2.66745813251496, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 408196668, "step": 4472 }, { "epoch": 18.633333333333333, "loss": 0.038133613765239716, "loss_ce": 1.9017574004465132e-06, "loss_iou": 0.224609375, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 408196668, "step": 4472 }, { "epoch": 18.6375, "grad_norm": 2.131843660187116, "learning_rate": 5e-05, "loss": 0.0343, "num_input_tokens_seen": 408287424, "step": 4473 }, { "epoch": 18.6375, "loss": 0.0305347740650177, "loss_ce": 1.719842475722544e-05, "loss_iou": 0.271484375, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 408287424, "step": 4473 }, { "epoch": 18.641666666666666, "grad_norm": 6.60588283629867, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 408378844, "step": 4474 }, { "epoch": 18.641666666666666, "loss": 0.04872913286089897, "loss_ce": 1.544825681776274e-05, "loss_iou": 0.15625, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 408378844, "step": 4474 }, { "epoch": 18.645833333333332, "grad_norm": 3.1277533404717297, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 408470320, "step": 4475 }, { "epoch": 18.645833333333332, "loss": 0.028999999165534973, "loss_ce": 6.703672283947526e-07, "loss_iou": 0.318359375, "loss_num": 0.00579833984375, "loss_xval": 0.029052734375, "num_input_tokens_seen": 408470320, "step": 4475 }, { "epoch": 18.65, "grad_norm": 2.1571541506630747, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 408562520, "step": 4476 }, { "epoch": 18.65, "loss": 0.05866888538002968, "loss_ce": 1.4098356587055605e-05, "loss_iou": 0.39453125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 408562520, "step": 4476 }, { "epoch": 18.654166666666665, "grad_norm": 2.1129025810806996, "learning_rate": 5e-05, "loss": 0.0519, "num_input_tokens_seen": 408654496, "step": 4477 }, { "epoch": 18.654166666666665, "loss": 0.028653541579842567, "loss_ce": 7.383100455626845e-05, "loss_iou": 0.318359375, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 408654496, "step": 4477 }, { "epoch": 18.658333333333335, "grad_norm": 4.128846484593055, "learning_rate": 5e-05, "loss": 0.0923, "num_input_tokens_seen": 408745820, "step": 4478 }, { "epoch": 18.658333333333335, "loss": 0.054933082312345505, "loss_ce": 9.29926973185502e-05, "loss_iou": 0.3515625, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 408745820, "step": 4478 }, { "epoch": 18.6625, "grad_norm": 4.826721212274139, "learning_rate": 5e-05, "loss": 0.0769, "num_input_tokens_seen": 408836832, "step": 4479 }, { "epoch": 18.6625, "loss": 0.07643422484397888, "loss_ce": 2.9575387543445686e-06, "loss_iou": 0.287109375, "loss_num": 0.0152587890625, "loss_xval": 0.07666015625, "num_input_tokens_seen": 408836832, "step": 4479 }, { "epoch": 18.666666666666668, "grad_norm": 2.6585938697310487, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 408927732, "step": 4480 }, { "epoch": 18.666666666666668, "loss": 0.027991794049739838, "loss_ce": 0.0001292466913582757, "loss_iou": 0.21875, "loss_num": 0.005584716796875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 408927732, "step": 4480 }, { "epoch": 18.670833333333334, "grad_norm": 2.5149413948569173, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 409018648, "step": 4481 }, { "epoch": 18.670833333333334, "loss": 0.04316835105419159, "loss_ce": 1.6492605936946347e-05, "loss_iou": 0.173828125, "loss_num": 0.00860595703125, "loss_xval": 0.043212890625, "num_input_tokens_seen": 409018648, "step": 4481 }, { "epoch": 18.675, "grad_norm": 2.164456274812016, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 409108832, "step": 4482 }, { "epoch": 18.675, "loss": 0.04486284404993057, "loss_ce": 2.0009613308502594e-06, "loss_iou": 0.2578125, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 409108832, "step": 4482 }, { "epoch": 18.679166666666667, "grad_norm": 2.2556710075056596, "learning_rate": 5e-05, "loss": 0.0404, "num_input_tokens_seen": 409200360, "step": 4483 }, { "epoch": 18.679166666666667, "loss": 0.04485397785902023, "loss_ce": 7.65622019116563e-07, "loss_iou": 0.1875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 409200360, "step": 4483 }, { "epoch": 18.683333333333334, "grad_norm": 5.93230040799808, "learning_rate": 5e-05, "loss": 0.077, "num_input_tokens_seen": 409291544, "step": 4484 }, { "epoch": 18.683333333333334, "loss": 0.06880977749824524, "loss_ce": 7.897714567661751e-06, "loss_iou": 0.28125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 409291544, "step": 4484 }, { "epoch": 18.6875, "grad_norm": 3.767648367271881, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 409382640, "step": 4485 }, { "epoch": 18.6875, "loss": 0.05254533141851425, "loss_ce": 1.6948622942436486e-05, "loss_iou": 0.291015625, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 409382640, "step": 4485 }, { "epoch": 18.691666666666666, "grad_norm": 3.1886877603013253, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 409473580, "step": 4486 }, { "epoch": 18.691666666666666, "loss": 0.048356104642152786, "loss_ce": 1.0024384664575337e-06, "loss_iou": 0.322265625, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 409473580, "step": 4486 }, { "epoch": 18.695833333333333, "grad_norm": 2.1785204289187017, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 409565448, "step": 4487 }, { "epoch": 18.695833333333333, "loss": 0.0359908752143383, "loss_ce": 3.022179271283676e-06, "loss_iou": 0.28125, "loss_num": 0.0072021484375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 409565448, "step": 4487 }, { "epoch": 18.7, "grad_norm": 2.1407511218268613, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 409656152, "step": 4488 }, { "epoch": 18.7, "loss": 0.054323356598615646, "loss_ce": 2.0684299215645296e-06, "loss_iou": 0.30078125, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 409656152, "step": 4488 }, { "epoch": 18.704166666666666, "grad_norm": 3.111631253031894, "learning_rate": 5e-05, "loss": 0.0715, "num_input_tokens_seen": 409747656, "step": 4489 }, { "epoch": 18.704166666666666, "loss": 0.07458843290805817, "loss_ce": 3.4666340980038512e-06, "loss_iou": 0.1640625, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 409747656, "step": 4489 }, { "epoch": 18.708333333333332, "grad_norm": 1.5551129829754005, "learning_rate": 5e-05, "loss": 0.0756, "num_input_tokens_seen": 409838856, "step": 4490 }, { "epoch": 18.708333333333332, "loss": 0.09391278028488159, "loss_ce": 1.0192829904553946e-05, "loss_iou": 0.302734375, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 409838856, "step": 4490 }, { "epoch": 18.7125, "grad_norm": 2.520127418122806, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 409930208, "step": 4491 }, { "epoch": 18.7125, "loss": 0.0317520946264267, "loss_ce": 6.1826676756027155e-06, "loss_iou": 0.32421875, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 409930208, "step": 4491 }, { "epoch": 18.716666666666665, "grad_norm": 1.556512662051991, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 410021368, "step": 4492 }, { "epoch": 18.716666666666665, "loss": 0.025783251971006393, "loss_ce": 3.5267755720269633e-06, "loss_iou": 0.1953125, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 410021368, "step": 4492 }, { "epoch": 18.720833333333335, "grad_norm": 8.884609105765334, "learning_rate": 5e-05, "loss": 0.0424, "num_input_tokens_seen": 410113392, "step": 4493 }, { "epoch": 18.720833333333335, "loss": 0.053828682750463486, "loss_ce": 7.196986553026363e-05, "loss_iou": 0.279296875, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 410113392, "step": 4493 }, { "epoch": 18.725, "grad_norm": 2.750731941866525, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 410205120, "step": 4494 }, { "epoch": 18.725, "loss": 0.03087478131055832, "loss_ce": 1.0064355592476204e-05, "loss_iou": 0.3125, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 410205120, "step": 4494 }, { "epoch": 18.729166666666668, "grad_norm": 2.6665230765646117, "learning_rate": 5e-05, "loss": 0.049, "num_input_tokens_seen": 410296348, "step": 4495 }, { "epoch": 18.729166666666668, "loss": 0.03424867242574692, "loss_ce": 7.951766747282818e-06, "loss_iou": 0.234375, "loss_num": 0.006866455078125, "loss_xval": 0.0341796875, "num_input_tokens_seen": 410296348, "step": 4495 }, { "epoch": 18.733333333333334, "grad_norm": 4.025624968892128, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 410386296, "step": 4496 }, { "epoch": 18.733333333333334, "loss": 0.04179053008556366, "loss_ce": 4.333990546001587e-06, "loss_iou": 0.306640625, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 410386296, "step": 4496 }, { "epoch": 18.7375, "grad_norm": 2.9287391966615077, "learning_rate": 5e-05, "loss": 0.0729, "num_input_tokens_seen": 410477784, "step": 4497 }, { "epoch": 18.7375, "loss": 0.07163071632385254, "loss_ce": 5.957826033409219e-06, "loss_iou": 0.1796875, "loss_num": 0.0142822265625, "loss_xval": 0.07177734375, "num_input_tokens_seen": 410477784, "step": 4497 }, { "epoch": 18.741666666666667, "grad_norm": 2.000887712646304, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 410569244, "step": 4498 }, { "epoch": 18.741666666666667, "loss": 0.0395614430308342, "loss_ce": 7.16970898793079e-05, "loss_iou": 0.212890625, "loss_num": 0.00787353515625, "loss_xval": 0.03955078125, "num_input_tokens_seen": 410569244, "step": 4498 }, { "epoch": 18.745833333333334, "grad_norm": 2.29575014252196, "learning_rate": 5e-05, "loss": 0.0362, "num_input_tokens_seen": 410660492, "step": 4499 }, { "epoch": 18.745833333333334, "loss": 0.04005458950996399, "loss_ce": 2.6626599947121576e-07, "loss_iou": 0.23828125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 410660492, "step": 4499 }, { "epoch": 18.75, "grad_norm": 2.5630762248721237, "learning_rate": 5e-05, "loss": 0.087, "num_input_tokens_seen": 410751560, "step": 4500 }, { "epoch": 18.75, "eval_seeclick_CIoU": 0.24041260033845901, "eval_seeclick_GIoU": 0.229469396173954, "eval_seeclick_IoU": 0.3364127427339554, "eval_seeclick_MAE_all": 0.09035475924611092, "eval_seeclick_MAE_h": 0.08218218386173248, "eval_seeclick_MAE_w": 0.17696396261453629, "eval_seeclick_MAE_x_boxes": 0.18398155272006989, "eval_seeclick_MAE_y_boxes": 0.08752219006419182, "eval_seeclick_NUM_probability": 0.9999991357326508, "eval_seeclick_inside_bbox": 0.5255681872367859, "eval_seeclick_loss": 0.5564561486244202, "eval_seeclick_loss_ce": 0.15300309658050537, "eval_seeclick_loss_iou": 0.45245361328125, "eval_seeclick_loss_num": 0.0796356201171875, "eval_seeclick_loss_xval": 0.39813232421875, "eval_seeclick_runtime": 77.9915, "eval_seeclick_samples_per_second": 0.551, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 410751560, "step": 4500 }, { "epoch": 18.75, "eval_icons_CIoU": 0.31817570328712463, "eval_icons_GIoU": 0.28907932341098785, "eval_icons_IoU": 0.411752387881279, "eval_icons_MAE_all": 0.07357519492506981, "eval_icons_MAE_h": 0.1560768559575081, "eval_icons_MAE_w": 0.1098833754658699, "eval_icons_MAE_x_boxes": 0.10879017040133476, "eval_icons_MAE_y_boxes": 0.1561029627919197, "eval_icons_NUM_probability": 0.999999463558197, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 0.3551454246044159, "eval_icons_loss_ce": 0.000737828500859905, "eval_icons_loss_iou": 0.25079345703125, "eval_icons_loss_num": 0.0764312744140625, "eval_icons_loss_xval": 0.382080078125, "eval_icons_runtime": 90.0127, "eval_icons_samples_per_second": 0.555, "eval_icons_steps_per_second": 0.022, "num_input_tokens_seen": 410751560, "step": 4500 }, { "epoch": 18.75, "eval_screenspot_CIoU": 0.3529522220293681, "eval_screenspot_GIoU": 0.34071413179238635, "eval_screenspot_IoU": 0.43253129720687866, "eval_screenspot_MAE_all": 0.10080522298812866, "eval_screenspot_MAE_h": 0.08905058105786641, "eval_screenspot_MAE_w": 0.22549272576967874, "eval_screenspot_MAE_x_boxes": 0.18476370722055435, "eval_screenspot_MAE_y_boxes": 0.08808410912752151, "eval_screenspot_NUM_probability": 0.9999993046124777, "eval_screenspot_inside_bbox": 0.6775000095367432, "eval_screenspot_loss": 0.5045076012611389, "eval_screenspot_loss_ce": 0.001676593108754787, "eval_screenspot_loss_iou": 0.3478190104166667, "eval_screenspot_loss_num": 0.10205586751302083, "eval_screenspot_loss_xval": 0.5101521809895834, "eval_screenspot_runtime": 163.3499, "eval_screenspot_samples_per_second": 0.545, "eval_screenspot_steps_per_second": 0.018, "num_input_tokens_seen": 410751560, "step": 4500 }, { "epoch": 18.75, "eval_compot_CIoU": 0.44827909767627716, "eval_compot_GIoU": 0.4435681253671646, "eval_compot_IoU": 0.5230425000190735, "eval_compot_MAE_all": 0.06078624911606312, "eval_compot_MAE_h": 0.06127396039664745, "eval_compot_MAE_w": 0.16494429856538773, "eval_compot_MAE_x_boxes": 0.16555871069431305, "eval_compot_MAE_y_boxes": 0.057475872337818146, "eval_compot_NUM_probability": 0.9999968111515045, "eval_compot_inside_bbox": 0.6927083432674408, "eval_compot_loss": 0.33864617347717285, "eval_compot_loss_ce": 0.05631054379045963, "eval_compot_loss_iou": 0.29205322265625, "eval_compot_loss_num": 0.053653717041015625, "eval_compot_loss_xval": 0.268218994140625, "eval_compot_runtime": 92.952, "eval_compot_samples_per_second": 0.538, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 410751560, "step": 4500 }, { "epoch": 18.75, "loss": 0.35820692777633667, "loss_ce": 0.05205457657575607, "loss_iou": 0.267578125, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 410751560, "step": 4500 }, { "epoch": 18.754166666666666, "grad_norm": 4.165016190881154, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 410842956, "step": 4501 }, { "epoch": 18.754166666666666, "loss": 0.059623926877975464, "loss_ce": 2.3097214580047876e-05, "loss_iou": 0.267578125, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 410842956, "step": 4501 }, { "epoch": 18.758333333333333, "grad_norm": 3.8968387340439476, "learning_rate": 5e-05, "loss": 0.0772, "num_input_tokens_seen": 410934796, "step": 4502 }, { "epoch": 18.758333333333333, "loss": 0.0845027044415474, "loss_ce": 0.000892168958671391, "loss_iou": 0.369140625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 410934796, "step": 4502 }, { "epoch": 18.7625, "grad_norm": 5.70250243158941, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 411025664, "step": 4503 }, { "epoch": 18.7625, "loss": 0.04231572151184082, "loss_ce": 3.0992389383754926e-06, "loss_iou": 0.28515625, "loss_num": 0.00848388671875, "loss_xval": 0.042236328125, "num_input_tokens_seen": 411025664, "step": 4503 }, { "epoch": 18.766666666666666, "grad_norm": 2.9812879644219796, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 411116996, "step": 4504 }, { "epoch": 18.766666666666666, "loss": 0.04243713617324829, "loss_ce": 2.4412829588982277e-06, "loss_iou": 0.279296875, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 411116996, "step": 4504 }, { "epoch": 18.770833333333332, "grad_norm": 2.9826605958507417, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 411208000, "step": 4505 }, { "epoch": 18.770833333333332, "loss": 0.043711256235837936, "loss_ce": 0.0001474139717174694, "loss_iou": 0.26171875, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 411208000, "step": 4505 }, { "epoch": 18.775, "grad_norm": 1.8056504080821845, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 411299380, "step": 4506 }, { "epoch": 18.775, "loss": 0.04516106843948364, "loss_ce": 2.5572267986717634e-05, "loss_iou": 0.1943359375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 411299380, "step": 4506 }, { "epoch": 18.779166666666665, "grad_norm": 2.503334389176959, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 411390908, "step": 4507 }, { "epoch": 18.779166666666665, "loss": 0.04811955988407135, "loss_ce": 2.385993502684869e-05, "loss_iou": 0.30078125, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 411390908, "step": 4507 }, { "epoch": 18.783333333333335, "grad_norm": 2.3792539602486307, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 411482032, "step": 4508 }, { "epoch": 18.783333333333335, "loss": 0.043492428958415985, "loss_ce": 4.879675998381572e-06, "loss_iou": 0.203125, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 411482032, "step": 4508 }, { "epoch": 18.7875, "grad_norm": 4.089348104331533, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 411573448, "step": 4509 }, { "epoch": 18.7875, "loss": 0.049597617238759995, "loss_ce": 6.554991159646306e-06, "loss_iou": 0.1416015625, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 411573448, "step": 4509 }, { "epoch": 18.791666666666668, "grad_norm": 3.2327709000725986, "learning_rate": 5e-05, "loss": 0.0678, "num_input_tokens_seen": 411664588, "step": 4510 }, { "epoch": 18.791666666666668, "loss": 0.06886570155620575, "loss_ce": 2.7887392661796184e-06, "loss_iou": 0.30859375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 411664588, "step": 4510 }, { "epoch": 18.795833333333334, "grad_norm": 2.372303920105501, "learning_rate": 5e-05, "loss": 0.0701, "num_input_tokens_seen": 411755708, "step": 4511 }, { "epoch": 18.795833333333334, "loss": 0.03130407631397247, "loss_ce": 6.694058356515598e-07, "loss_iou": 0.1884765625, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 411755708, "step": 4511 }, { "epoch": 18.8, "grad_norm": 2.3058725173417174, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 411847404, "step": 4512 }, { "epoch": 18.8, "loss": 0.047491900622844696, "loss_ce": 1.4178664059727453e-05, "loss_iou": 0.283203125, "loss_num": 0.009521484375, "loss_xval": 0.04736328125, "num_input_tokens_seen": 411847404, "step": 4512 }, { "epoch": 18.804166666666667, "grad_norm": 1.5764967521065583, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 411938976, "step": 4513 }, { "epoch": 18.804166666666667, "loss": 0.06534408777952194, "loss_ce": 9.751153993420303e-05, "loss_iou": 0.36328125, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 411938976, "step": 4513 }, { "epoch": 18.808333333333334, "grad_norm": 1.063531570998383, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 412030072, "step": 4514 }, { "epoch": 18.808333333333334, "loss": 0.07547049224376678, "loss_ce": 5.137699190527201e-07, "loss_iou": 0.21484375, "loss_num": 0.01507568359375, "loss_xval": 0.07568359375, "num_input_tokens_seen": 412030072, "step": 4514 }, { "epoch": 18.8125, "grad_norm": 2.4462235116955395, "learning_rate": 5e-05, "loss": 0.0774, "num_input_tokens_seen": 412121340, "step": 4515 }, { "epoch": 18.8125, "loss": 0.07516561448574066, "loss_ce": 8.244851414929144e-07, "loss_iou": 0.2197265625, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 412121340, "step": 4515 }, { "epoch": 18.816666666666666, "grad_norm": 3.174488492095962, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 412212332, "step": 4516 }, { "epoch": 18.816666666666666, "loss": 0.07436549663543701, "loss_ce": 1.7897373254527338e-06, "loss_iou": 0.28515625, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 412212332, "step": 4516 }, { "epoch": 18.820833333333333, "grad_norm": 2.777840323934893, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 412303960, "step": 4517 }, { "epoch": 18.820833333333333, "loss": 0.07483154535293579, "loss_ce": 2.4458545340166893e-06, "loss_iou": 0.2890625, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 412303960, "step": 4517 }, { "epoch": 18.825, "grad_norm": 3.037732930460235, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 412395028, "step": 4518 }, { "epoch": 18.825, "loss": 0.043012239038944244, "loss_ce": 0.0004402203776407987, "loss_iou": 0.185546875, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 412395028, "step": 4518 }, { "epoch": 18.829166666666666, "grad_norm": 8.699284437617996, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 412486328, "step": 4519 }, { "epoch": 18.829166666666666, "loss": 0.03607865422964096, "loss_ce": 0.00011368915147613734, "loss_iou": 0.30859375, "loss_num": 0.0072021484375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 412486328, "step": 4519 }, { "epoch": 18.833333333333332, "grad_norm": 5.473940822177437, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 412577844, "step": 4520 }, { "epoch": 18.833333333333332, "loss": 0.05875308811664581, "loss_ce": 6.748792202415643e-06, "loss_iou": 0.1845703125, "loss_num": 0.01171875, "loss_xval": 0.058837890625, "num_input_tokens_seen": 412577844, "step": 4520 }, { "epoch": 18.8375, "grad_norm": 2.3231654327618143, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 412668408, "step": 4521 }, { "epoch": 18.8375, "loss": 0.060074321925640106, "loss_ce": 4.6995239699754165e-07, "loss_iou": 0.275390625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 412668408, "step": 4521 }, { "epoch": 18.841666666666665, "grad_norm": 5.512980870961963, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 412759712, "step": 4522 }, { "epoch": 18.841666666666665, "loss": 0.07790729403495789, "loss_ce": 3.5450657378532924e-06, "loss_iou": 0.28515625, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 412759712, "step": 4522 }, { "epoch": 18.845833333333335, "grad_norm": 3.418687552224435, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 412850176, "step": 4523 }, { "epoch": 18.845833333333335, "loss": 0.05151607096195221, "loss_ce": 2.398493052169215e-06, "loss_iou": 0.263671875, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 412850176, "step": 4523 }, { "epoch": 18.85, "grad_norm": 3.065566699479895, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 412941256, "step": 4524 }, { "epoch": 18.85, "loss": 0.02815604954957962, "loss_ce": 3.5835851122101303e-06, "loss_iou": 0.22265625, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 412941256, "step": 4524 }, { "epoch": 18.854166666666668, "grad_norm": 4.539063725156864, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 413031964, "step": 4525 }, { "epoch": 18.854166666666668, "loss": 0.035888951271772385, "loss_ce": 2.80285235021438e-07, "loss_iou": 0.310546875, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 413031964, "step": 4525 }, { "epoch": 18.858333333333334, "grad_norm": 2.0341420317401595, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 413123348, "step": 4526 }, { "epoch": 18.858333333333334, "loss": 0.06325916200876236, "loss_ce": 3.851961537293391e-06, "loss_iou": 0.193359375, "loss_num": 0.01263427734375, "loss_xval": 0.0634765625, "num_input_tokens_seen": 413123348, "step": 4526 }, { "epoch": 18.8625, "grad_norm": 2.5688119812235595, "learning_rate": 5e-05, "loss": 0.0492, "num_input_tokens_seen": 413214692, "step": 4527 }, { "epoch": 18.8625, "loss": 0.05270035192370415, "loss_ce": 7.279007695615292e-05, "loss_iou": 0.1728515625, "loss_num": 0.010498046875, "loss_xval": 0.052734375, "num_input_tokens_seen": 413214692, "step": 4527 }, { "epoch": 18.866666666666667, "grad_norm": 2.665680782878335, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 413306240, "step": 4528 }, { "epoch": 18.866666666666667, "loss": 0.04966755211353302, "loss_ce": 1.9118917293781124e-07, "loss_iou": 0.27734375, "loss_num": 0.00994873046875, "loss_xval": 0.049560546875, "num_input_tokens_seen": 413306240, "step": 4528 }, { "epoch": 18.870833333333334, "grad_norm": 2.777606940324933, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 413397748, "step": 4529 }, { "epoch": 18.870833333333334, "loss": 0.045915387570858, "loss_ce": 0.0009019593708217144, "loss_iou": 0.29296875, "loss_num": 0.009033203125, "loss_xval": 0.044921875, "num_input_tokens_seen": 413397748, "step": 4529 }, { "epoch": 18.875, "grad_norm": 3.047683174179071, "learning_rate": 5e-05, "loss": 0.0429, "num_input_tokens_seen": 413489768, "step": 4530 }, { "epoch": 18.875, "loss": 0.04263220354914665, "loss_ce": 1.4406334230443463e-05, "loss_iou": 0.265625, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 413489768, "step": 4530 }, { "epoch": 18.879166666666666, "grad_norm": 2.441751202633568, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 413580660, "step": 4531 }, { "epoch": 18.879166666666666, "loss": 0.03846908360719681, "loss_ce": 1.6747765130276093e-06, "loss_iou": 0.244140625, "loss_num": 0.0076904296875, "loss_xval": 0.03857421875, "num_input_tokens_seen": 413580660, "step": 4531 }, { "epoch": 18.883333333333333, "grad_norm": 2.239737246516112, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 413672152, "step": 4532 }, { "epoch": 18.883333333333333, "loss": 0.04217064380645752, "loss_ce": 1.0608757293084636e-05, "loss_iou": 0.28515625, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 413672152, "step": 4532 }, { "epoch": 18.8875, "grad_norm": 2.599374959619232, "learning_rate": 5e-05, "loss": 0.0304, "num_input_tokens_seen": 413763232, "step": 4533 }, { "epoch": 18.8875, "loss": 0.02673015370965004, "loss_ce": 1.201295162900351e-05, "loss_iou": 0.1669921875, "loss_num": 0.005340576171875, "loss_xval": 0.0267333984375, "num_input_tokens_seen": 413763232, "step": 4533 }, { "epoch": 18.891666666666666, "grad_norm": 2.8679999767678472, "learning_rate": 5e-05, "loss": 0.0329, "num_input_tokens_seen": 413854784, "step": 4534 }, { "epoch": 18.891666666666666, "loss": 0.03278213366866112, "loss_ce": 3.677474887808785e-05, "loss_iou": 0.2197265625, "loss_num": 0.006561279296875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 413854784, "step": 4534 }, { "epoch": 18.895833333333332, "grad_norm": 2.3359688540852184, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 413946248, "step": 4535 }, { "epoch": 18.895833333333332, "loss": 0.027322562411427498, "loss_ce": 1.7009778048304725e-06, "loss_iou": 0.173828125, "loss_num": 0.005462646484375, "loss_xval": 0.02734375, "num_input_tokens_seen": 413946248, "step": 4535 }, { "epoch": 18.9, "grad_norm": 2.3552836654186584, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 414037748, "step": 4536 }, { "epoch": 18.9, "loss": 0.11393754184246063, "loss_ce": 1.541888741485309e-05, "loss_iou": 0.26171875, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 414037748, "step": 4536 }, { "epoch": 18.904166666666665, "grad_norm": 3.160657561051584, "learning_rate": 5e-05, "loss": 0.0358, "num_input_tokens_seen": 414128580, "step": 4537 }, { "epoch": 18.904166666666665, "loss": 0.02459779940545559, "loss_ce": 6.322184162854683e-07, "loss_iou": 0.349609375, "loss_num": 0.004913330078125, "loss_xval": 0.024658203125, "num_input_tokens_seen": 414128580, "step": 4537 }, { "epoch": 18.908333333333335, "grad_norm": 3.2531765243968995, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 414219824, "step": 4538 }, { "epoch": 18.908333333333335, "loss": 0.06067047268152237, "loss_ce": 1.5271011761797126e-06, "loss_iou": 0.31640625, "loss_num": 0.01214599609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 414219824, "step": 4538 }, { "epoch": 18.9125, "grad_norm": 3.175017463980461, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 414311600, "step": 4539 }, { "epoch": 18.9125, "loss": 0.05885850638151169, "loss_ce": 0.0002494982036296278, "loss_iou": 0.314453125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 414311600, "step": 4539 }, { "epoch": 18.916666666666668, "grad_norm": 2.8530111649795535, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 414402868, "step": 4540 }, { "epoch": 18.916666666666668, "loss": 0.04116272181272507, "loss_ce": 2.5028559321071953e-05, "loss_iou": 0.177734375, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 414402868, "step": 4540 }, { "epoch": 18.920833333333334, "grad_norm": 1.4914725595256315, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 414494496, "step": 4541 }, { "epoch": 18.920833333333334, "loss": 0.024547066539525986, "loss_ce": 0.0008005747804418206, "loss_iou": 0.1689453125, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 414494496, "step": 4541 }, { "epoch": 18.925, "grad_norm": 2.1658664665582736, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 414585944, "step": 4542 }, { "epoch": 18.925, "loss": 0.04802921786904335, "loss_ce": 2.5066014131880365e-05, "loss_iou": 0.1767578125, "loss_num": 0.00958251953125, "loss_xval": 0.048095703125, "num_input_tokens_seen": 414585944, "step": 4542 }, { "epoch": 18.929166666666667, "grad_norm": 4.970453465965191, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 414676744, "step": 4543 }, { "epoch": 18.929166666666667, "loss": 0.043091583997011185, "loss_ce": 7.623035571668879e-07, "loss_iou": 0.30078125, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 414676744, "step": 4543 }, { "epoch": 18.933333333333334, "grad_norm": 2.162297932419361, "learning_rate": 5e-05, "loss": 0.0426, "num_input_tokens_seen": 414767852, "step": 4544 }, { "epoch": 18.933333333333334, "loss": 0.059343062341213226, "loss_ce": 1.6303615666402038e-06, "loss_iou": 0.24609375, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 414767852, "step": 4544 }, { "epoch": 18.9375, "grad_norm": 3.184962848674524, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 414858992, "step": 4545 }, { "epoch": 18.9375, "loss": 0.04635874181985855, "loss_ce": 2.539571596571477e-06, "loss_iou": 0.26171875, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 414858992, "step": 4545 }, { "epoch": 18.941666666666666, "grad_norm": 3.3524812443803307, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 414950512, "step": 4546 }, { "epoch": 18.941666666666666, "loss": 0.06305290758609772, "loss_ce": 3.584761316233198e-06, "loss_iou": 0.28515625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 414950512, "step": 4546 }, { "epoch": 18.945833333333333, "grad_norm": 2.9169754083372994, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 415042388, "step": 4547 }, { "epoch": 18.945833333333333, "loss": 0.06709093600511551, "loss_ce": 0.003065059892833233, "loss_iou": 0.34375, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 415042388, "step": 4547 }, { "epoch": 18.95, "grad_norm": 2.9309716744603866, "learning_rate": 5e-05, "loss": 0.0735, "num_input_tokens_seen": 415133472, "step": 4548 }, { "epoch": 18.95, "loss": 0.11270265281200409, "loss_ce": 0.0008404635009355843, "loss_iou": 0.26953125, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 415133472, "step": 4548 }, { "epoch": 18.954166666666666, "grad_norm": 2.5923101743984405, "learning_rate": 5e-05, "loss": 0.0838, "num_input_tokens_seen": 415224916, "step": 4549 }, { "epoch": 18.954166666666666, "loss": 0.03926153853535652, "loss_ce": 6.759119060006924e-07, "loss_iou": 0.251953125, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 415224916, "step": 4549 }, { "epoch": 18.958333333333332, "grad_norm": 4.250379279730983, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 415315736, "step": 4550 }, { "epoch": 18.958333333333332, "loss": 0.05874726548790932, "loss_ce": 9.295710015067016e-07, "loss_iou": 0.291015625, "loss_num": 0.01171875, "loss_xval": 0.058837890625, "num_input_tokens_seen": 415315736, "step": 4550 }, { "epoch": 18.9625, "grad_norm": 14.125519061852755, "learning_rate": 5e-05, "loss": 0.038, "num_input_tokens_seen": 415406960, "step": 4551 }, { "epoch": 18.9625, "loss": 0.029943302273750305, "loss_ce": 3.6074361560167745e-05, "loss_iou": 0.263671875, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 415406960, "step": 4551 }, { "epoch": 18.966666666666665, "grad_norm": 3.272380265120036, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 415498504, "step": 4552 }, { "epoch": 18.966666666666665, "loss": 0.05273493379354477, "loss_ce": 5.606986519524071e-07, "loss_iou": 0.27734375, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 415498504, "step": 4552 }, { "epoch": 18.970833333333335, "grad_norm": 2.1512033513970876, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 415589772, "step": 4553 }, { "epoch": 18.970833333333335, "loss": 0.03154401481151581, "loss_ce": 4.097748842468718e-06, "loss_iou": 0.34765625, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 415589772, "step": 4553 }, { "epoch": 18.975, "grad_norm": 2.9219392623315428, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 415680952, "step": 4554 }, { "epoch": 18.975, "loss": 0.03689642623066902, "loss_ce": 6.737686248925456e-07, "loss_iou": 0.26953125, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 415680952, "step": 4554 }, { "epoch": 18.979166666666668, "grad_norm": 2.900574044678954, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 415772132, "step": 4555 }, { "epoch": 18.979166666666668, "loss": 0.04363527148962021, "loss_ce": 8.668923692312092e-05, "loss_iou": 0.2451171875, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 415772132, "step": 4555 }, { "epoch": 18.983333333333334, "grad_norm": 2.1061819265107475, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 415863748, "step": 4556 }, { "epoch": 18.983333333333334, "loss": 0.04218093305826187, "loss_ce": 0.00010482473589945585, "loss_iou": 0.197265625, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 415863748, "step": 4556 }, { "epoch": 18.9875, "grad_norm": 4.744328207250594, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 415954772, "step": 4557 }, { "epoch": 18.9875, "loss": 0.07442444562911987, "loss_ce": 7.329533673328115e-06, "loss_iou": 0.236328125, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 415954772, "step": 4557 }, { "epoch": 18.991666666666667, "grad_norm": 10.388722990505855, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 416046356, "step": 4558 }, { "epoch": 18.991666666666667, "loss": 0.038884952664375305, "loss_ce": 5.559993951465003e-06, "loss_iou": 0.125, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 416046356, "step": 4558 }, { "epoch": 18.995833333333334, "grad_norm": 9.518989246986543, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 416137732, "step": 4559 }, { "epoch": 18.995833333333334, "loss": 0.05143982917070389, "loss_ce": 0.00024658982874825597, "loss_iou": 0.29296875, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 416137732, "step": 4559 }, { "epoch": 19.0, "grad_norm": 1.8359022855887634, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 416229068, "step": 4560 }, { "epoch": 19.0, "loss": 0.028715705499053, "loss_ce": 2.918194695666898e-05, "loss_iou": 0.2080078125, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 416229068, "step": 4560 }, { "epoch": 19.004166666666666, "grad_norm": 6.715655239337954, "learning_rate": 5e-05, "loss": 0.0572, "num_input_tokens_seen": 416320568, "step": 4561 }, { "epoch": 19.004166666666666, "loss": 0.05667451396584511, "loss_ce": 3.371906132088043e-06, "loss_iou": 0.0390625, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 416320568, "step": 4561 }, { "epoch": 19.008333333333333, "grad_norm": 6.957390228996864, "learning_rate": 5e-05, "loss": 0.0675, "num_input_tokens_seen": 416412304, "step": 4562 }, { "epoch": 19.008333333333333, "loss": 0.024202125146985054, "loss_ce": 1.6853648503456498e-06, "loss_iou": 0.2412109375, "loss_num": 0.004852294921875, "loss_xval": 0.024169921875, "num_input_tokens_seen": 416412304, "step": 4562 }, { "epoch": 19.0125, "grad_norm": 3.496132083831709, "learning_rate": 5e-05, "loss": 0.0865, "num_input_tokens_seen": 416503232, "step": 4563 }, { "epoch": 19.0125, "loss": 0.042641233652830124, "loss_ce": 5.494790684679174e-07, "loss_iou": 0.1953125, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 416503232, "step": 4563 }, { "epoch": 19.016666666666666, "grad_norm": 2.97382417361745, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 416594068, "step": 4564 }, { "epoch": 19.016666666666666, "loss": 0.0603485070168972, "loss_ce": 2.2883396013639867e-05, "loss_iou": 0.349609375, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 416594068, "step": 4564 }, { "epoch": 19.020833333333332, "grad_norm": 5.756229010297488, "learning_rate": 5e-05, "loss": 0.0849, "num_input_tokens_seen": 416685476, "step": 4565 }, { "epoch": 19.020833333333332, "loss": 0.09227042645215988, "loss_ce": 5.330486487764574e-07, "loss_iou": 0.2421875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 416685476, "step": 4565 }, { "epoch": 19.025, "grad_norm": 3.4465502009365765, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 416777284, "step": 4566 }, { "epoch": 19.025, "loss": 0.05590882524847984, "loss_ce": 6.220782324817264e-07, "loss_iou": 0.2216796875, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 416777284, "step": 4566 }, { "epoch": 19.029166666666665, "grad_norm": 2.79061227690433, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 416868844, "step": 4567 }, { "epoch": 19.029166666666665, "loss": 0.06916234642267227, "loss_ce": 4.003128196927719e-05, "loss_iou": 0.2734375, "loss_num": 0.0137939453125, "loss_xval": 0.0693359375, "num_input_tokens_seen": 416868844, "step": 4567 }, { "epoch": 19.033333333333335, "grad_norm": 3.02092776013322, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 416959988, "step": 4568 }, { "epoch": 19.033333333333335, "loss": 0.06227421760559082, "loss_ce": 3.0962000892031938e-06, "loss_iou": 0.435546875, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 416959988, "step": 4568 }, { "epoch": 19.0375, "grad_norm": 3.2971210612873905, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 417050984, "step": 4569 }, { "epoch": 19.0375, "loss": 0.055607423186302185, "loss_ce": 4.39636642113328e-06, "loss_iou": 0.28125, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 417050984, "step": 4569 }, { "epoch": 19.041666666666668, "grad_norm": 5.407971242628027, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 417142000, "step": 4570 }, { "epoch": 19.041666666666668, "loss": 0.05627996101975441, "loss_ce": 8.18439875729382e-05, "loss_iou": 0.2470703125, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 417142000, "step": 4570 }, { "epoch": 19.045833333333334, "grad_norm": 3.072596211228088, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 417232916, "step": 4571 }, { "epoch": 19.045833333333334, "loss": 0.07009995728731155, "loss_ce": 1.0801534244819777e-06, "loss_iou": 0.3359375, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 417232916, "step": 4571 }, { "epoch": 19.05, "grad_norm": 2.244302503059525, "learning_rate": 5e-05, "loss": 0.0705, "num_input_tokens_seen": 417324232, "step": 4572 }, { "epoch": 19.05, "loss": 0.10026659071445465, "loss_ce": 1.6345171388820745e-05, "loss_iou": 0.30859375, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 417324232, "step": 4572 }, { "epoch": 19.054166666666667, "grad_norm": 2.9609513120700575, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 417415708, "step": 4573 }, { "epoch": 19.054166666666667, "loss": 0.047413308173418045, "loss_ce": 1.187850193673512e-05, "loss_iou": 0.373046875, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 417415708, "step": 4573 }, { "epoch": 19.058333333333334, "grad_norm": 2.369157132961251, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 417507108, "step": 4574 }, { "epoch": 19.058333333333334, "loss": 0.07667060196399689, "loss_ce": 1.0441099220770411e-05, "loss_iou": 0.224609375, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 417507108, "step": 4574 }, { "epoch": 19.0625, "grad_norm": 1.3947638658137602, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 417598908, "step": 4575 }, { "epoch": 19.0625, "loss": 0.02127896249294281, "loss_ce": 5.804000693387934e-07, "loss_iou": 0.169921875, "loss_num": 0.0042724609375, "loss_xval": 0.021240234375, "num_input_tokens_seen": 417598908, "step": 4575 }, { "epoch": 19.066666666666666, "grad_norm": 1.8918181370378695, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 417690016, "step": 4576 }, { "epoch": 19.066666666666666, "loss": 0.13125675916671753, "loss_ce": 6.536600949402782e-07, "loss_iou": 0.1474609375, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 417690016, "step": 4576 }, { "epoch": 19.070833333333333, "grad_norm": 1.7643172128339852, "learning_rate": 5e-05, "loss": 0.0272, "num_input_tokens_seen": 417781528, "step": 4577 }, { "epoch": 19.070833333333333, "loss": 0.024284040555357933, "loss_ce": 3.019509676960297e-05, "loss_iou": 0.259765625, "loss_num": 0.004852294921875, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 417781528, "step": 4577 }, { "epoch": 19.075, "grad_norm": 2.438866524106157, "learning_rate": 5e-05, "loss": 0.0367, "num_input_tokens_seen": 417871744, "step": 4578 }, { "epoch": 19.075, "loss": 0.03327229619026184, "loss_ce": 2.3394957679556683e-05, "loss_iou": 0.166015625, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 417871744, "step": 4578 }, { "epoch": 19.079166666666666, "grad_norm": 2.7742865635276135, "learning_rate": 5e-05, "loss": 0.0929, "num_input_tokens_seen": 417962520, "step": 4579 }, { "epoch": 19.079166666666666, "loss": 0.1042037308216095, "loss_ce": 1.4593413197871996e-06, "loss_iou": 0.26171875, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 417962520, "step": 4579 }, { "epoch": 19.083333333333332, "grad_norm": 1.9474276412188012, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 418053328, "step": 4580 }, { "epoch": 19.083333333333332, "loss": 0.03855319693684578, "loss_ce": 1.8681653273233678e-06, "loss_iou": 0.0615234375, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 418053328, "step": 4580 }, { "epoch": 19.0875, "grad_norm": 1.8638873381418695, "learning_rate": 5e-05, "loss": 0.0283, "num_input_tokens_seen": 418144472, "step": 4581 }, { "epoch": 19.0875, "loss": 0.02354763075709343, "loss_ce": 3.3199507925019134e-06, "loss_iou": 0.1044921875, "loss_num": 0.00469970703125, "loss_xval": 0.0235595703125, "num_input_tokens_seen": 418144472, "step": 4581 }, { "epoch": 19.091666666666665, "grad_norm": 4.119505044884438, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 418236104, "step": 4582 }, { "epoch": 19.091666666666665, "loss": 0.055428922176361084, "loss_ce": 0.00017684623890090734, "loss_iou": 0.34375, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 418236104, "step": 4582 }, { "epoch": 19.095833333333335, "grad_norm": 1.9469295253184462, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 418327876, "step": 4583 }, { "epoch": 19.095833333333335, "loss": 0.06220350041985512, "loss_ce": 8.677566256665159e-06, "loss_iou": 0.287109375, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 418327876, "step": 4583 }, { "epoch": 19.1, "grad_norm": 2.2121584184868843, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 418419188, "step": 4584 }, { "epoch": 19.1, "loss": 0.05744265764951706, "loss_ce": 8.573052582505625e-06, "loss_iou": 0.27734375, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 418419188, "step": 4584 }, { "epoch": 19.104166666666668, "grad_norm": 12.93904308181816, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 418510720, "step": 4585 }, { "epoch": 19.104166666666668, "loss": 0.039964459836483, "loss_ce": 1.6924389001360396e-06, "loss_iou": 0.2470703125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 418510720, "step": 4585 }, { "epoch": 19.108333333333334, "grad_norm": 2.9666561806430396, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 418602060, "step": 4586 }, { "epoch": 19.108333333333334, "loss": 0.02945863828063011, "loss_ce": 1.545392024127068e-06, "loss_iou": 0.326171875, "loss_num": 0.005889892578125, "loss_xval": 0.0294189453125, "num_input_tokens_seen": 418602060, "step": 4586 }, { "epoch": 19.1125, "grad_norm": 3.6047797729490227, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 418693120, "step": 4587 }, { "epoch": 19.1125, "loss": 0.05617877095937729, "loss_ce": 1.1170643119839951e-05, "loss_iou": 0.33984375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 418693120, "step": 4587 }, { "epoch": 19.116666666666667, "grad_norm": 2.8946339671114987, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 418784704, "step": 4588 }, { "epoch": 19.116666666666667, "loss": 0.04178152233362198, "loss_ce": 2.955552645289572e-06, "loss_iou": 0.3515625, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 418784704, "step": 4588 }, { "epoch": 19.120833333333334, "grad_norm": 5.404336078957903, "learning_rate": 5e-05, "loss": 0.0707, "num_input_tokens_seen": 418876000, "step": 4589 }, { "epoch": 19.120833333333334, "loss": 0.07328888028860092, "loss_ce": 9.190454761665023e-07, "loss_iou": 0.25390625, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 418876000, "step": 4589 }, { "epoch": 19.125, "grad_norm": 3.182620288468722, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 418966880, "step": 4590 }, { "epoch": 19.125, "loss": 0.029190445318818092, "loss_ce": 3.817777667336486e-07, "loss_iou": 0.291015625, "loss_num": 0.005828857421875, "loss_xval": 0.0291748046875, "num_input_tokens_seen": 418966880, "step": 4590 }, { "epoch": 19.129166666666666, "grad_norm": 3.29085385384401, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 419058444, "step": 4591 }, { "epoch": 19.129166666666666, "loss": 0.0446050763130188, "loss_ce": 3.637588861238328e-06, "loss_iou": 0.17578125, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 419058444, "step": 4591 }, { "epoch": 19.133333333333333, "grad_norm": 3.3331466382473955, "learning_rate": 5e-05, "loss": 0.0587, "num_input_tokens_seen": 419149700, "step": 4592 }, { "epoch": 19.133333333333333, "loss": 0.07439761608839035, "loss_ce": 0.000270417018327862, "loss_iou": 0.3515625, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 419149700, "step": 4592 }, { "epoch": 19.1375, "grad_norm": 2.538930195089702, "learning_rate": 5e-05, "loss": 0.0266, "num_input_tokens_seen": 419240220, "step": 4593 }, { "epoch": 19.1375, "loss": 0.026464354246854782, "loss_ce": 5.614498149952851e-06, "loss_iou": 0.0927734375, "loss_num": 0.005279541015625, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 419240220, "step": 4593 }, { "epoch": 19.141666666666666, "grad_norm": 1.845257482792228, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 419331324, "step": 4594 }, { "epoch": 19.141666666666666, "loss": 0.04600181058049202, "loss_ce": 3.78307788651e-07, "loss_iou": 0.134765625, "loss_num": 0.00921630859375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 419331324, "step": 4594 }, { "epoch": 19.145833333333332, "grad_norm": 1.6647310288365473, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 419422536, "step": 4595 }, { "epoch": 19.145833333333332, "loss": 0.03375301510095596, "loss_ce": 5.70387953757745e-07, "loss_iou": 0.2080078125, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 419422536, "step": 4595 }, { "epoch": 19.15, "grad_norm": 2.4610156499270515, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 419513348, "step": 4596 }, { "epoch": 19.15, "loss": 0.07220683991909027, "loss_ce": 2.2547997104993556e-06, "loss_iou": 0.2236328125, "loss_num": 0.014404296875, "loss_xval": 0.072265625, "num_input_tokens_seen": 419513348, "step": 4596 }, { "epoch": 19.154166666666665, "grad_norm": 2.281233719535446, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 419604488, "step": 4597 }, { "epoch": 19.154166666666665, "loss": 0.027935050427913666, "loss_ce": 2.6724294002633542e-05, "loss_iou": 0.1923828125, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 419604488, "step": 4597 }, { "epoch": 19.158333333333335, "grad_norm": 2.3784933036688747, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 419695952, "step": 4598 }, { "epoch": 19.158333333333335, "loss": 0.028787771239876747, "loss_ce": 5.5471795349149033e-05, "loss_iou": 0.265625, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 419695952, "step": 4598 }, { "epoch": 19.1625, "grad_norm": 2.9961719644931772, "learning_rate": 5e-05, "loss": 0.0316, "num_input_tokens_seen": 419787268, "step": 4599 }, { "epoch": 19.1625, "loss": 0.038679152727127075, "loss_ce": 0.0002117453987011686, "loss_iou": 0.185546875, "loss_num": 0.0076904296875, "loss_xval": 0.03857421875, "num_input_tokens_seen": 419787268, "step": 4599 }, { "epoch": 19.166666666666668, "grad_norm": 10.899750783137005, "learning_rate": 5e-05, "loss": 0.0909, "num_input_tokens_seen": 419878896, "step": 4600 }, { "epoch": 19.166666666666668, "loss": 0.07383735477924347, "loss_ce": 1.532657552161254e-05, "loss_iou": 0.271484375, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 419878896, "step": 4600 }, { "epoch": 19.170833333333334, "grad_norm": 2.0187713051589995, "learning_rate": 5e-05, "loss": 0.0765, "num_input_tokens_seen": 419970188, "step": 4601 }, { "epoch": 19.170833333333334, "loss": 0.05879247188568115, "loss_ce": 3.5817220123135485e-07, "loss_iou": 0.28125, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 419970188, "step": 4601 }, { "epoch": 19.175, "grad_norm": 2.397311933697297, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 420059880, "step": 4602 }, { "epoch": 19.175, "loss": 0.03691239282488823, "loss_ce": 1.3815242709824815e-06, "loss_iou": 0.2412109375, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 420059880, "step": 4602 }, { "epoch": 19.179166666666667, "grad_norm": 2.472619211766026, "learning_rate": 5e-05, "loss": 0.0345, "num_input_tokens_seen": 420150648, "step": 4603 }, { "epoch": 19.179166666666667, "loss": 0.027687918394804, "loss_ce": 0.00011528656614245847, "loss_iou": 0.205078125, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 420150648, "step": 4603 }, { "epoch": 19.183333333333334, "grad_norm": 2.212561412051951, "learning_rate": 5e-05, "loss": 0.0371, "num_input_tokens_seen": 420242356, "step": 4604 }, { "epoch": 19.183333333333334, "loss": 0.03770972788333893, "loss_ce": 5.2634909479820635e-06, "loss_iou": 0.2451171875, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 420242356, "step": 4604 }, { "epoch": 19.1875, "grad_norm": 2.719185495876815, "learning_rate": 5e-05, "loss": 0.0376, "num_input_tokens_seen": 420333892, "step": 4605 }, { "epoch": 19.1875, "loss": 0.03716596961021423, "loss_ce": 3.1870847578829853e-06, "loss_iou": 0.34765625, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 420333892, "step": 4605 }, { "epoch": 19.191666666666666, "grad_norm": 3.8515667271314955, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 420424708, "step": 4606 }, { "epoch": 19.191666666666666, "loss": 0.03641510009765625, "loss_ce": 7.628682851645863e-06, "loss_iou": 0.3125, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 420424708, "step": 4606 }, { "epoch": 19.195833333333333, "grad_norm": 3.647166180041839, "learning_rate": 5e-05, "loss": 0.0684, "num_input_tokens_seen": 420515716, "step": 4607 }, { "epoch": 19.195833333333333, "loss": 0.04955513775348663, "loss_ce": 2.218728923253366e-06, "loss_iou": 0.3671875, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 420515716, "step": 4607 }, { "epoch": 19.2, "grad_norm": 2.7711581869391506, "learning_rate": 5e-05, "loss": 0.1014, "num_input_tokens_seen": 420606832, "step": 4608 }, { "epoch": 19.2, "loss": 0.09333069622516632, "loss_ce": 3.1158111823970103e-07, "loss_iou": 0.08251953125, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 420606832, "step": 4608 }, { "epoch": 19.204166666666666, "grad_norm": 1.9191125356977428, "learning_rate": 5e-05, "loss": 0.027, "num_input_tokens_seen": 420698212, "step": 4609 }, { "epoch": 19.204166666666666, "loss": 0.02860306203365326, "loss_ce": 8.092390999081545e-06, "loss_iou": 0.203125, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 420698212, "step": 4609 }, { "epoch": 19.208333333333332, "grad_norm": 1.381995485228386, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 420789712, "step": 4610 }, { "epoch": 19.208333333333332, "loss": 0.052813343703746796, "loss_ce": 0.001704030204564333, "loss_iou": 0.2265625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 420789712, "step": 4610 }, { "epoch": 19.2125, "grad_norm": 2.6387628805307397, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 420881292, "step": 4611 }, { "epoch": 19.2125, "loss": 0.06479300558567047, "loss_ce": 4.183812052360736e-06, "loss_iou": 0.310546875, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 420881292, "step": 4611 }, { "epoch": 19.216666666666665, "grad_norm": 5.227308189588182, "learning_rate": 5e-05, "loss": 0.0663, "num_input_tokens_seen": 420973184, "step": 4612 }, { "epoch": 19.216666666666665, "loss": 0.037201642990112305, "loss_ce": 7.13834765520005e-07, "loss_iou": 0.26953125, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 420973184, "step": 4612 }, { "epoch": 19.220833333333335, "grad_norm": 2.520524143939457, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 421064220, "step": 4613 }, { "epoch": 19.220833333333335, "loss": 0.05414511263370514, "loss_ce": 2.9817920221830718e-05, "loss_iou": 0.291015625, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 421064220, "step": 4613 }, { "epoch": 19.225, "grad_norm": 2.049099752261963, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 421155728, "step": 4614 }, { "epoch": 19.225, "loss": 0.06513015180826187, "loss_ce": 0.00020399918139446527, "loss_iou": 0.263671875, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 421155728, "step": 4614 }, { "epoch": 19.229166666666668, "grad_norm": 3.1133542526049043, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 421247300, "step": 4615 }, { "epoch": 19.229166666666668, "loss": 0.05482819676399231, "loss_ce": 3.372350420249859e-06, "loss_iou": 0.357421875, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 421247300, "step": 4615 }, { "epoch": 19.233333333333334, "grad_norm": 2.7352091884268783, "learning_rate": 5e-05, "loss": 0.0746, "num_input_tokens_seen": 421338644, "step": 4616 }, { "epoch": 19.233333333333334, "loss": 0.04940875619649887, "loss_ce": 8.015024945962068e-07, "loss_iou": 0.240234375, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 421338644, "step": 4616 }, { "epoch": 19.2375, "grad_norm": 2.1159310329647263, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 421430104, "step": 4617 }, { "epoch": 19.2375, "loss": 0.056608691811561584, "loss_ce": 5.962173963780515e-05, "loss_iou": 0.392578125, "loss_num": 0.01129150390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 421430104, "step": 4617 }, { "epoch": 19.241666666666667, "grad_norm": 2.2682581554209995, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 421520848, "step": 4618 }, { "epoch": 19.241666666666667, "loss": 0.03226238861680031, "loss_ce": 0.00015789664757903665, "loss_iou": 0.2421875, "loss_num": 0.00640869140625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 421520848, "step": 4618 }, { "epoch": 19.245833333333334, "grad_norm": 3.0687848147882666, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 421610708, "step": 4619 }, { "epoch": 19.245833333333334, "loss": 0.04960303753614426, "loss_ce": 1.1973999789915979e-05, "loss_iou": 0.30859375, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 421610708, "step": 4619 }, { "epoch": 19.25, "grad_norm": 2.688487761475692, "learning_rate": 5e-05, "loss": 0.0753, "num_input_tokens_seen": 421701940, "step": 4620 }, { "epoch": 19.25, "loss": 0.04789130389690399, "loss_ce": 1.5958464700815966e-06, "loss_iou": 0.25390625, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 421701940, "step": 4620 }, { "epoch": 19.254166666666666, "grad_norm": 2.7867440067556357, "learning_rate": 5e-05, "loss": 0.0776, "num_input_tokens_seen": 421793464, "step": 4621 }, { "epoch": 19.254166666666666, "loss": 0.05678050220012665, "loss_ce": 2.548747943365015e-06, "loss_iou": 0.373046875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 421793464, "step": 4621 }, { "epoch": 19.258333333333333, "grad_norm": 2.7770962428236006, "learning_rate": 5e-05, "loss": 0.0437, "num_input_tokens_seen": 421884396, "step": 4622 }, { "epoch": 19.258333333333333, "loss": 0.04880964383482933, "loss_ce": 4.406524567457382e-06, "loss_iou": 0.1416015625, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 421884396, "step": 4622 }, { "epoch": 19.2625, "grad_norm": 2.2242337443277167, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 421975608, "step": 4623 }, { "epoch": 19.2625, "loss": 0.04673875868320465, "loss_ce": 1.083584493244416e-06, "loss_iou": 0.30859375, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 421975608, "step": 4623 }, { "epoch": 19.266666666666666, "grad_norm": 2.489074077579362, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 422066960, "step": 4624 }, { "epoch": 19.266666666666666, "loss": 0.031821057200431824, "loss_ce": 2.6656816771719605e-06, "loss_iou": 0.287109375, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 422066960, "step": 4624 }, { "epoch": 19.270833333333332, "grad_norm": 6.752299421382515, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 422158040, "step": 4625 }, { "epoch": 19.270833333333332, "loss": 0.04742731153964996, "loss_ce": 2.9909158456575824e-06, "loss_iou": 0.34765625, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 422158040, "step": 4625 }, { "epoch": 19.275, "grad_norm": 4.463241691798342, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 422249268, "step": 4626 }, { "epoch": 19.275, "loss": 0.0679115504026413, "loss_ce": 9.937119102687575e-06, "loss_iou": 0.2001953125, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 422249268, "step": 4626 }, { "epoch": 19.279166666666665, "grad_norm": 2.493955255139426, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 422341256, "step": 4627 }, { "epoch": 19.279166666666665, "loss": 0.03526730090379715, "loss_ce": 0.002224391559138894, "loss_iou": 0.357421875, "loss_num": 0.006622314453125, "loss_xval": 0.032958984375, "num_input_tokens_seen": 422341256, "step": 4627 }, { "epoch": 19.283333333333335, "grad_norm": 1.9185103008354087, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 422432976, "step": 4628 }, { "epoch": 19.283333333333335, "loss": 0.036364421248435974, "loss_ce": 0.008234846405684948, "loss_iou": 0.2578125, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 422432976, "step": 4628 }, { "epoch": 19.2875, "grad_norm": 2.399643182157675, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 422524804, "step": 4629 }, { "epoch": 19.2875, "loss": 0.04569268226623535, "loss_ce": 7.867306521802675e-06, "loss_iou": 0.2265625, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 422524804, "step": 4629 }, { "epoch": 19.291666666666668, "grad_norm": 6.484003390186585, "learning_rate": 5e-05, "loss": 0.0573, "num_input_tokens_seen": 422616320, "step": 4630 }, { "epoch": 19.291666666666668, "loss": 0.046999868005514145, "loss_ce": 1.805866850190796e-05, "loss_iou": 0.291015625, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 422616320, "step": 4630 }, { "epoch": 19.295833333333334, "grad_norm": 3.7062116592217076, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 422707244, "step": 4631 }, { "epoch": 19.295833333333334, "loss": 0.04261889308691025, "loss_ce": 1.0962626220134553e-06, "loss_iou": 0.306640625, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 422707244, "step": 4631 }, { "epoch": 19.3, "grad_norm": 3.0011988021600846, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 422799340, "step": 4632 }, { "epoch": 19.3, "loss": 0.04482024535536766, "loss_ce": 9.673715248936787e-05, "loss_iou": 0.185546875, "loss_num": 0.00897216796875, "loss_xval": 0.044677734375, "num_input_tokens_seen": 422799340, "step": 4632 }, { "epoch": 19.304166666666667, "grad_norm": 3.561026396108788, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 422890824, "step": 4633 }, { "epoch": 19.304166666666667, "loss": 0.033055853098630905, "loss_ce": 5.316383067111019e-06, "loss_iou": 0.33203125, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 422890824, "step": 4633 }, { "epoch": 19.308333333333334, "grad_norm": 2.5648590204529653, "learning_rate": 5e-05, "loss": 0.0371, "num_input_tokens_seen": 422981696, "step": 4634 }, { "epoch": 19.308333333333334, "loss": 0.03345128148794174, "loss_ce": 4.014779733552132e-06, "loss_iou": 0.248046875, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 422981696, "step": 4634 }, { "epoch": 19.3125, "grad_norm": 2.713760964626292, "learning_rate": 5e-05, "loss": 0.0322, "num_input_tokens_seen": 423073144, "step": 4635 }, { "epoch": 19.3125, "loss": 0.03699225187301636, "loss_ce": 1.2577083907672204e-05, "loss_iou": 0.2890625, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 423073144, "step": 4635 }, { "epoch": 19.316666666666666, "grad_norm": 2.8395400432992584, "learning_rate": 5e-05, "loss": 0.0747, "num_input_tokens_seen": 423164548, "step": 4636 }, { "epoch": 19.316666666666666, "loss": 0.10205307602882385, "loss_ce": 2.2919698494661134e-06, "loss_iou": 0.375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 423164548, "step": 4636 }, { "epoch": 19.320833333333333, "grad_norm": 1.4517058294201308, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 423255736, "step": 4637 }, { "epoch": 19.320833333333333, "loss": 0.022456102073192596, "loss_ce": 0.0004376704164315015, "loss_iou": 0.2431640625, "loss_num": 0.00439453125, "loss_xval": 0.02197265625, "num_input_tokens_seen": 423255736, "step": 4637 }, { "epoch": 19.325, "grad_norm": 1.7284897894745854, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 423347540, "step": 4638 }, { "epoch": 19.325, "loss": 0.031548645347356796, "loss_ce": 0.0015803832793608308, "loss_iou": 0.087890625, "loss_num": 0.0059814453125, "loss_xval": 0.030029296875, "num_input_tokens_seen": 423347540, "step": 4638 }, { "epoch": 19.329166666666666, "grad_norm": 3.241048032926379, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 423439020, "step": 4639 }, { "epoch": 19.329166666666666, "loss": 0.047394994646310806, "loss_ce": 1.1946518725380884e-06, "loss_iou": 0.33203125, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 423439020, "step": 4639 }, { "epoch": 19.333333333333332, "grad_norm": 1.7661108467346713, "learning_rate": 5e-05, "loss": 0.0313, "num_input_tokens_seen": 423530072, "step": 4640 }, { "epoch": 19.333333333333332, "loss": 0.024259299039840698, "loss_ce": 0.00010463706712471321, "loss_iou": 0.251953125, "loss_num": 0.00482177734375, "loss_xval": 0.024169921875, "num_input_tokens_seen": 423530072, "step": 4640 }, { "epoch": 19.3375, "grad_norm": 1.796054270519455, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 423621112, "step": 4641 }, { "epoch": 19.3375, "loss": 0.05872287228703499, "loss_ce": 0.00011385927791707218, "loss_iou": 0.0908203125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 423621112, "step": 4641 }, { "epoch": 19.341666666666665, "grad_norm": 2.8753962287058172, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 423712208, "step": 4642 }, { "epoch": 19.341666666666665, "loss": 0.02776733972132206, "loss_ce": 1.1602001904975623e-05, "loss_iou": 0.22265625, "loss_num": 0.00555419921875, "loss_xval": 0.0277099609375, "num_input_tokens_seen": 423712208, "step": 4642 }, { "epoch": 19.345833333333335, "grad_norm": 3.584867533621596, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 423802712, "step": 4643 }, { "epoch": 19.345833333333335, "loss": 0.05088900774717331, "loss_ce": 9.473724276176654e-07, "loss_iou": 0.263671875, "loss_num": 0.01019287109375, "loss_xval": 0.05078125, "num_input_tokens_seen": 423802712, "step": 4643 }, { "epoch": 19.35, "grad_norm": 4.460664034772701, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 423894308, "step": 4644 }, { "epoch": 19.35, "loss": 0.03981529548764229, "loss_ce": 2.800288530124817e-05, "loss_iou": 0.1611328125, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 423894308, "step": 4644 }, { "epoch": 19.354166666666668, "grad_norm": 3.4040997149712022, "learning_rate": 5e-05, "loss": 0.06, "num_input_tokens_seen": 423984912, "step": 4645 }, { "epoch": 19.354166666666668, "loss": 0.05637955665588379, "loss_ce": 1.3589784430223517e-05, "loss_iou": 0.30078125, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 423984912, "step": 4645 }, { "epoch": 19.358333333333334, "grad_norm": 2.4571329662506205, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 424075428, "step": 4646 }, { "epoch": 19.358333333333334, "loss": 0.04334487393498421, "loss_ce": 2.2821677703177556e-06, "loss_iou": 0.203125, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 424075428, "step": 4646 }, { "epoch": 19.3625, "grad_norm": 1.9287176151702943, "learning_rate": 5e-05, "loss": 0.0515, "num_input_tokens_seen": 424166624, "step": 4647 }, { "epoch": 19.3625, "loss": 0.043597668409347534, "loss_ce": 8.627773058833554e-05, "loss_iou": 0.21875, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 424166624, "step": 4647 }, { "epoch": 19.366666666666667, "grad_norm": 3.378448794891788, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 424258132, "step": 4648 }, { "epoch": 19.366666666666667, "loss": 0.044696178287267685, "loss_ce": 2.607420174172148e-05, "loss_iou": 0.2421875, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 424258132, "step": 4648 }, { "epoch": 19.370833333333334, "grad_norm": 5.674397449328923, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 424348976, "step": 4649 }, { "epoch": 19.370833333333334, "loss": 0.036913517862558365, "loss_ce": 2.5051069769688183e-06, "loss_iou": 0.34375, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 424348976, "step": 4649 }, { "epoch": 19.375, "grad_norm": 2.679481678738005, "learning_rate": 5e-05, "loss": 0.0715, "num_input_tokens_seen": 424440316, "step": 4650 }, { "epoch": 19.375, "loss": 0.09503061324357986, "loss_ce": 2.93930515908869e-05, "loss_iou": 0.326171875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 424440316, "step": 4650 }, { "epoch": 19.379166666666666, "grad_norm": 2.508375754683479, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 424531544, "step": 4651 }, { "epoch": 19.379166666666666, "loss": 0.03825172036886215, "loss_ce": 2.8451055186451413e-05, "loss_iou": 0.361328125, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 424531544, "step": 4651 }, { "epoch": 19.383333333333333, "grad_norm": 2.380298782734332, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 424622704, "step": 4652 }, { "epoch": 19.383333333333333, "loss": 0.05666510760784149, "loss_ce": 9.22513208934106e-06, "loss_iou": 0.26171875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 424622704, "step": 4652 }, { "epoch": 19.3875, "grad_norm": 5.627118312781519, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 424714176, "step": 4653 }, { "epoch": 19.3875, "loss": 0.05855630338191986, "loss_ce": 0.0014350259443745017, "loss_iou": 0.2060546875, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 424714176, "step": 4653 }, { "epoch": 19.391666666666666, "grad_norm": 2.6564384856760244, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 424805272, "step": 4654 }, { "epoch": 19.391666666666666, "loss": 0.043996796011924744, "loss_ce": 5.7064071370405145e-06, "loss_iou": 0.189453125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 424805272, "step": 4654 }, { "epoch": 19.395833333333332, "grad_norm": 4.446683692038585, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 424897180, "step": 4655 }, { "epoch": 19.395833333333332, "loss": 0.030892925336956978, "loss_ce": 5.491260890266858e-05, "loss_iou": 0.322265625, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 424897180, "step": 4655 }, { "epoch": 19.4, "grad_norm": 3.3669092038663226, "learning_rate": 5e-05, "loss": 0.1017, "num_input_tokens_seen": 424988992, "step": 4656 }, { "epoch": 19.4, "loss": 0.14664022624492645, "loss_ce": 3.2549396564718336e-06, "loss_iou": 0.28125, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 424988992, "step": 4656 }, { "epoch": 19.404166666666665, "grad_norm": 2.2558108668362733, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 425080276, "step": 4657 }, { "epoch": 19.404166666666665, "loss": 0.026068340986967087, "loss_ce": 6.3310485529655125e-06, "loss_iou": 0.263671875, "loss_num": 0.005218505859375, "loss_xval": 0.026123046875, "num_input_tokens_seen": 425080276, "step": 4657 }, { "epoch": 19.408333333333335, "grad_norm": 2.3723436902042834, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 425171800, "step": 4658 }, { "epoch": 19.408333333333335, "loss": 0.0370577797293663, "loss_ce": 5.5216809414559975e-05, "loss_iou": 0.279296875, "loss_num": 0.00738525390625, "loss_xval": 0.037109375, "num_input_tokens_seen": 425171800, "step": 4658 }, { "epoch": 19.4125, "grad_norm": 2.306311635313943, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 425263160, "step": 4659 }, { "epoch": 19.4125, "loss": 0.04586896300315857, "loss_ce": 1.0427361303300131e-06, "loss_iou": 0.29296875, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 425263160, "step": 4659 }, { "epoch": 19.416666666666668, "grad_norm": 2.3018572746408976, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 425354852, "step": 4660 }, { "epoch": 19.416666666666668, "loss": 0.049921829253435135, "loss_ce": 0.009493667632341385, "loss_iou": 0.28125, "loss_num": 0.008056640625, "loss_xval": 0.04052734375, "num_input_tokens_seen": 425354852, "step": 4660 }, { "epoch": 19.420833333333334, "grad_norm": 13.237613082223017, "learning_rate": 5e-05, "loss": 0.1018, "num_input_tokens_seen": 425445808, "step": 4661 }, { "epoch": 19.420833333333334, "loss": 0.0899994745850563, "loss_ce": 3.1283711905416567e-06, "loss_iou": 0.283203125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 425445808, "step": 4661 }, { "epoch": 19.425, "grad_norm": 1.359916345120742, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 425536932, "step": 4662 }, { "epoch": 19.425, "loss": 0.03377960994839668, "loss_ce": 4.668272879371216e-07, "loss_iou": 0.2333984375, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 425536932, "step": 4662 }, { "epoch": 19.429166666666667, "grad_norm": 1.892552875532026, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 425628296, "step": 4663 }, { "epoch": 19.429166666666667, "loss": 0.04816794395446777, "loss_ce": 7.392980933218496e-06, "loss_iou": 0.10546875, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 425628296, "step": 4663 }, { "epoch": 19.433333333333334, "grad_norm": 0.9833520687870673, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 425720208, "step": 4664 }, { "epoch": 19.433333333333334, "loss": 0.056046824902296066, "loss_ce": 0.001969678560271859, "loss_iou": 0.201171875, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 425720208, "step": 4664 }, { "epoch": 19.4375, "grad_norm": 3.3194012961821304, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 425811708, "step": 4665 }, { "epoch": 19.4375, "loss": 0.023443780839443207, "loss_ce": 9.783326095202938e-05, "loss_iou": 0.28125, "loss_num": 0.004669189453125, "loss_xval": 0.0233154296875, "num_input_tokens_seen": 425811708, "step": 4665 }, { "epoch": 19.441666666666666, "grad_norm": 2.2507664632180524, "learning_rate": 5e-05, "loss": 0.0884, "num_input_tokens_seen": 425902828, "step": 4666 }, { "epoch": 19.441666666666666, "loss": 0.14204253256320953, "loss_ce": 0.0031570233404636383, "loss_iou": 0.1435546875, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 425902828, "step": 4666 }, { "epoch": 19.445833333333333, "grad_norm": 2.5591175574352567, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 425994712, "step": 4667 }, { "epoch": 19.445833333333333, "loss": 0.03063901700079441, "loss_ce": 3.7513345887418836e-05, "loss_iou": 0.28125, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 425994712, "step": 4667 }, { "epoch": 19.45, "grad_norm": 3.1368144608611517, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 426085312, "step": 4668 }, { "epoch": 19.45, "loss": 0.050644420087337494, "loss_ce": 4.987635975339799e-07, "loss_iou": 0.28125, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 426085312, "step": 4668 }, { "epoch": 19.454166666666666, "grad_norm": 2.8116083842144644, "learning_rate": 5e-05, "loss": 0.0651, "num_input_tokens_seen": 426176332, "step": 4669 }, { "epoch": 19.454166666666666, "loss": 0.09149643033742905, "loss_ce": 4.723514393845107e-06, "loss_iou": 0.28125, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 426176332, "step": 4669 }, { "epoch": 19.458333333333332, "grad_norm": 3.030003459572827, "learning_rate": 5e-05, "loss": 0.0482, "num_input_tokens_seen": 426267580, "step": 4670 }, { "epoch": 19.458333333333332, "loss": 0.047811392694711685, "loss_ce": 5.608817900792928e-06, "loss_iou": 0.21875, "loss_num": 0.009521484375, "loss_xval": 0.0478515625, "num_input_tokens_seen": 426267580, "step": 4670 }, { "epoch": 19.4625, "grad_norm": 1.4491655834251747, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 426358968, "step": 4671 }, { "epoch": 19.4625, "loss": 0.022887222468852997, "loss_ce": 6.6694537963485345e-06, "loss_iou": 0.08984375, "loss_num": 0.00457763671875, "loss_xval": 0.0228271484375, "num_input_tokens_seen": 426358968, "step": 4671 }, { "epoch": 19.466666666666665, "grad_norm": 4.709829187465688, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 426450512, "step": 4672 }, { "epoch": 19.466666666666665, "loss": 0.050652679055929184, "loss_ce": 1.1255635854467982e-06, "loss_iou": 0.259765625, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 426450512, "step": 4672 }, { "epoch": 19.470833333333335, "grad_norm": 2.618074247148993, "learning_rate": 5e-05, "loss": 0.0561, "num_input_tokens_seen": 426541836, "step": 4673 }, { "epoch": 19.470833333333335, "loss": 0.07112175226211548, "loss_ce": 8.170564797183033e-06, "loss_iou": 0.3984375, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 426541836, "step": 4673 }, { "epoch": 19.475, "grad_norm": 1.4930309481930126, "learning_rate": 5e-05, "loss": 0.1141, "num_input_tokens_seen": 426633068, "step": 4674 }, { "epoch": 19.475, "loss": 0.14563126862049103, "loss_ce": 0.0001082018789020367, "loss_iou": 0.25390625, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 426633068, "step": 4674 }, { "epoch": 19.479166666666668, "grad_norm": 2.980041369257071, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 426723156, "step": 4675 }, { "epoch": 19.479166666666668, "loss": 0.05325976759195328, "loss_ce": 6.596771072509e-06, "loss_iou": 0.1865234375, "loss_num": 0.01068115234375, "loss_xval": 0.05322265625, "num_input_tokens_seen": 426723156, "step": 4675 }, { "epoch": 19.483333333333334, "grad_norm": 3.026211387083105, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 426814776, "step": 4676 }, { "epoch": 19.483333333333334, "loss": 0.03120480850338936, "loss_ce": 0.002579319756478071, "loss_iou": 0.2490234375, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 426814776, "step": 4676 }, { "epoch": 19.4875, "grad_norm": 3.366757929772386, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 426905988, "step": 4677 }, { "epoch": 19.4875, "loss": 0.03799605742096901, "loss_ce": 0.0034349020570516586, "loss_iou": 0.2734375, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 426905988, "step": 4677 }, { "epoch": 19.491666666666667, "grad_norm": 2.8653070360820565, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 426997520, "step": 4678 }, { "epoch": 19.491666666666667, "loss": 0.045054540038108826, "loss_ce": 8.689066453371197e-05, "loss_iou": 0.265625, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 426997520, "step": 4678 }, { "epoch": 19.495833333333334, "grad_norm": 2.44534489222494, "learning_rate": 5e-05, "loss": 0.0432, "num_input_tokens_seen": 427088668, "step": 4679 }, { "epoch": 19.495833333333334, "loss": 0.0548277348279953, "loss_ce": 1.0534420653129928e-05, "loss_iou": 0.29296875, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 427088668, "step": 4679 }, { "epoch": 19.5, "grad_norm": 3.304983826062335, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 427179708, "step": 4680 }, { "epoch": 19.5, "loss": 0.04680022597312927, "loss_ce": 3.203599408152513e-05, "loss_iou": 0.169921875, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 427179708, "step": 4680 }, { "epoch": 19.504166666666666, "grad_norm": 1.9065554411739414, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 427270804, "step": 4681 }, { "epoch": 19.504166666666666, "loss": 0.03119390271604061, "loss_ce": 2.019602652580943e-05, "loss_iou": 0.1943359375, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 427270804, "step": 4681 }, { "epoch": 19.508333333333333, "grad_norm": 13.070053340693008, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 427362196, "step": 4682 }, { "epoch": 19.508333333333333, "loss": 0.051514655351638794, "loss_ce": 9.846053217188455e-07, "loss_iou": 0.265625, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 427362196, "step": 4682 }, { "epoch": 19.5125, "grad_norm": 3.3735253455937766, "learning_rate": 5e-05, "loss": 0.0327, "num_input_tokens_seen": 427453656, "step": 4683 }, { "epoch": 19.5125, "loss": 0.02974916622042656, "loss_ce": 3.6488923797151074e-05, "loss_iou": 0.2431640625, "loss_num": 0.005950927734375, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 427453656, "step": 4683 }, { "epoch": 19.516666666666666, "grad_norm": 1.321183884842579, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 427544776, "step": 4684 }, { "epoch": 19.516666666666666, "loss": 0.05862480029463768, "loss_ce": 5.306997081788722e-07, "loss_iou": 0.1953125, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 427544776, "step": 4684 }, { "epoch": 19.520833333333332, "grad_norm": 1.51784472719767, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 427636228, "step": 4685 }, { "epoch": 19.520833333333332, "loss": 0.03482060134410858, "loss_ce": 3.056173227378167e-05, "loss_iou": 0.259765625, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 427636228, "step": 4685 }, { "epoch": 19.525, "grad_norm": 3.819240039794996, "learning_rate": 5e-05, "loss": 0.072, "num_input_tokens_seen": 427727648, "step": 4686 }, { "epoch": 19.525, "loss": 0.03399910777807236, "loss_ce": 4.830400939681567e-05, "loss_iou": 0.2421875, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 427727648, "step": 4686 }, { "epoch": 19.529166666666665, "grad_norm": 1.2896131186634596, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 427818756, "step": 4687 }, { "epoch": 19.529166666666665, "loss": 0.025551263242959976, "loss_ce": 4.222063125780551e-07, "loss_iou": 0.2109375, "loss_num": 0.005096435546875, "loss_xval": 0.0255126953125, "num_input_tokens_seen": 427818756, "step": 4687 }, { "epoch": 19.533333333333335, "grad_norm": 2.4855605544570807, "learning_rate": 5e-05, "loss": 0.0721, "num_input_tokens_seen": 427910028, "step": 4688 }, { "epoch": 19.533333333333335, "loss": 0.052681293338537216, "loss_ce": 7.954224201967008e-06, "loss_iou": 0.25390625, "loss_num": 0.010498046875, "loss_xval": 0.052734375, "num_input_tokens_seen": 427910028, "step": 4688 }, { "epoch": 19.5375, "grad_norm": 2.39258266792005, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 428001156, "step": 4689 }, { "epoch": 19.5375, "loss": 0.03420303389430046, "loss_ce": 4.5918864088889677e-07, "loss_iou": 0.326171875, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 428001156, "step": 4689 }, { "epoch": 19.541666666666668, "grad_norm": 3.0163886925914207, "learning_rate": 5e-05, "loss": 0.0331, "num_input_tokens_seen": 428092540, "step": 4690 }, { "epoch": 19.541666666666668, "loss": 0.0386294387280941, "loss_ce": 1.8156640635424992e-06, "loss_iou": 0.283203125, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 428092540, "step": 4690 }, { "epoch": 19.545833333333334, "grad_norm": 2.8737297381837252, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 428184612, "step": 4691 }, { "epoch": 19.545833333333334, "loss": 0.05887051299214363, "loss_ce": 2.1029197796451626e-06, "loss_iou": 0.306640625, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 428184612, "step": 4691 }, { "epoch": 19.55, "grad_norm": 2.9026702244736287, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 428276444, "step": 4692 }, { "epoch": 19.55, "loss": 0.02635483257472515, "loss_ce": 3.3422638807678595e-05, "loss_iou": 0.306640625, "loss_num": 0.0052490234375, "loss_xval": 0.0263671875, "num_input_tokens_seen": 428276444, "step": 4692 }, { "epoch": 19.554166666666667, "grad_norm": 3.604260008083001, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 428368020, "step": 4693 }, { "epoch": 19.554166666666667, "loss": 0.04505544155836105, "loss_ce": 2.6755342332762666e-05, "loss_iou": 0.326171875, "loss_num": 0.009033203125, "loss_xval": 0.044921875, "num_input_tokens_seen": 428368020, "step": 4693 }, { "epoch": 19.558333333333334, "grad_norm": 4.186936577110721, "learning_rate": 5e-05, "loss": 0.0575, "num_input_tokens_seen": 428459144, "step": 4694 }, { "epoch": 19.558333333333334, "loss": 0.08561927080154419, "loss_ce": 2.206423459938378e-06, "loss_iou": 0.2158203125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 428459144, "step": 4694 }, { "epoch": 19.5625, "grad_norm": 2.0447266565619673, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 428550580, "step": 4695 }, { "epoch": 19.5625, "loss": 0.06577420234680176, "loss_ce": 1.1950858151976718e-06, "loss_iou": 0.287109375, "loss_num": 0.01312255859375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 428550580, "step": 4695 }, { "epoch": 19.566666666666666, "grad_norm": 1.6493418387427887, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 428642472, "step": 4696 }, { "epoch": 19.566666666666666, "loss": 0.02830236777663231, "loss_ce": 4.942057330481475e-06, "loss_iou": 0.248046875, "loss_num": 0.00567626953125, "loss_xval": 0.0283203125, "num_input_tokens_seen": 428642472, "step": 4696 }, { "epoch": 19.570833333333333, "grad_norm": 1.716198320956972, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 428733728, "step": 4697 }, { "epoch": 19.570833333333333, "loss": 0.06456813961267471, "loss_ce": 5.716344730899436e-07, "loss_iou": 0.25390625, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 428733728, "step": 4697 }, { "epoch": 19.575, "grad_norm": 2.2096837878042783, "learning_rate": 5e-05, "loss": 0.0613, "num_input_tokens_seen": 428825072, "step": 4698 }, { "epoch": 19.575, "loss": 0.07033099234104156, "loss_ce": 3.374950392753817e-05, "loss_iou": 0.36328125, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 428825072, "step": 4698 }, { "epoch": 19.579166666666666, "grad_norm": 1.6190235520147538, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 428916656, "step": 4699 }, { "epoch": 19.579166666666666, "loss": 0.04145081713795662, "loss_ce": 0.0006030374206602573, "loss_iou": 0.189453125, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 428916656, "step": 4699 }, { "epoch": 19.583333333333332, "grad_norm": 1.9948864819267058, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 429008224, "step": 4700 }, { "epoch": 19.583333333333332, "loss": 0.04196205735206604, "loss_ce": 8.014858394744806e-06, "loss_iou": 0.25390625, "loss_num": 0.00836181640625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 429008224, "step": 4700 }, { "epoch": 19.5875, "grad_norm": 2.3718308783133866, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 429098564, "step": 4701 }, { "epoch": 19.5875, "loss": 0.04729030281305313, "loss_ce": 3.315304411444231e-06, "loss_iou": 0.13671875, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 429098564, "step": 4701 }, { "epoch": 19.591666666666665, "grad_norm": 2.724976630234873, "learning_rate": 5e-05, "loss": 0.0262, "num_input_tokens_seen": 429189884, "step": 4702 }, { "epoch": 19.591666666666665, "loss": 0.024874746799468994, "loss_ce": 2.9214047572168056e-06, "loss_iou": 0.162109375, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 429189884, "step": 4702 }, { "epoch": 19.595833333333335, "grad_norm": 3.2361230807399264, "learning_rate": 5e-05, "loss": 0.0482, "num_input_tokens_seen": 429281780, "step": 4703 }, { "epoch": 19.595833333333335, "loss": 0.06647312641143799, "loss_ce": 5.846063686476555e-06, "loss_iou": 0.384765625, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 429281780, "step": 4703 }, { "epoch": 19.6, "grad_norm": 3.0761430478580745, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 429373436, "step": 4704 }, { "epoch": 19.6, "loss": 0.03468446061015129, "loss_ce": 1.2335344763414469e-06, "loss_iou": 0.314453125, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 429373436, "step": 4704 }, { "epoch": 19.604166666666668, "grad_norm": 2.890457420446236, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 429464764, "step": 4705 }, { "epoch": 19.604166666666668, "loss": 0.05024765059351921, "loss_ce": 1.571861321281176e-05, "loss_iou": 0.1865234375, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 429464764, "step": 4705 }, { "epoch": 19.608333333333334, "grad_norm": 2.2618360525194126, "learning_rate": 5e-05, "loss": 0.0371, "num_input_tokens_seen": 429556460, "step": 4706 }, { "epoch": 19.608333333333334, "loss": 0.04483448341488838, "loss_ce": 3.468158683972433e-05, "loss_iou": 0.23828125, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 429556460, "step": 4706 }, { "epoch": 19.6125, "grad_norm": 2.7006877486585705, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 429647384, "step": 4707 }, { "epoch": 19.6125, "loss": 0.1267559975385666, "loss_ce": 1.241495851900254e-06, "loss_iou": 0.25, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 429647384, "step": 4707 }, { "epoch": 19.616666666666667, "grad_norm": 2.8438714688223516, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 429738540, "step": 4708 }, { "epoch": 19.616666666666667, "loss": 0.0584740974009037, "loss_ce": 2.416654524495243e-06, "loss_iou": 0.21484375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 429738540, "step": 4708 }, { "epoch": 19.620833333333334, "grad_norm": 2.8012454532090842, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 429830268, "step": 4709 }, { "epoch": 19.620833333333334, "loss": 0.0580122284591198, "loss_ce": 0.0017759620677679777, "loss_iou": 0.234375, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 429830268, "step": 4709 }, { "epoch": 19.625, "grad_norm": 5.130070550972971, "learning_rate": 5e-05, "loss": 0.0455, "num_input_tokens_seen": 429921536, "step": 4710 }, { "epoch": 19.625, "loss": 0.048953525722026825, "loss_ce": 3.333101176394848e-06, "loss_iou": 0.26171875, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 429921536, "step": 4710 }, { "epoch": 19.629166666666666, "grad_norm": 4.6690431755644015, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 430011480, "step": 4711 }, { "epoch": 19.629166666666666, "loss": 0.0785800963640213, "loss_ce": 1.2594346117111854e-05, "loss_iou": 0.29296875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 430011480, "step": 4711 }, { "epoch": 19.633333333333333, "grad_norm": 1.6665062495952965, "learning_rate": 5e-05, "loss": 0.0485, "num_input_tokens_seen": 430102852, "step": 4712 }, { "epoch": 19.633333333333333, "loss": 0.0401344932615757, "loss_ce": 0.00020224100444465876, "loss_iou": 0.2265625, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 430102852, "step": 4712 }, { "epoch": 19.6375, "grad_norm": 55.26472547352569, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 430194392, "step": 4713 }, { "epoch": 19.6375, "loss": 0.08778560161590576, "loss_ce": 0.0003222259401809424, "loss_iou": 0.1435546875, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 430194392, "step": 4713 }, { "epoch": 19.641666666666666, "grad_norm": 3.4094239420725945, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 430285840, "step": 4714 }, { "epoch": 19.641666666666666, "loss": 0.05561989173293114, "loss_ce": 1.606982436896942e-06, "loss_iou": 0.1845703125, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 430285840, "step": 4714 }, { "epoch": 19.645833333333332, "grad_norm": 2.209287444515342, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 430377416, "step": 4715 }, { "epoch": 19.645833333333332, "loss": 0.05580145865678787, "loss_ce": 1.5326233551604673e-05, "loss_iou": 0.2578125, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 430377416, "step": 4715 }, { "epoch": 19.65, "grad_norm": 3.2790060049876057, "learning_rate": 5e-05, "loss": 0.0482, "num_input_tokens_seen": 430468836, "step": 4716 }, { "epoch": 19.65, "loss": 0.05664925277233124, "loss_ce": 3.914827175321989e-05, "loss_iou": 0.32421875, "loss_num": 0.01129150390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 430468836, "step": 4716 }, { "epoch": 19.654166666666665, "grad_norm": 4.991141402880353, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 430560788, "step": 4717 }, { "epoch": 19.654166666666665, "loss": 0.06913499534130096, "loss_ce": 1.2681159205385484e-05, "loss_iou": 0.267578125, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 430560788, "step": 4717 }, { "epoch": 19.658333333333335, "grad_norm": 2.2365178989291956, "learning_rate": 5e-05, "loss": 0.0516, "num_input_tokens_seen": 430651900, "step": 4718 }, { "epoch": 19.658333333333335, "loss": 0.04592512920498848, "loss_ce": 1.143124154623365e-05, "loss_iou": 0.1962890625, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 430651900, "step": 4718 }, { "epoch": 19.6625, "grad_norm": 1.8556325329910262, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 430742996, "step": 4719 }, { "epoch": 19.6625, "loss": 0.03147327899932861, "loss_ce": 9.654847417550627e-06, "loss_iou": 0.283203125, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 430742996, "step": 4719 }, { "epoch": 19.666666666666668, "grad_norm": 2.4656535129417003, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 430834932, "step": 4720 }, { "epoch": 19.666666666666668, "loss": 0.035395003855228424, "loss_ce": 7.090914732543752e-05, "loss_iou": 0.34765625, "loss_num": 0.007049560546875, "loss_xval": 0.035400390625, "num_input_tokens_seen": 430834932, "step": 4720 }, { "epoch": 19.670833333333334, "grad_norm": 6.594403244197698, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 430926420, "step": 4721 }, { "epoch": 19.670833333333334, "loss": 0.03331117704510689, "loss_ce": 1.6500658603035845e-05, "loss_iou": 0.1826171875, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 430926420, "step": 4721 }, { "epoch": 19.675, "grad_norm": 1.366341970941686, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 431017736, "step": 4722 }, { "epoch": 19.675, "loss": 0.04814673587679863, "loss_ce": 5.256579697743291e-06, "loss_iou": 0.291015625, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 431017736, "step": 4722 }, { "epoch": 19.679166666666667, "grad_norm": 1.2537744542642868, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 431108828, "step": 4723 }, { "epoch": 19.679166666666667, "loss": 0.032738637179136276, "loss_ce": 8.536110726709012e-06, "loss_iou": 0.2421875, "loss_num": 0.006561279296875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 431108828, "step": 4723 }, { "epoch": 19.683333333333334, "grad_norm": 2.0578171337478017, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 431200032, "step": 4724 }, { "epoch": 19.683333333333334, "loss": 0.07429766654968262, "loss_ce": 1.7882392057799734e-05, "loss_iou": 0.2080078125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 431200032, "step": 4724 }, { "epoch": 19.6875, "grad_norm": 4.339840406561024, "learning_rate": 5e-05, "loss": 0.0585, "num_input_tokens_seen": 431291196, "step": 4725 }, { "epoch": 19.6875, "loss": 0.03668530285358429, "loss_ce": 3.1715421755507123e-06, "loss_iou": 0.2275390625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 431291196, "step": 4725 }, { "epoch": 19.691666666666666, "grad_norm": 3.29129615500161, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 431381816, "step": 4726 }, { "epoch": 19.691666666666666, "loss": 0.040788955986499786, "loss_ce": 2.2145079583424376e-06, "loss_iou": 0.318359375, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 431381816, "step": 4726 }, { "epoch": 19.695833333333333, "grad_norm": 12.701915079434166, "learning_rate": 5e-05, "loss": 0.0804, "num_input_tokens_seen": 431473364, "step": 4727 }, { "epoch": 19.695833333333333, "loss": 0.09925023466348648, "loss_ce": 0.0001291386433877051, "loss_iou": 0.326171875, "loss_num": 0.0198974609375, "loss_xval": 0.09912109375, "num_input_tokens_seen": 431473364, "step": 4727 }, { "epoch": 19.7, "grad_norm": 8.645379770563224, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 431564700, "step": 4728 }, { "epoch": 19.7, "loss": 0.056926753371953964, "loss_ce": 1.9098599295830354e-05, "loss_iou": 0.201171875, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 431564700, "step": 4728 }, { "epoch": 19.704166666666666, "grad_norm": 2.0947718623776916, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 431655808, "step": 4729 }, { "epoch": 19.704166666666666, "loss": 0.09451901167631149, "loss_ce": 6.074310476833489e-06, "loss_iou": 0.201171875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 431655808, "step": 4729 }, { "epoch": 19.708333333333332, "grad_norm": 1.0461809066007421, "learning_rate": 5e-05, "loss": 0.0643, "num_input_tokens_seen": 431746692, "step": 4730 }, { "epoch": 19.708333333333332, "loss": 0.04510524123907089, "loss_ce": 2.6206024017483287e-07, "loss_iou": 0.26953125, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 431746692, "step": 4730 }, { "epoch": 19.7125, "grad_norm": 1.9280093587865277, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 431837696, "step": 4731 }, { "epoch": 19.7125, "loss": 0.04307686537504196, "loss_ce": 1.3039922350799316e-06, "loss_iou": 0.1923828125, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 431837696, "step": 4731 }, { "epoch": 19.716666666666665, "grad_norm": 1.24827481538375, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 431928716, "step": 4732 }, { "epoch": 19.716666666666665, "loss": 0.036690644919872284, "loss_ce": 3.140354237984866e-05, "loss_iou": 0.1201171875, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 431928716, "step": 4732 }, { "epoch": 19.720833333333335, "grad_norm": 2.4385557181882853, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 432019900, "step": 4733 }, { "epoch": 19.720833333333335, "loss": 0.029526352882385254, "loss_ce": 0.00045836143544875085, "loss_iou": 0.1875, "loss_num": 0.005828857421875, "loss_xval": 0.029052734375, "num_input_tokens_seen": 432019900, "step": 4733 }, { "epoch": 19.725, "grad_norm": 3.1961506297296887, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 432111116, "step": 4734 }, { "epoch": 19.725, "loss": 0.044266924262046814, "loss_ce": 4.6955759898992255e-05, "loss_iou": 0.369140625, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 432111116, "step": 4734 }, { "epoch": 19.729166666666668, "grad_norm": 5.10569169609451, "learning_rate": 5e-05, "loss": 0.1041, "num_input_tokens_seen": 432202424, "step": 4735 }, { "epoch": 19.729166666666668, "loss": 0.17766880989074707, "loss_ce": 5.649718514177948e-05, "loss_iou": 0.35546875, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 432202424, "step": 4735 }, { "epoch": 19.733333333333334, "grad_norm": 3.4066542227567003, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 432293264, "step": 4736 }, { "epoch": 19.733333333333334, "loss": 0.030559774488210678, "loss_ce": 4.050310508318944e-06, "loss_iou": 0.37109375, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 432293264, "step": 4736 }, { "epoch": 19.7375, "grad_norm": 4.169440483906738, "learning_rate": 5e-05, "loss": 0.0563, "num_input_tokens_seen": 432385216, "step": 4737 }, { "epoch": 19.7375, "loss": 0.05067963898181915, "loss_ce": 6.623629451496527e-05, "loss_iou": 0.244140625, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 432385216, "step": 4737 }, { "epoch": 19.741666666666667, "grad_norm": 2.841919099935251, "learning_rate": 5e-05, "loss": 0.0492, "num_input_tokens_seen": 432476316, "step": 4738 }, { "epoch": 19.741666666666667, "loss": 0.04055830091238022, "loss_ce": 4.422929578140611e-07, "loss_iou": 0.2421875, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 432476316, "step": 4738 }, { "epoch": 19.745833333333334, "grad_norm": 5.138289345149183, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 432567392, "step": 4739 }, { "epoch": 19.745833333333334, "loss": 0.1000186949968338, "loss_ce": 0.0001499211648479104, "loss_iou": 0.0908203125, "loss_num": 0.0198974609375, "loss_xval": 0.10009765625, "num_input_tokens_seen": 432567392, "step": 4739 }, { "epoch": 19.75, "grad_norm": 2.2537605228225135, "learning_rate": 5e-05, "loss": 0.0543, "num_input_tokens_seen": 432658656, "step": 4740 }, { "epoch": 19.75, "loss": 0.05645791068673134, "loss_ce": 3.918984816664306e-07, "loss_iou": 0.2578125, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 432658656, "step": 4740 }, { "epoch": 19.754166666666666, "grad_norm": 25.017282268378043, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 432749804, "step": 4741 }, { "epoch": 19.754166666666666, "loss": 0.07857572287321091, "loss_ce": 5.399743895395659e-05, "loss_iou": 0.17578125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 432749804, "step": 4741 }, { "epoch": 19.758333333333333, "grad_norm": 2.2293138053656794, "learning_rate": 5e-05, "loss": 0.0487, "num_input_tokens_seen": 432841492, "step": 4742 }, { "epoch": 19.758333333333333, "loss": 0.06268471479415894, "loss_ce": 1.6107320561786764e-06, "loss_iou": 0.14453125, "loss_num": 0.0125732421875, "loss_xval": 0.0625, "num_input_tokens_seen": 432841492, "step": 4742 }, { "epoch": 19.7625, "grad_norm": 1.9049561130749288, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 432932664, "step": 4743 }, { "epoch": 19.7625, "loss": 0.034028127789497375, "loss_ce": 1.02684725789004e-06, "loss_iou": 0.1474609375, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 432932664, "step": 4743 }, { "epoch": 19.766666666666666, "grad_norm": 2.813401973659435, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 433024400, "step": 4744 }, { "epoch": 19.766666666666666, "loss": 0.0335683599114418, "loss_ce": 1.4282367374107707e-05, "loss_iou": 0.298828125, "loss_num": 0.0067138671875, "loss_xval": 0.033447265625, "num_input_tokens_seen": 433024400, "step": 4744 }, { "epoch": 19.770833333333332, "grad_norm": 3.536771192947322, "learning_rate": 5e-05, "loss": 0.0771, "num_input_tokens_seen": 433115424, "step": 4745 }, { "epoch": 19.770833333333332, "loss": 0.11302217841148376, "loss_ce": 7.958343303471338e-06, "loss_iou": 0.326171875, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 433115424, "step": 4745 }, { "epoch": 19.775, "grad_norm": 4.7410123582333465, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 433206964, "step": 4746 }, { "epoch": 19.775, "loss": 0.030320316553115845, "loss_ce": 8.73003045853693e-06, "loss_iou": 0.166015625, "loss_num": 0.006072998046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 433206964, "step": 4746 }, { "epoch": 19.779166666666665, "grad_norm": 2.269552770487602, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 433298868, "step": 4747 }, { "epoch": 19.779166666666665, "loss": 0.030412226915359497, "loss_ce": 9.0903395175701e-06, "loss_iou": 0.359375, "loss_num": 0.006072998046875, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 433298868, "step": 4747 }, { "epoch": 19.783333333333335, "grad_norm": 2.0103372931750485, "learning_rate": 5e-05, "loss": 0.0618, "num_input_tokens_seen": 433390096, "step": 4748 }, { "epoch": 19.783333333333335, "loss": 0.057116035372018814, "loss_ce": 2.391058842476923e-06, "loss_iou": 0.27734375, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 433390096, "step": 4748 }, { "epoch": 19.7875, "grad_norm": 2.0854324586282127, "learning_rate": 5e-05, "loss": 0.0843, "num_input_tokens_seen": 433481292, "step": 4749 }, { "epoch": 19.7875, "loss": 0.09669038653373718, "loss_ce": 7.17340808478184e-05, "loss_iou": 0.1962890625, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 433481292, "step": 4749 }, { "epoch": 19.791666666666668, "grad_norm": 2.195617206921799, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 433573160, "step": 4750 }, { "epoch": 19.791666666666668, "eval_seeclick_CIoU": 0.2617866322398186, "eval_seeclick_GIoU": 0.24531903862953186, "eval_seeclick_IoU": 0.352044939994812, "eval_seeclick_MAE_all": 0.0924195908010006, "eval_seeclick_MAE_h": 0.077772106975317, "eval_seeclick_MAE_w": 0.1920384168624878, "eval_seeclick_MAE_x_boxes": 0.19218388199806213, "eval_seeclick_MAE_y_boxes": 0.08314986154437065, "eval_seeclick_NUM_probability": 0.9999991953372955, "eval_seeclick_inside_bbox": 0.6008522808551788, "eval_seeclick_loss": 0.5741644501686096, "eval_seeclick_loss_ce": 0.1503664255142212, "eval_seeclick_loss_iou": 0.48944091796875, "eval_seeclick_loss_num": 0.081939697265625, "eval_seeclick_loss_xval": 0.40985107421875, "eval_seeclick_runtime": 77.1332, "eval_seeclick_samples_per_second": 0.557, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 433573160, "step": 4750 }, { "epoch": 19.791666666666668, "eval_icons_CIoU": 0.24555600434541702, "eval_icons_GIoU": 0.2464718595147133, "eval_icons_IoU": 0.34940461814403534, "eval_icons_MAE_all": 0.08040782436728477, "eval_icons_MAE_h": 0.16841255873441696, "eval_icons_MAE_w": 0.12601130455732346, "eval_icons_MAE_x_boxes": 0.1272355169057846, "eval_icons_MAE_y_boxes": 0.1678340807557106, "eval_icons_NUM_probability": 0.9999995231628418, "eval_icons_inside_bbox": 0.4913194477558136, "eval_icons_loss": 0.39623481035232544, "eval_icons_loss_ce": 0.000442440141341649, "eval_icons_loss_iou": 0.17828369140625, "eval_icons_loss_num": 0.0807037353515625, "eval_icons_loss_xval": 0.40380859375, "eval_icons_runtime": 106.6615, "eval_icons_samples_per_second": 0.469, "eval_icons_steps_per_second": 0.019, "num_input_tokens_seen": 433573160, "step": 4750 }, { "epoch": 19.791666666666668, "eval_screenspot_CIoU": 0.36383505662282306, "eval_screenspot_GIoU": 0.3586165060599645, "eval_screenspot_IoU": 0.4403722683588664, "eval_screenspot_MAE_all": 0.10166554649670918, "eval_screenspot_MAE_h": 0.09295431524515152, "eval_screenspot_MAE_w": 0.19901380191246668, "eval_screenspot_MAE_x_boxes": 0.1910252943634987, "eval_screenspot_MAE_y_boxes": 0.08816726754109065, "eval_screenspot_NUM_probability": 0.9999977151552836, "eval_screenspot_inside_bbox": 0.6699999968210856, "eval_screenspot_loss": 0.5046296715736389, "eval_screenspot_loss_ce": 0.0008909959384861091, "eval_screenspot_loss_iou": 0.35546875, "eval_screenspot_loss_num": 0.10320536295572917, "eval_screenspot_loss_xval": 0.5158894856770834, "eval_screenspot_runtime": 153.4008, "eval_screenspot_samples_per_second": 0.58, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 433573160, "step": 4750 }, { "epoch": 19.791666666666668, "eval_compot_CIoU": 0.4970640242099762, "eval_compot_GIoU": 0.4937925338745117, "eval_compot_IoU": 0.5729747414588928, "eval_compot_MAE_all": 0.05616691708564758, "eval_compot_MAE_h": 0.06489542499184608, "eval_compot_MAE_w": 0.1452133134007454, "eval_compot_MAE_x_boxes": 0.14551985636353493, "eval_compot_MAE_y_boxes": 0.06429455429315567, "eval_compot_NUM_probability": 0.9999991357326508, "eval_compot_inside_bbox": 0.7204861044883728, "eval_compot_loss": 0.33045417070388794, "eval_compot_loss_ce": 0.054313765838742256, "eval_compot_loss_iou": 0.31292724609375, "eval_compot_loss_num": 0.05380821228027344, "eval_compot_loss_xval": 0.268829345703125, "eval_compot_runtime": 96.9045, "eval_compot_samples_per_second": 0.516, "eval_compot_steps_per_second": 0.021, "num_input_tokens_seen": 433573160, "step": 4750 }, { "epoch": 19.791666666666668, "loss": 0.35344207286834717, "loss_ce": 0.04799164459109306, "loss_iou": 0.326171875, "loss_num": 0.06103515625, "loss_xval": 0.3046875, "num_input_tokens_seen": 433573160, "step": 4750 }, { "epoch": 19.795833333333334, "grad_norm": 2.519109222695506, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 433664960, "step": 4751 }, { "epoch": 19.795833333333334, "loss": 0.07364164292812347, "loss_ce": 2.7295736799715087e-06, "loss_iou": 0.265625, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 433664960, "step": 4751 }, { "epoch": 19.8, "grad_norm": 2.77512853672313, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 433755616, "step": 4752 }, { "epoch": 19.8, "loss": 0.029641907662153244, "loss_ce": 4.748583887703717e-05, "loss_iou": 0.255859375, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 433755616, "step": 4752 }, { "epoch": 19.804166666666667, "grad_norm": 2.770319139030136, "learning_rate": 5e-05, "loss": 0.0391, "num_input_tokens_seen": 433846352, "step": 4753 }, { "epoch": 19.804166666666667, "loss": 0.04019250348210335, "loss_ce": 3.137064413749613e-05, "loss_iou": 0.447265625, "loss_num": 0.008056640625, "loss_xval": 0.0400390625, "num_input_tokens_seen": 433846352, "step": 4753 }, { "epoch": 19.808333333333334, "grad_norm": 2.752000642849686, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 433937496, "step": 4754 }, { "epoch": 19.808333333333334, "loss": 0.028888067230582237, "loss_ce": 3.1796630537428427e-06, "loss_iou": 0.19140625, "loss_num": 0.005767822265625, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 433937496, "step": 4754 }, { "epoch": 19.8125, "grad_norm": 3.1079540791972287, "learning_rate": 5e-05, "loss": 0.0742, "num_input_tokens_seen": 434028432, "step": 4755 }, { "epoch": 19.8125, "loss": 0.11143758147954941, "loss_ce": 2.651629074534867e-06, "loss_iou": 0.27734375, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 434028432, "step": 4755 }, { "epoch": 19.816666666666666, "grad_norm": 3.342540574063074, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 434119424, "step": 4756 }, { "epoch": 19.816666666666666, "loss": 0.05670653283596039, "loss_ce": 4.8754759518487845e-06, "loss_iou": 0.29296875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 434119424, "step": 4756 }, { "epoch": 19.820833333333333, "grad_norm": 4.83109444434289, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 434210808, "step": 4757 }, { "epoch": 19.820833333333333, "loss": 0.04404546320438385, "loss_ce": 0.0034265692811459303, "loss_iou": 0.318359375, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 434210808, "step": 4757 }, { "epoch": 19.825, "grad_norm": 3.2738265241261257, "learning_rate": 5e-05, "loss": 0.093, "num_input_tokens_seen": 434302308, "step": 4758 }, { "epoch": 19.825, "loss": 0.06388229876756668, "loss_ce": 9.010569556267e-06, "loss_iou": 0.365234375, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 434302308, "step": 4758 }, { "epoch": 19.829166666666666, "grad_norm": 2.387520215439463, "learning_rate": 5e-05, "loss": 0.0755, "num_input_tokens_seen": 434393700, "step": 4759 }, { "epoch": 19.829166666666666, "loss": 0.08277393877506256, "loss_ce": 2.648112058523111e-06, "loss_iou": 0.1826171875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 434393700, "step": 4759 }, { "epoch": 19.833333333333332, "grad_norm": 6.311417557444102, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 434485016, "step": 4760 }, { "epoch": 19.833333333333332, "loss": 0.05282047018408775, "loss_ce": 4.031695061712526e-05, "loss_iou": 0.205078125, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 434485016, "step": 4760 }, { "epoch": 19.8375, "grad_norm": 1.6847023622892647, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 434576416, "step": 4761 }, { "epoch": 19.8375, "loss": 0.02808937057852745, "loss_ce": 1.3197223779570777e-05, "loss_iou": 0.26171875, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 434576416, "step": 4761 }, { "epoch": 19.841666666666665, "grad_norm": 1.833421773612613, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 434667668, "step": 4762 }, { "epoch": 19.841666666666665, "loss": 0.03834429755806923, "loss_ce": 1.4218359865481034e-05, "loss_iou": 0.1865234375, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 434667668, "step": 4762 }, { "epoch": 19.845833333333335, "grad_norm": 3.172469243527616, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 434759152, "step": 4763 }, { "epoch": 19.845833333333335, "loss": 0.04270630329847336, "loss_ce": 1.2211901776026934e-05, "loss_iou": 0.2177734375, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 434759152, "step": 4763 }, { "epoch": 19.85, "grad_norm": 6.25394716952818, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 434850704, "step": 4764 }, { "epoch": 19.85, "loss": 0.05046635866165161, "loss_ce": 0.0033014400396496058, "loss_iou": 0.31640625, "loss_num": 0.00946044921875, "loss_xval": 0.047119140625, "num_input_tokens_seen": 434850704, "step": 4764 }, { "epoch": 19.854166666666668, "grad_norm": 3.3766243816417227, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 434941864, "step": 4765 }, { "epoch": 19.854166666666668, "loss": 0.031087806448340416, "loss_ce": 8.194526162697002e-05, "loss_iou": 0.302734375, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 434941864, "step": 4765 }, { "epoch": 19.858333333333334, "grad_norm": 2.9027640432813158, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 435033672, "step": 4766 }, { "epoch": 19.858333333333334, "loss": 0.04052776098251343, "loss_ce": 4.1431542285863543e-07, "loss_iou": 0.2060546875, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 435033672, "step": 4766 }, { "epoch": 19.8625, "grad_norm": 2.5543771738399754, "learning_rate": 5e-05, "loss": 0.067, "num_input_tokens_seen": 435124988, "step": 4767 }, { "epoch": 19.8625, "loss": 0.06493158638477325, "loss_ce": 5.121577123645693e-05, "loss_iou": 0.1650390625, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 435124988, "step": 4767 }, { "epoch": 19.866666666666667, "grad_norm": 1.9328115074620458, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 435216236, "step": 4768 }, { "epoch": 19.866666666666667, "loss": 0.03497876226902008, "loss_ce": 0.00011242987238802016, "loss_iou": 0.1943359375, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 435216236, "step": 4768 }, { "epoch": 19.870833333333334, "grad_norm": 2.1231171249238603, "learning_rate": 5e-05, "loss": 0.0999, "num_input_tokens_seen": 435307572, "step": 4769 }, { "epoch": 19.870833333333334, "loss": 0.0322178415954113, "loss_ce": 6.538154138979735e-06, "loss_iou": 0.11865234375, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 435307572, "step": 4769 }, { "epoch": 19.875, "grad_norm": 2.462645421105774, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 435398996, "step": 4770 }, { "epoch": 19.875, "loss": 0.042506515979766846, "loss_ce": 0.00016337900888174772, "loss_iou": 0.177734375, "loss_num": 0.00848388671875, "loss_xval": 0.042236328125, "num_input_tokens_seen": 435398996, "step": 4770 }, { "epoch": 19.879166666666666, "grad_norm": 4.052346583769054, "learning_rate": 5e-05, "loss": 0.0598, "num_input_tokens_seen": 435490352, "step": 4771 }, { "epoch": 19.879166666666666, "loss": 0.07947006821632385, "loss_ce": 2.291058535774937e-06, "loss_iou": 0.21484375, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 435490352, "step": 4771 }, { "epoch": 19.883333333333333, "grad_norm": 2.122526959860213, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 435582028, "step": 4772 }, { "epoch": 19.883333333333333, "loss": 0.03125705569982529, "loss_ce": 9.860927821137011e-05, "loss_iou": 0.21875, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 435582028, "step": 4772 }, { "epoch": 19.8875, "grad_norm": 3.0380894123602484, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 435673124, "step": 4773 }, { "epoch": 19.8875, "loss": 0.038742516189813614, "loss_ce": 4.5227932332636556e-07, "loss_iou": 0.20703125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 435673124, "step": 4773 }, { "epoch": 19.891666666666666, "grad_norm": 4.866386407131122, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 435764704, "step": 4774 }, { "epoch": 19.891666666666666, "loss": 0.035870663821697235, "loss_ce": 0.00024902174482122064, "loss_iou": 0.232421875, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 435764704, "step": 4774 }, { "epoch": 19.895833333333332, "grad_norm": 3.302905722056321, "learning_rate": 5e-05, "loss": 0.0727, "num_input_tokens_seen": 435855960, "step": 4775 }, { "epoch": 19.895833333333332, "loss": 0.05564238131046295, "loss_ce": 0.0002682388585526496, "loss_iou": 0.380859375, "loss_num": 0.01104736328125, "loss_xval": 0.055419921875, "num_input_tokens_seen": 435855960, "step": 4775 }, { "epoch": 19.9, "grad_norm": 2.900177431317305, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 435947544, "step": 4776 }, { "epoch": 19.9, "loss": 0.06207602098584175, "loss_ce": 3.2624948289594613e-06, "loss_iou": 0.30078125, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 435947544, "step": 4776 }, { "epoch": 19.904166666666665, "grad_norm": 2.439142759959792, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 436038620, "step": 4777 }, { "epoch": 19.904166666666665, "loss": 0.041464440524578094, "loss_ce": 6.3084507928579114e-06, "loss_iou": 0.1845703125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 436038620, "step": 4777 }, { "epoch": 19.908333333333335, "grad_norm": 5.102423414343203, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 436129744, "step": 4778 }, { "epoch": 19.908333333333335, "loss": 0.04953721910715103, "loss_ce": 0.0002971051726490259, "loss_iou": 0.27734375, "loss_num": 0.00982666015625, "loss_xval": 0.04931640625, "num_input_tokens_seen": 436129744, "step": 4778 }, { "epoch": 19.9125, "grad_norm": 2.2744876880677984, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 436221264, "step": 4779 }, { "epoch": 19.9125, "loss": 0.03273453563451767, "loss_ce": 0.000256203900789842, "loss_iou": 0.138671875, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 436221264, "step": 4779 }, { "epoch": 19.916666666666668, "grad_norm": 2.644698538864571, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 436312728, "step": 4780 }, { "epoch": 19.916666666666668, "loss": 0.06178348883986473, "loss_ce": 6.539040668940288e-07, "loss_iou": 0.302734375, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 436312728, "step": 4780 }, { "epoch": 19.920833333333334, "grad_norm": 2.5130443225194004, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 436404180, "step": 4781 }, { "epoch": 19.920833333333334, "loss": 0.032920412719249725, "loss_ce": 0.00017504925199318677, "loss_iou": 0.197265625, "loss_num": 0.006561279296875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 436404180, "step": 4781 }, { "epoch": 19.925, "grad_norm": 1.0152006260830428, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 436495676, "step": 4782 }, { "epoch": 19.925, "loss": 0.04953616112470627, "loss_ce": 6.131633199402131e-06, "loss_iou": 0.146484375, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 436495676, "step": 4782 }, { "epoch": 19.929166666666667, "grad_norm": 1.6079033670692338, "learning_rate": 5e-05, "loss": 0.026, "num_input_tokens_seen": 436587548, "step": 4783 }, { "epoch": 19.929166666666667, "loss": 0.030390875414013863, "loss_ce": 2.996691819134867e-06, "loss_iou": 0.3203125, "loss_num": 0.006072998046875, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 436587548, "step": 4783 }, { "epoch": 19.933333333333334, "grad_norm": 2.056534338786135, "learning_rate": 5e-05, "loss": 0.0614, "num_input_tokens_seen": 436678536, "step": 4784 }, { "epoch": 19.933333333333334, "loss": 0.06954550743103027, "loss_ce": 2.6466495910426602e-05, "loss_iou": 0.16015625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 436678536, "step": 4784 }, { "epoch": 19.9375, "grad_norm": 2.495766816196573, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 436769484, "step": 4785 }, { "epoch": 19.9375, "loss": 0.03806344047188759, "loss_ce": 8.020611858228222e-06, "loss_iou": 0.189453125, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 436769484, "step": 4785 }, { "epoch": 19.941666666666666, "grad_norm": 2.6281992052223955, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 436860612, "step": 4786 }, { "epoch": 19.941666666666666, "loss": 0.024370262399315834, "loss_ce": 2.4864317310857587e-05, "loss_iou": 0.232421875, "loss_num": 0.004852294921875, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 436860612, "step": 4786 }, { "epoch": 19.945833333333333, "grad_norm": 4.542283123687921, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 436952216, "step": 4787 }, { "epoch": 19.945833333333333, "loss": 0.055509522557258606, "loss_ce": 0.00028796104015782475, "loss_iou": 0.2294921875, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 436952216, "step": 4787 }, { "epoch": 19.95, "grad_norm": 1.9657238523141636, "learning_rate": 5e-05, "loss": 0.038, "num_input_tokens_seen": 437043396, "step": 4788 }, { "epoch": 19.95, "loss": 0.048294927924871445, "loss_ce": 8.602569891991152e-07, "loss_iou": 0.294921875, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 437043396, "step": 4788 }, { "epoch": 19.954166666666666, "grad_norm": 5.430431075051217, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 437134896, "step": 4789 }, { "epoch": 19.954166666666666, "loss": 0.03300505131483078, "loss_ce": 2.8948926455996116e-07, "loss_iou": 0.2373046875, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 437134896, "step": 4789 }, { "epoch": 19.958333333333332, "grad_norm": 2.4163825972074218, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 437226228, "step": 4790 }, { "epoch": 19.958333333333332, "loss": 0.08775608241558075, "loss_ce": 0.00028507053502835333, "loss_iou": 0.2265625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 437226228, "step": 4790 }, { "epoch": 19.9625, "grad_norm": 3.138963856779165, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 437317116, "step": 4791 }, { "epoch": 19.9625, "loss": 0.03851897269487381, "loss_ce": 0.00038726101047359407, "loss_iou": 0.369140625, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 437317116, "step": 4791 }, { "epoch": 19.966666666666665, "grad_norm": 3.222853049196043, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 437408572, "step": 4792 }, { "epoch": 19.966666666666665, "loss": 0.06291039288043976, "loss_ce": 0.002607660135254264, "loss_iou": 0.1796875, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 437408572, "step": 4792 }, { "epoch": 19.970833333333335, "grad_norm": 3.1241585449398195, "learning_rate": 5e-05, "loss": 0.0362, "num_input_tokens_seen": 437500044, "step": 4793 }, { "epoch": 19.970833333333335, "loss": 0.039086949080228806, "loss_ce": 0.00014270386600401253, "loss_iou": 0.31640625, "loss_num": 0.007781982421875, "loss_xval": 0.0390625, "num_input_tokens_seen": 437500044, "step": 4793 }, { "epoch": 19.975, "grad_norm": 2.754162527096431, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 437592028, "step": 4794 }, { "epoch": 19.975, "loss": 0.028476163744926453, "loss_ce": 3.263681264797924e-06, "loss_iou": 0.251953125, "loss_num": 0.005706787109375, "loss_xval": 0.0284423828125, "num_input_tokens_seen": 437592028, "step": 4794 }, { "epoch": 19.979166666666668, "grad_norm": 2.0195337905149455, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 437683280, "step": 4795 }, { "epoch": 19.979166666666668, "loss": 0.023691684007644653, "loss_ce": 1.0043763722933363e-05, "loss_iou": 0.1796875, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 437683280, "step": 4795 }, { "epoch": 19.983333333333334, "grad_norm": 2.871976282909025, "learning_rate": 5e-05, "loss": 0.1023, "num_input_tokens_seen": 437774404, "step": 4796 }, { "epoch": 19.983333333333334, "loss": 0.06012318655848503, "loss_ce": 3.559290234989021e-06, "loss_iou": 0.2333984375, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 437774404, "step": 4796 }, { "epoch": 19.9875, "grad_norm": 2.7455227723420847, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 437865412, "step": 4797 }, { "epoch": 19.9875, "loss": 0.07167306542396545, "loss_ce": 2.5335707505291793e-06, "loss_iou": 0.267578125, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 437865412, "step": 4797 }, { "epoch": 19.991666666666667, "grad_norm": 3.6041223435322114, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 437956260, "step": 4798 }, { "epoch": 19.991666666666667, "loss": 0.042237237095832825, "loss_ce": 9.090753110285732e-07, "loss_iou": 0.38671875, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 437956260, "step": 4798 }, { "epoch": 19.995833333333334, "grad_norm": 2.4390005978821794, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 438047760, "step": 4799 }, { "epoch": 19.995833333333334, "loss": 0.046113960444927216, "loss_ce": 1.7160655261250213e-05, "loss_iou": 0.33984375, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 438047760, "step": 4799 }, { "epoch": 20.0, "grad_norm": 2.381231649308952, "learning_rate": 5e-05, "loss": 0.0668, "num_input_tokens_seen": 438139052, "step": 4800 }, { "epoch": 20.0, "loss": 0.0817049890756607, "loss_ce": 9.439384484721813e-06, "loss_iou": 0.12060546875, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 438139052, "step": 4800 }, { "epoch": 20.004166666666666, "grad_norm": 2.8739335153532806, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 438230164, "step": 4801 }, { "epoch": 20.004166666666666, "loss": 0.041184697300195694, "loss_ce": 1.222786636390083e-06, "loss_iou": 0.369140625, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 438230164, "step": 4801 }, { "epoch": 20.008333333333333, "grad_norm": 6.263826768460578, "learning_rate": 5e-05, "loss": 0.044, "num_input_tokens_seen": 438321560, "step": 4802 }, { "epoch": 20.008333333333333, "loss": 0.028685349971055984, "loss_ce": 6.748837768100202e-05, "loss_iou": 0.267578125, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 438321560, "step": 4802 }, { "epoch": 20.0125, "grad_norm": 2.6413899758281056, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 438412924, "step": 4803 }, { "epoch": 20.0125, "loss": 0.04007952660322189, "loss_ce": 9.947018952516373e-06, "loss_iou": 0.361328125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 438412924, "step": 4803 }, { "epoch": 20.016666666666666, "grad_norm": 2.6635715423342825, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 438503780, "step": 4804 }, { "epoch": 20.016666666666666, "loss": 0.04384048283100128, "loss_ce": 1.984845539482194e-06, "loss_iou": 0.349609375, "loss_num": 0.00872802734375, "loss_xval": 0.0439453125, "num_input_tokens_seen": 438503780, "step": 4804 }, { "epoch": 20.020833333333332, "grad_norm": 4.176671686106218, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 438595184, "step": 4805 }, { "epoch": 20.020833333333332, "loss": 0.05074448883533478, "loss_ce": 1.3875467175239464e-06, "loss_iou": 0.314453125, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 438595184, "step": 4805 }, { "epoch": 20.025, "grad_norm": 2.052634063071722, "learning_rate": 5e-05, "loss": 0.071, "num_input_tokens_seen": 438686516, "step": 4806 }, { "epoch": 20.025, "loss": 0.03876500576734543, "loss_ce": 0.0001450096460757777, "loss_iou": 0.296875, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 438686516, "step": 4806 }, { "epoch": 20.029166666666665, "grad_norm": 3.100562519952563, "learning_rate": 5e-05, "loss": 0.0432, "num_input_tokens_seen": 438777576, "step": 4807 }, { "epoch": 20.029166666666665, "loss": 0.017205331474542618, "loss_ce": 1.0473372640262824e-06, "loss_iou": 0.234375, "loss_num": 0.003448486328125, "loss_xval": 0.0172119140625, "num_input_tokens_seen": 438777576, "step": 4807 }, { "epoch": 20.033333333333335, "grad_norm": 3.1485098132144618, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 438869028, "step": 4808 }, { "epoch": 20.033333333333335, "loss": 0.04611296206712723, "loss_ce": 8.993303026727517e-07, "loss_iou": 0.310546875, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 438869028, "step": 4808 }, { "epoch": 20.0375, "grad_norm": 3.070790534611428, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 438960856, "step": 4809 }, { "epoch": 20.0375, "loss": 0.08876143395900726, "loss_ce": 0.003754718229174614, "loss_iou": 0.2412109375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 438960856, "step": 4809 }, { "epoch": 20.041666666666668, "grad_norm": 3.648642482675532, "learning_rate": 5e-05, "loss": 0.0306, "num_input_tokens_seen": 439052900, "step": 4810 }, { "epoch": 20.041666666666668, "loss": 0.03443126380443573, "loss_ce": 7.435416591761168e-06, "loss_iou": 0.2421875, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 439052900, "step": 4810 }, { "epoch": 20.045833333333334, "grad_norm": 4.008229139740039, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 439144596, "step": 4811 }, { "epoch": 20.045833333333334, "loss": 0.03682493418455124, "loss_ce": 0.003530257847160101, "loss_iou": 0.2314453125, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 439144596, "step": 4811 }, { "epoch": 20.05, "grad_norm": 2.6584184346017197, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 439235856, "step": 4812 }, { "epoch": 20.05, "loss": 0.025795510038733482, "loss_ce": 5.288959528115811e-07, "loss_iou": 0.353515625, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 439235856, "step": 4812 }, { "epoch": 20.054166666666667, "grad_norm": 3.415623241765032, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 439327328, "step": 4813 }, { "epoch": 20.054166666666667, "loss": 0.029038380831480026, "loss_ce": 9.05168008102919e-07, "loss_iou": 0.30078125, "loss_num": 0.00579833984375, "loss_xval": 0.029052734375, "num_input_tokens_seen": 439327328, "step": 4813 }, { "epoch": 20.058333333333334, "grad_norm": 3.1804420864287044, "learning_rate": 5e-05, "loss": 0.0455, "num_input_tokens_seen": 439418656, "step": 4814 }, { "epoch": 20.058333333333334, "loss": 0.03694318234920502, "loss_ce": 1.6520855297130765e-06, "loss_iou": 0.265625, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 439418656, "step": 4814 }, { "epoch": 20.0625, "grad_norm": 2.2293540681116513, "learning_rate": 5e-05, "loss": 0.0417, "num_input_tokens_seen": 439509992, "step": 4815 }, { "epoch": 20.0625, "loss": 0.04403237625956535, "loss_ce": 1.0767493222374469e-05, "loss_iou": 0.2734375, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 439509992, "step": 4815 }, { "epoch": 20.066666666666666, "grad_norm": 1.4748819583091075, "learning_rate": 5e-05, "loss": 0.0642, "num_input_tokens_seen": 439601028, "step": 4816 }, { "epoch": 20.066666666666666, "loss": 0.06710471957921982, "loss_ce": 4.188733328192029e-06, "loss_iou": 0.0849609375, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 439601028, "step": 4816 }, { "epoch": 20.070833333333333, "grad_norm": 1.692495496060704, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 439691404, "step": 4817 }, { "epoch": 20.070833333333333, "loss": 0.05415000021457672, "loss_ce": 0.0011485957074910402, "loss_iou": 0.224609375, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 439691404, "step": 4817 }, { "epoch": 20.075, "grad_norm": 1.6939580744445781, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 439782628, "step": 4818 }, { "epoch": 20.075, "loss": 0.03671405836939812, "loss_ce": 1.4143606676952913e-06, "loss_iou": 0.291015625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 439782628, "step": 4818 }, { "epoch": 20.079166666666666, "grad_norm": 2.5877769478834094, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 439873884, "step": 4819 }, { "epoch": 20.079166666666666, "loss": 0.047144003212451935, "loss_ce": 9.603533726476599e-06, "loss_iou": 0.294921875, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 439873884, "step": 4819 }, { "epoch": 20.083333333333332, "grad_norm": 2.938377407145417, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 439965388, "step": 4820 }, { "epoch": 20.083333333333332, "loss": 0.04065392166376114, "loss_ce": 4.511202860157937e-06, "loss_iou": 0.298828125, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 439965388, "step": 4820 }, { "epoch": 20.0875, "grad_norm": 2.7862469182221674, "learning_rate": 5e-05, "loss": 0.0232, "num_input_tokens_seen": 440056884, "step": 4821 }, { "epoch": 20.0875, "loss": 0.021492835134267807, "loss_ce": 8.301569778268458e-07, "loss_iou": 0.193359375, "loss_num": 0.004302978515625, "loss_xval": 0.021484375, "num_input_tokens_seen": 440056884, "step": 4821 }, { "epoch": 20.091666666666665, "grad_norm": 2.867290416786235, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 440148288, "step": 4822 }, { "epoch": 20.091666666666665, "loss": 0.03978437930345535, "loss_ce": 7.33786218916066e-05, "loss_iou": 0.2392578125, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 440148288, "step": 4822 }, { "epoch": 20.095833333333335, "grad_norm": 2.7018284998552113, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 440237976, "step": 4823 }, { "epoch": 20.095833333333335, "loss": 0.0364387109875679, "loss_ce": 7.207721637314535e-07, "loss_iou": 0.294921875, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 440237976, "step": 4823 }, { "epoch": 20.1, "grad_norm": 2.3439581503071927, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 440329640, "step": 4824 }, { "epoch": 20.1, "loss": 0.019852623343467712, "loss_ce": 4.671405258704908e-05, "loss_iou": 0.2451171875, "loss_num": 0.00396728515625, "loss_xval": 0.019775390625, "num_input_tokens_seen": 440329640, "step": 4824 }, { "epoch": 20.104166666666668, "grad_norm": 2.3780211521592336, "learning_rate": 5e-05, "loss": 0.0268, "num_input_tokens_seen": 440421464, "step": 4825 }, { "epoch": 20.104166666666668, "loss": 0.02387407049536705, "loss_ce": 1.694625211712264e-06, "loss_iou": 0.259765625, "loss_num": 0.0047607421875, "loss_xval": 0.02392578125, "num_input_tokens_seen": 440421464, "step": 4825 }, { "epoch": 20.108333333333334, "grad_norm": 2.474421100813016, "learning_rate": 5e-05, "loss": 0.0598, "num_input_tokens_seen": 440512524, "step": 4826 }, { "epoch": 20.108333333333334, "loss": 0.03605201840400696, "loss_ce": 1.0761279554571956e-05, "loss_iou": 0.2392578125, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 440512524, "step": 4826 }, { "epoch": 20.1125, "grad_norm": 2.822494510620272, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 440603720, "step": 4827 }, { "epoch": 20.1125, "loss": 0.04726698249578476, "loss_ce": 2.577023951744195e-05, "loss_iou": 0.357421875, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 440603720, "step": 4827 }, { "epoch": 20.116666666666667, "grad_norm": 3.4141074259706294, "learning_rate": 5e-05, "loss": 0.0286, "num_input_tokens_seen": 440695424, "step": 4828 }, { "epoch": 20.116666666666667, "loss": 0.02667359448969364, "loss_ce": 1.2308016721362947e-06, "loss_iou": 0.2177734375, "loss_num": 0.005340576171875, "loss_xval": 0.026611328125, "num_input_tokens_seen": 440695424, "step": 4828 }, { "epoch": 20.120833333333334, "grad_norm": 4.193534404069147, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 440787128, "step": 4829 }, { "epoch": 20.120833333333334, "loss": 0.02787083014845848, "loss_ce": 8.280884685518686e-06, "loss_iou": 0.2734375, "loss_num": 0.00555419921875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 440787128, "step": 4829 }, { "epoch": 20.125, "grad_norm": 3.2028659600742744, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 440879012, "step": 4830 }, { "epoch": 20.125, "loss": 0.04750242829322815, "loss_ce": 0.00013151884195394814, "loss_iou": 0.21484375, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 440879012, "step": 4830 }, { "epoch": 20.129166666666666, "grad_norm": 2.8369034892888827, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 440969644, "step": 4831 }, { "epoch": 20.129166666666666, "loss": 0.03952815756201744, "loss_ce": 2.641428125116363e-07, "loss_iou": 0.20703125, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 440969644, "step": 4831 }, { "epoch": 20.133333333333333, "grad_norm": 2.778861090319647, "learning_rate": 5e-05, "loss": 0.0658, "num_input_tokens_seen": 441060208, "step": 4832 }, { "epoch": 20.133333333333333, "loss": 0.04722682759165764, "loss_ce": 8.743133435018535e-07, "loss_iou": 0.33203125, "loss_num": 0.00946044921875, "loss_xval": 0.047119140625, "num_input_tokens_seen": 441060208, "step": 4832 }, { "epoch": 20.1375, "grad_norm": 2.7716139239658313, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 441151684, "step": 4833 }, { "epoch": 20.1375, "loss": 0.03260520473122597, "loss_ce": 4.800813712790841e-06, "loss_iou": 0.2236328125, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 441151684, "step": 4833 }, { "epoch": 20.141666666666666, "grad_norm": 2.242768216362516, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 441242584, "step": 4834 }, { "epoch": 20.141666666666666, "loss": 0.0353640615940094, "loss_ce": 1.8199129954155069e-06, "loss_iou": 0.29296875, "loss_num": 0.007080078125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 441242584, "step": 4834 }, { "epoch": 20.145833333333332, "grad_norm": 3.375832829792383, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 441333960, "step": 4835 }, { "epoch": 20.145833333333332, "loss": 0.041843000799417496, "loss_ce": 3.401458116059075e-06, "loss_iou": 0.29296875, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 441333960, "step": 4835 }, { "epoch": 20.15, "grad_norm": 2.932190921396203, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 441425004, "step": 4836 }, { "epoch": 20.15, "loss": 0.06082218885421753, "loss_ce": 6.564836212419323e-07, "loss_iou": 0.31640625, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 441425004, "step": 4836 }, { "epoch": 20.154166666666665, "grad_norm": 3.0778923197176384, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 441515492, "step": 4837 }, { "epoch": 20.154166666666665, "loss": 0.05241571366786957, "loss_ce": 1.772940549926716e-06, "loss_iou": 0.1337890625, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 441515492, "step": 4837 }, { "epoch": 20.158333333333335, "grad_norm": 3.1140495590124795, "learning_rate": 5e-05, "loss": 0.1191, "num_input_tokens_seen": 441607092, "step": 4838 }, { "epoch": 20.158333333333335, "loss": 0.1707739531993866, "loss_ce": 1.284774589294102e-05, "loss_iou": 0.330078125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 441607092, "step": 4838 }, { "epoch": 20.1625, "grad_norm": 1.9461023033729858, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 441699296, "step": 4839 }, { "epoch": 20.1625, "loss": 0.04207409918308258, "loss_ce": 5.619662260869518e-06, "loss_iou": 0.255859375, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 441699296, "step": 4839 }, { "epoch": 20.166666666666668, "grad_norm": 2.0869868661141022, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 441790672, "step": 4840 }, { "epoch": 20.166666666666668, "loss": 0.04363371431827545, "loss_ce": 1.2024182751702028e-06, "loss_iou": 0.32421875, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 441790672, "step": 4840 }, { "epoch": 20.170833333333334, "grad_norm": 2.788154441624537, "learning_rate": 5e-05, "loss": 0.0506, "num_input_tokens_seen": 441881792, "step": 4841 }, { "epoch": 20.170833333333334, "loss": 0.05143975839018822, "loss_ce": 2.3802135729056317e-06, "loss_iou": 0.2060546875, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 441881792, "step": 4841 }, { "epoch": 20.175, "grad_norm": 3.551655592733579, "learning_rate": 5e-05, "loss": 0.0284, "num_input_tokens_seen": 441972952, "step": 4842 }, { "epoch": 20.175, "loss": 0.03263666480779648, "loss_ce": 1.3373867659538519e-05, "loss_iou": 0.37890625, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 441972952, "step": 4842 }, { "epoch": 20.179166666666667, "grad_norm": 3.1591844264872857, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 442064504, "step": 4843 }, { "epoch": 20.179166666666667, "loss": 0.04957715421915054, "loss_ce": 0.0007795470883138478, "loss_iou": 0.30859375, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 442064504, "step": 4843 }, { "epoch": 20.183333333333334, "grad_norm": 2.433716379371601, "learning_rate": 5e-05, "loss": 0.053, "num_input_tokens_seen": 442156304, "step": 4844 }, { "epoch": 20.183333333333334, "loss": 0.08409621566534042, "loss_ce": 5.0806596846086904e-05, "loss_iou": 0.232421875, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 442156304, "step": 4844 }, { "epoch": 20.1875, "grad_norm": 3.0001206937570175, "learning_rate": 5e-05, "loss": 0.0236, "num_input_tokens_seen": 442247652, "step": 4845 }, { "epoch": 20.1875, "loss": 0.02707594260573387, "loss_ce": 2.211073115176987e-05, "loss_iou": 0.16796875, "loss_num": 0.005401611328125, "loss_xval": 0.027099609375, "num_input_tokens_seen": 442247652, "step": 4845 }, { "epoch": 20.191666666666666, "grad_norm": 4.004260269877188, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 442339016, "step": 4846 }, { "epoch": 20.191666666666666, "loss": 0.07790812849998474, "loss_ce": 2.7273301384411752e-05, "loss_iou": 0.2890625, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 442339016, "step": 4846 }, { "epoch": 20.195833333333333, "grad_norm": 2.666777725899104, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 442430576, "step": 4847 }, { "epoch": 20.195833333333333, "loss": 0.037611234933137894, "loss_ce": 1.3578915059042629e-05, "loss_iou": 0.326171875, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 442430576, "step": 4847 }, { "epoch": 20.2, "grad_norm": 1.149837717392595, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 442521864, "step": 4848 }, { "epoch": 20.2, "loss": 0.03994525223970413, "loss_ce": 5.372582108975621e-06, "loss_iou": 0.154296875, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 442521864, "step": 4848 }, { "epoch": 20.204166666666666, "grad_norm": 2.481003143027893, "learning_rate": 5e-05, "loss": 0.0487, "num_input_tokens_seen": 442613304, "step": 4849 }, { "epoch": 20.204166666666666, "loss": 0.03446324169635773, "loss_ce": 6.993237184360623e-05, "loss_iou": 0.2080078125, "loss_num": 0.006866455078125, "loss_xval": 0.034423828125, "num_input_tokens_seen": 442613304, "step": 4849 }, { "epoch": 20.208333333333332, "grad_norm": 1.0981239862495074, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 442704856, "step": 4850 }, { "epoch": 20.208333333333332, "loss": 0.034536510705947876, "loss_ce": 1.3499163287633564e-05, "loss_iou": 0.30078125, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 442704856, "step": 4850 }, { "epoch": 20.2125, "grad_norm": 1.8155814850134073, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 442796228, "step": 4851 }, { "epoch": 20.2125, "loss": 0.03786277025938034, "loss_ce": 5.716243322240189e-06, "loss_iou": 0.2080078125, "loss_num": 0.007568359375, "loss_xval": 0.037841796875, "num_input_tokens_seen": 442796228, "step": 4851 }, { "epoch": 20.216666666666665, "grad_norm": 1.242135071037316, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 442888332, "step": 4852 }, { "epoch": 20.216666666666665, "loss": 0.05964813753962517, "loss_ce": 1.5276299336619559e-06, "loss_iou": 0.1953125, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 442888332, "step": 4852 }, { "epoch": 20.220833333333335, "grad_norm": 0.8166360003808425, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 442979476, "step": 4853 }, { "epoch": 20.220833333333335, "loss": 0.0613655149936676, "loss_ce": 1.755387529556174e-05, "loss_iou": 0.1962890625, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 442979476, "step": 4853 }, { "epoch": 20.225, "grad_norm": 3.3815310838351103, "learning_rate": 5e-05, "loss": 0.0659, "num_input_tokens_seen": 443070852, "step": 4854 }, { "epoch": 20.225, "loss": 0.041042834520339966, "loss_ce": 1.1952846762142144e-05, "loss_iou": 0.166015625, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 443070852, "step": 4854 }, { "epoch": 20.229166666666668, "grad_norm": 0.6287886096996755, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 443162152, "step": 4855 }, { "epoch": 20.229166666666668, "loss": 0.02782684564590454, "loss_ce": 2.4423698050668463e-06, "loss_iou": 0.189453125, "loss_num": 0.00555419921875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 443162152, "step": 4855 }, { "epoch": 20.233333333333334, "grad_norm": 1.2988693869129335, "learning_rate": 5e-05, "loss": 0.0243, "num_input_tokens_seen": 443253400, "step": 4856 }, { "epoch": 20.233333333333334, "loss": 0.029308704659342766, "loss_ce": 1.9458499082247727e-05, "loss_iou": 0.25390625, "loss_num": 0.005859375, "loss_xval": 0.029296875, "num_input_tokens_seen": 443253400, "step": 4856 }, { "epoch": 20.2375, "grad_norm": 1.509705294300433, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 443344636, "step": 4857 }, { "epoch": 20.2375, "loss": 0.023133575916290283, "loss_ce": 0.00012332136975601315, "loss_iou": 0.322265625, "loss_num": 0.004608154296875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 443344636, "step": 4857 }, { "epoch": 20.241666666666667, "grad_norm": 1.2625375682664126, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 443435428, "step": 4858 }, { "epoch": 20.241666666666667, "loss": 0.061081625521183014, "loss_ce": 6.900830840095296e-07, "loss_iou": 0.1630859375, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 443435428, "step": 4858 }, { "epoch": 20.245833333333334, "grad_norm": 3.390200960559453, "learning_rate": 5e-05, "loss": 0.053, "num_input_tokens_seen": 443527028, "step": 4859 }, { "epoch": 20.245833333333334, "loss": 0.05636825039982796, "loss_ce": 7.857749005779624e-05, "loss_iou": 0.265625, "loss_num": 0.01123046875, "loss_xval": 0.056396484375, "num_input_tokens_seen": 443527028, "step": 4859 }, { "epoch": 20.25, "grad_norm": 2.4615376000436946, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 443618344, "step": 4860 }, { "epoch": 20.25, "loss": 0.029989613220095634, "loss_ce": 3.661078881123103e-05, "loss_iou": 0.232421875, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 443618344, "step": 4860 }, { "epoch": 20.254166666666666, "grad_norm": 3.6234847469098526, "learning_rate": 5e-05, "loss": 0.0424, "num_input_tokens_seen": 443709564, "step": 4861 }, { "epoch": 20.254166666666666, "loss": 0.03184705972671509, "loss_ce": 1.9690830868057674e-06, "loss_iou": 0.29296875, "loss_num": 0.006378173828125, "loss_xval": 0.03173828125, "num_input_tokens_seen": 443709564, "step": 4861 }, { "epoch": 20.258333333333333, "grad_norm": 3.113489820406098, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 443801068, "step": 4862 }, { "epoch": 20.258333333333333, "loss": 0.06761139631271362, "loss_ce": 7.60020557208918e-05, "loss_iou": 0.47265625, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 443801068, "step": 4862 }, { "epoch": 20.2625, "grad_norm": 2.4742643245609606, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 443892516, "step": 4863 }, { "epoch": 20.2625, "loss": 0.03174731880426407, "loss_ce": 9.034883987624198e-06, "loss_iou": 0.26171875, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 443892516, "step": 4863 }, { "epoch": 20.266666666666666, "grad_norm": 3.330624777742401, "learning_rate": 5e-05, "loss": 0.0302, "num_input_tokens_seen": 443983416, "step": 4864 }, { "epoch": 20.266666666666666, "loss": 0.02159692347049713, "loss_ce": 5.7377765187993646e-06, "loss_iou": 0.1748046875, "loss_num": 0.004302978515625, "loss_xval": 0.0216064453125, "num_input_tokens_seen": 443983416, "step": 4864 }, { "epoch": 20.270833333333332, "grad_norm": 3.2220605320530704, "learning_rate": 5e-05, "loss": 0.0391, "num_input_tokens_seen": 444074932, "step": 4865 }, { "epoch": 20.270833333333332, "loss": 0.044381555169820786, "loss_ce": 8.995681127998978e-06, "loss_iou": 0.302734375, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 444074932, "step": 4865 }, { "epoch": 20.275, "grad_norm": 16.589366367423974, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 444166308, "step": 4866 }, { "epoch": 20.275, "loss": 0.0509551465511322, "loss_ce": 6.050134288670961e-06, "loss_iou": 0.197265625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 444166308, "step": 4866 }, { "epoch": 20.279166666666665, "grad_norm": 3.115461945601382, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 444257108, "step": 4867 }, { "epoch": 20.279166666666665, "loss": 0.04429937154054642, "loss_ce": 3.362210190971382e-05, "loss_iou": 0.2890625, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 444257108, "step": 4867 }, { "epoch": 20.283333333333335, "grad_norm": 5.941316697519509, "learning_rate": 5e-05, "loss": 0.0826, "num_input_tokens_seen": 444348248, "step": 4868 }, { "epoch": 20.283333333333335, "loss": 0.11602663993835449, "loss_ce": 0.00027346183196641505, "loss_iou": 0.333984375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 444348248, "step": 4868 }, { "epoch": 20.2875, "grad_norm": 3.2158858891955595, "learning_rate": 5e-05, "loss": 0.1098, "num_input_tokens_seen": 444439784, "step": 4869 }, { "epoch": 20.2875, "loss": 0.11932092905044556, "loss_ce": 5.822531602461822e-05, "loss_iou": 0.10400390625, "loss_num": 0.02392578125, "loss_xval": 0.119140625, "num_input_tokens_seen": 444439784, "step": 4869 }, { "epoch": 20.291666666666668, "grad_norm": 2.2748669444729335, "learning_rate": 5e-05, "loss": 0.0546, "num_input_tokens_seen": 444531712, "step": 4870 }, { "epoch": 20.291666666666668, "loss": 0.038855671882629395, "loss_ce": 6.020005093887448e-05, "loss_iou": 0.171875, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 444531712, "step": 4870 }, { "epoch": 20.295833333333334, "grad_norm": 2.600265464747739, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 444622984, "step": 4871 }, { "epoch": 20.295833333333334, "loss": 0.0485546737909317, "loss_ce": 1.2067093848600052e-06, "loss_iou": 0.2890625, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 444622984, "step": 4871 }, { "epoch": 20.3, "grad_norm": 5.2037408658004045, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 444714240, "step": 4872 }, { "epoch": 20.3, "loss": 0.039145130664110184, "loss_ce": 6.336808837659191e-06, "loss_iou": 0.28515625, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 444714240, "step": 4872 }, { "epoch": 20.304166666666667, "grad_norm": 2.7752019833056107, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 444805300, "step": 4873 }, { "epoch": 20.304166666666667, "loss": 0.04133889824151993, "loss_ce": 2.8373524401104078e-06, "loss_iou": 0.30078125, "loss_num": 0.00830078125, "loss_xval": 0.041259765625, "num_input_tokens_seen": 444805300, "step": 4873 }, { "epoch": 20.308333333333334, "grad_norm": 1.7882737756201468, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 444896408, "step": 4874 }, { "epoch": 20.308333333333334, "loss": 0.03891061991453171, "loss_ce": 7.065909812808968e-07, "loss_iou": 0.259765625, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 444896408, "step": 4874 }, { "epoch": 20.3125, "grad_norm": 1.231073243583653, "learning_rate": 5e-05, "loss": 0.0465, "num_input_tokens_seen": 444987964, "step": 4875 }, { "epoch": 20.3125, "loss": 0.03511609882116318, "loss_ce": 0.00024976313579827547, "loss_iou": 0.236328125, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 444987964, "step": 4875 }, { "epoch": 20.316666666666666, "grad_norm": 2.4147512166844023, "learning_rate": 5e-05, "loss": 0.0563, "num_input_tokens_seen": 445079640, "step": 4876 }, { "epoch": 20.316666666666666, "loss": 0.0664483904838562, "loss_ce": 5.740137567045167e-05, "loss_iou": 0.29296875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 445079640, "step": 4876 }, { "epoch": 20.320833333333333, "grad_norm": 2.1022623574818824, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 445171000, "step": 4877 }, { "epoch": 20.320833333333333, "loss": 0.045300401747226715, "loss_ce": 0.00010387108341092244, "loss_iou": 0.1357421875, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 445171000, "step": 4877 }, { "epoch": 20.325, "grad_norm": 2.3199989305266855, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 445262104, "step": 4878 }, { "epoch": 20.325, "loss": 0.025294212624430656, "loss_ce": 5.617594433715567e-05, "loss_iou": 0.224609375, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 445262104, "step": 4878 }, { "epoch": 20.329166666666666, "grad_norm": 2.8002295035815514, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 445353480, "step": 4879 }, { "epoch": 20.329166666666666, "loss": 0.07884392142295837, "loss_ce": 1.7560005289851688e-06, "loss_iou": 0.28125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 445353480, "step": 4879 }, { "epoch": 20.333333333333332, "grad_norm": 2.754852265998824, "learning_rate": 5e-05, "loss": 0.0898, "num_input_tokens_seen": 445444500, "step": 4880 }, { "epoch": 20.333333333333332, "loss": 0.1288609653711319, "loss_ce": 4.901310148852644e-07, "loss_iou": 0.201171875, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 445444500, "step": 4880 }, { "epoch": 20.3375, "grad_norm": 3.1200199540546754, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 445535764, "step": 4881 }, { "epoch": 20.3375, "loss": 0.05894544720649719, "loss_ce": 7.442573064508906e-07, "loss_iou": 0.1845703125, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 445535764, "step": 4881 }, { "epoch": 20.341666666666665, "grad_norm": 3.1098845961447483, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 445627000, "step": 4882 }, { "epoch": 20.341666666666665, "loss": 0.03754313290119171, "loss_ce": 6.510799721581861e-06, "loss_iou": 0.33203125, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 445627000, "step": 4882 }, { "epoch": 20.345833333333335, "grad_norm": 3.304830489925366, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 445718184, "step": 4883 }, { "epoch": 20.345833333333335, "loss": 0.03720206022262573, "loss_ce": 1.1303675364615628e-06, "loss_iou": 0.33203125, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 445718184, "step": 4883 }, { "epoch": 20.35, "grad_norm": 3.000987544750155, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 445809972, "step": 4884 }, { "epoch": 20.35, "loss": 0.057484131306409836, "loss_ce": 1.9530423742253333e-05, "loss_iou": 0.318359375, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 445809972, "step": 4884 }, { "epoch": 20.354166666666668, "grad_norm": 2.469429866613967, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 445901728, "step": 4885 }, { "epoch": 20.354166666666668, "loss": 0.03661388158798218, "loss_ce": 4.1551777485437924e-07, "loss_iou": 0.2041015625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 445901728, "step": 4885 }, { "epoch": 20.358333333333334, "grad_norm": 2.100770310341459, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 445992932, "step": 4886 }, { "epoch": 20.358333333333334, "loss": 0.06200437992811203, "loss_ce": 7.92302489571739e-06, "loss_iou": 0.1533203125, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 445992932, "step": 4886 }, { "epoch": 20.3625, "grad_norm": 2.0535116291736144, "learning_rate": 5e-05, "loss": 0.0255, "num_input_tokens_seen": 446084960, "step": 4887 }, { "epoch": 20.3625, "loss": 0.030836593359708786, "loss_ce": 7.487383845727891e-05, "loss_iou": 0.25, "loss_num": 0.006134033203125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 446084960, "step": 4887 }, { "epoch": 20.366666666666667, "grad_norm": 7.862201651792061, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 446176152, "step": 4888 }, { "epoch": 20.366666666666667, "loss": 0.029422171413898468, "loss_ce": 0.00011766894749598578, "loss_iou": 0.173828125, "loss_num": 0.005859375, "loss_xval": 0.029296875, "num_input_tokens_seen": 446176152, "step": 4888 }, { "epoch": 20.370833333333334, "grad_norm": 2.2691565397775983, "learning_rate": 5e-05, "loss": 0.0578, "num_input_tokens_seen": 446266984, "step": 4889 }, { "epoch": 20.370833333333334, "loss": 0.058382242918014526, "loss_ce": 2.115804136337829e-06, "loss_iou": 0.26171875, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 446266984, "step": 4889 }, { "epoch": 20.375, "grad_norm": 1.362806615033498, "learning_rate": 5e-05, "loss": 0.0261, "num_input_tokens_seen": 446358556, "step": 4890 }, { "epoch": 20.375, "loss": 0.021336067467927933, "loss_ce": 4.280506345821777e-06, "loss_iou": 0.2001953125, "loss_num": 0.0042724609375, "loss_xval": 0.0213623046875, "num_input_tokens_seen": 446358556, "step": 4890 }, { "epoch": 20.379166666666666, "grad_norm": 1.362143346364944, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 446448380, "step": 4891 }, { "epoch": 20.379166666666666, "loss": 0.04467766731977463, "loss_ce": 2.2821543097961694e-05, "loss_iou": 0.1484375, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 446448380, "step": 4891 }, { "epoch": 20.383333333333333, "grad_norm": 2.8894799690511053, "learning_rate": 5e-05, "loss": 0.0336, "num_input_tokens_seen": 446539984, "step": 4892 }, { "epoch": 20.383333333333333, "loss": 0.03168143704533577, "loss_ce": 1.9449596948106773e-05, "loss_iou": 0.1396484375, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 446539984, "step": 4892 }, { "epoch": 20.3875, "grad_norm": 2.1546245449489243, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 446631024, "step": 4893 }, { "epoch": 20.3875, "loss": 0.04168850928544998, "loss_ce": 1.6754056559875607e-05, "loss_iou": 0.259765625, "loss_num": 0.00830078125, "loss_xval": 0.041748046875, "num_input_tokens_seen": 446631024, "step": 4893 }, { "epoch": 20.391666666666666, "grad_norm": 3.214993700974819, "learning_rate": 5e-05, "loss": 0.0318, "num_input_tokens_seen": 446722444, "step": 4894 }, { "epoch": 20.391666666666666, "loss": 0.03269556164741516, "loss_ce": 2.649427915457636e-05, "loss_iou": 0.14453125, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 446722444, "step": 4894 }, { "epoch": 20.395833333333332, "grad_norm": 8.2722110275818, "learning_rate": 5e-05, "loss": 0.117, "num_input_tokens_seen": 446813668, "step": 4895 }, { "epoch": 20.395833333333332, "loss": 0.06616764515638351, "loss_ce": 5.53373229195131e-06, "loss_iou": 0.26953125, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 446813668, "step": 4895 }, { "epoch": 20.4, "grad_norm": 2.4259130877628534, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 446904808, "step": 4896 }, { "epoch": 20.4, "loss": 0.05022679269313812, "loss_ce": 1.0118440513906535e-05, "loss_iou": 0.173828125, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 446904808, "step": 4896 }, { "epoch": 20.404166666666665, "grad_norm": 2.8399090176153585, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 446996608, "step": 4897 }, { "epoch": 20.404166666666665, "loss": 0.030106237158179283, "loss_ce": 8.274846550193615e-06, "loss_iou": 0.271484375, "loss_num": 0.006011962890625, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 446996608, "step": 4897 }, { "epoch": 20.408333333333335, "grad_norm": 1.588252686102434, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 447088364, "step": 4898 }, { "epoch": 20.408333333333335, "loss": 0.049656666815280914, "loss_ce": 7.517551239288878e-07, "loss_iou": 0.2119140625, "loss_num": 0.00994873046875, "loss_xval": 0.049560546875, "num_input_tokens_seen": 447088364, "step": 4898 }, { "epoch": 20.4125, "grad_norm": 0.9741283001699321, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 447179584, "step": 4899 }, { "epoch": 20.4125, "loss": 0.04484070837497711, "loss_ce": 2.757896481853095e-06, "loss_iou": 0.2041015625, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 447179584, "step": 4899 }, { "epoch": 20.416666666666668, "grad_norm": 1.3770220316016173, "learning_rate": 5e-05, "loss": 0.025, "num_input_tokens_seen": 447270912, "step": 4900 }, { "epoch": 20.416666666666668, "loss": 0.023782670497894287, "loss_ce": 1.846855525400315e-06, "loss_iou": 0.224609375, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 447270912, "step": 4900 }, { "epoch": 20.420833333333334, "grad_norm": 2.3947178776460167, "learning_rate": 5e-05, "loss": 0.0279, "num_input_tokens_seen": 447361732, "step": 4901 }, { "epoch": 20.420833333333334, "loss": 0.0212043859064579, "loss_ce": 9.92965215118602e-06, "loss_iou": 0.1328125, "loss_num": 0.004241943359375, "loss_xval": 0.021240234375, "num_input_tokens_seen": 447361732, "step": 4901 }, { "epoch": 20.425, "grad_norm": 2.006551586773389, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 447453548, "step": 4902 }, { "epoch": 20.425, "loss": 0.05505795404314995, "loss_ce": 5.0019931222777814e-05, "loss_iou": 0.1005859375, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 447453548, "step": 4902 }, { "epoch": 20.429166666666667, "grad_norm": 2.1123937763145793, "learning_rate": 5e-05, "loss": 0.0294, "num_input_tokens_seen": 447544444, "step": 4903 }, { "epoch": 20.429166666666667, "loss": 0.03331262990832329, "loss_ce": 2.6916898150375346e-06, "loss_iou": 0.24609375, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 447544444, "step": 4903 }, { "epoch": 20.433333333333334, "grad_norm": 3.218991848840505, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 447635164, "step": 4904 }, { "epoch": 20.433333333333334, "loss": 0.02320890873670578, "loss_ce": 2.9073927976241976e-07, "loss_iou": 0.27734375, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 447635164, "step": 4904 }, { "epoch": 20.4375, "grad_norm": 3.050650709598406, "learning_rate": 5e-05, "loss": 0.0802, "num_input_tokens_seen": 447726428, "step": 4905 }, { "epoch": 20.4375, "loss": 0.05380012094974518, "loss_ce": 1.289098599954741e-05, "loss_iou": 0.1962890625, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 447726428, "step": 4905 }, { "epoch": 20.441666666666666, "grad_norm": 3.6080904941032017, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 447817312, "step": 4906 }, { "epoch": 20.441666666666666, "loss": 0.05204792320728302, "loss_ce": 1.9120729177757312e-07, "loss_iou": 0.140625, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 447817312, "step": 4906 }, { "epoch": 20.445833333333333, "grad_norm": 0.8562594459524338, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 447908384, "step": 4907 }, { "epoch": 20.445833333333333, "loss": 0.04393097758293152, "loss_ce": 9.249480399375898e-07, "loss_iou": 0.05517578125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 447908384, "step": 4907 }, { "epoch": 20.45, "grad_norm": 1.9636652938937202, "learning_rate": 5e-05, "loss": 0.0611, "num_input_tokens_seen": 447999656, "step": 4908 }, { "epoch": 20.45, "loss": 0.0840383991599083, "loss_ce": 0.003967905882745981, "loss_iou": 0.1875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 447999656, "step": 4908 }, { "epoch": 20.454166666666666, "grad_norm": 1.8850154368498628, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 448090716, "step": 4909 }, { "epoch": 20.454166666666666, "loss": 0.034120649099349976, "loss_ce": 2.000324002437992e-06, "loss_iou": 0.2138671875, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 448090716, "step": 4909 }, { "epoch": 20.458333333333332, "grad_norm": 2.574376969400775, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 448182416, "step": 4910 }, { "epoch": 20.458333333333332, "loss": 0.05781601369380951, "loss_ce": 4.6435079070761276e-07, "loss_iou": 0.3203125, "loss_num": 0.01153564453125, "loss_xval": 0.057861328125, "num_input_tokens_seen": 448182416, "step": 4910 }, { "epoch": 20.4625, "grad_norm": 1.8043865352238553, "learning_rate": 5e-05, "loss": 0.0403, "num_input_tokens_seen": 448273596, "step": 4911 }, { "epoch": 20.4625, "loss": 0.02100570686161518, "loss_ce": 0.00010116630437551066, "loss_iou": 0.1875, "loss_num": 0.004180908203125, "loss_xval": 0.0208740234375, "num_input_tokens_seen": 448273596, "step": 4911 }, { "epoch": 20.466666666666665, "grad_norm": 1.6229164781208034, "learning_rate": 5e-05, "loss": 0.0573, "num_input_tokens_seen": 448364676, "step": 4912 }, { "epoch": 20.466666666666665, "loss": 0.036881882697343826, "loss_ce": 1.3882765870221192e-06, "loss_iou": 0.1572265625, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 448364676, "step": 4912 }, { "epoch": 20.470833333333335, "grad_norm": 4.454826770446542, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 448456108, "step": 4913 }, { "epoch": 20.470833333333335, "loss": 0.044205501675605774, "loss_ce": 7.885735158197349e-07, "loss_iou": 0.26953125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 448456108, "step": 4913 }, { "epoch": 20.475, "grad_norm": 2.6555909998390566, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 448547816, "step": 4914 }, { "epoch": 20.475, "loss": 0.06155569851398468, "loss_ce": 0.00032217518310062587, "loss_iou": 0.15234375, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 448547816, "step": 4914 }, { "epoch": 20.479166666666668, "grad_norm": 1.584083152690339, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 448639440, "step": 4915 }, { "epoch": 20.479166666666668, "loss": 0.08793891966342926, "loss_ce": 2.5262986582674785e-06, "loss_iou": 0.1923828125, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 448639440, "step": 4915 }, { "epoch": 20.483333333333334, "grad_norm": 2.9419067451996987, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 448731128, "step": 4916 }, { "epoch": 20.483333333333334, "loss": 0.04757130891084671, "loss_ce": 4.018229083158076e-05, "loss_iou": 0.203125, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 448731128, "step": 4916 }, { "epoch": 20.4875, "grad_norm": 3.3289255011100236, "learning_rate": 5e-05, "loss": 0.097, "num_input_tokens_seen": 448822136, "step": 4917 }, { "epoch": 20.4875, "loss": 0.13442227244377136, "loss_ce": 0.001197781995870173, "loss_iou": 0.1640625, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 448822136, "step": 4917 }, { "epoch": 20.491666666666667, "grad_norm": 2.2184218252363013, "learning_rate": 5e-05, "loss": 0.0275, "num_input_tokens_seen": 448913020, "step": 4918 }, { "epoch": 20.491666666666667, "loss": 0.025361178442835808, "loss_ce": 1.0709804882935714e-06, "loss_iou": 0.18359375, "loss_num": 0.00506591796875, "loss_xval": 0.025390625, "num_input_tokens_seen": 448913020, "step": 4918 }, { "epoch": 20.495833333333334, "grad_norm": 3.114290733634199, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 449005084, "step": 4919 }, { "epoch": 20.495833333333334, "loss": 0.06543560326099396, "loss_ce": 5.912777851335704e-06, "loss_iou": 0.1845703125, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 449005084, "step": 4919 }, { "epoch": 20.5, "grad_norm": 2.268225791254527, "learning_rate": 5e-05, "loss": 0.0318, "num_input_tokens_seen": 449096416, "step": 4920 }, { "epoch": 20.5, "loss": 0.026118092238903046, "loss_ce": 4.082125815330073e-05, "loss_iou": 0.224609375, "loss_num": 0.005218505859375, "loss_xval": 0.026123046875, "num_input_tokens_seen": 449096416, "step": 4920 }, { "epoch": 20.504166666666666, "grad_norm": 1.6800960184311333, "learning_rate": 5e-05, "loss": 0.0868, "num_input_tokens_seen": 449187816, "step": 4921 }, { "epoch": 20.504166666666666, "loss": 0.04860733449459076, "loss_ce": 2.3353166398010217e-05, "loss_iou": 0.234375, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 449187816, "step": 4921 }, { "epoch": 20.508333333333333, "grad_norm": 3.0114733370373057, "learning_rate": 5e-05, "loss": 0.1067, "num_input_tokens_seen": 449279200, "step": 4922 }, { "epoch": 20.508333333333333, "loss": 0.14715611934661865, "loss_ce": 3.693668872983835e-07, "loss_iou": 0.12890625, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 449279200, "step": 4922 }, { "epoch": 20.5125, "grad_norm": 1.6858748187256003, "learning_rate": 5e-05, "loss": 0.1058, "num_input_tokens_seen": 449370092, "step": 4923 }, { "epoch": 20.5125, "loss": 0.05952360853552818, "loss_ce": 6.704354746034369e-06, "loss_iou": 0.16015625, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 449370092, "step": 4923 }, { "epoch": 20.516666666666666, "grad_norm": 2.420186816096476, "learning_rate": 5e-05, "loss": 0.0296, "num_input_tokens_seen": 449461756, "step": 4924 }, { "epoch": 20.516666666666666, "loss": 0.023171938955783844, "loss_ce": 1.4685980431750068e-06, "loss_iou": 0.27734375, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 449461756, "step": 4924 }, { "epoch": 20.520833333333332, "grad_norm": 2.356011899654843, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 449553476, "step": 4925 }, { "epoch": 20.520833333333332, "loss": 0.030006490647792816, "loss_ce": 7.71085251471959e-06, "loss_iou": 0.2421875, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 449553476, "step": 4925 }, { "epoch": 20.525, "grad_norm": 2.4604421924811173, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 449644924, "step": 4926 }, { "epoch": 20.525, "loss": 0.02052384242415428, "loss_ce": 7.716460004303372e-07, "loss_iou": 0.34375, "loss_num": 0.00408935546875, "loss_xval": 0.0205078125, "num_input_tokens_seen": 449644924, "step": 4926 }, { "epoch": 20.529166666666665, "grad_norm": 2.972454634959909, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 449736300, "step": 4927 }, { "epoch": 20.529166666666665, "loss": 0.07225370407104492, "loss_ce": 3.344216111145215e-06, "loss_iou": 0.158203125, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 449736300, "step": 4927 }, { "epoch": 20.533333333333335, "grad_norm": 2.8204475890224407, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 449827500, "step": 4928 }, { "epoch": 20.533333333333335, "loss": 0.10444729030132294, "loss_ce": 8.792231938059558e-07, "loss_iou": 0.20703125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 449827500, "step": 4928 }, { "epoch": 20.5375, "grad_norm": 2.4035501839042865, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 449919048, "step": 4929 }, { "epoch": 20.5375, "loss": 0.07318446040153503, "loss_ce": 3.3055887342925416e-06, "loss_iou": 0.1171875, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 449919048, "step": 4929 }, { "epoch": 20.541666666666668, "grad_norm": 2.1633006857346166, "learning_rate": 5e-05, "loss": 0.0327, "num_input_tokens_seen": 450009940, "step": 4930 }, { "epoch": 20.541666666666668, "loss": 0.04298504441976547, "loss_ce": 1.0339194886910263e-06, "loss_iou": 0.263671875, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 450009940, "step": 4930 }, { "epoch": 20.545833333333334, "grad_norm": 2.71002409160864, "learning_rate": 5e-05, "loss": 0.0279, "num_input_tokens_seen": 450100912, "step": 4931 }, { "epoch": 20.545833333333334, "loss": 0.02671864628791809, "loss_ce": 5.075110038887942e-07, "loss_iou": 0.1787109375, "loss_num": 0.005340576171875, "loss_xval": 0.0267333984375, "num_input_tokens_seen": 450100912, "step": 4931 }, { "epoch": 20.55, "grad_norm": 4.408404110767032, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 450192432, "step": 4932 }, { "epoch": 20.55, "loss": 0.060529936105012894, "loss_ce": 0.00024245594977401197, "loss_iou": 0.359375, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 450192432, "step": 4932 }, { "epoch": 20.554166666666667, "grad_norm": 2.696961014567847, "learning_rate": 5e-05, "loss": 0.0847, "num_input_tokens_seen": 450284088, "step": 4933 }, { "epoch": 20.554166666666667, "loss": 0.08644923567771912, "loss_ce": 8.188740139303263e-06, "loss_iou": 0.30078125, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 450284088, "step": 4933 }, { "epoch": 20.558333333333334, "grad_norm": 2.7205486255151228, "learning_rate": 5e-05, "loss": 0.0443, "num_input_tokens_seen": 450374740, "step": 4934 }, { "epoch": 20.558333333333334, "loss": 0.033890899270772934, "loss_ce": 4.690532659878954e-05, "loss_iou": 0.2431640625, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 450374740, "step": 4934 }, { "epoch": 20.5625, "grad_norm": 5.026116382223273, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 450466152, "step": 4935 }, { "epoch": 20.5625, "loss": 0.046482790261507034, "loss_ce": 4.519070898822974e-06, "loss_iou": 0.28515625, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 450466152, "step": 4935 }, { "epoch": 20.566666666666666, "grad_norm": 2.5441081198921967, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 450557664, "step": 4936 }, { "epoch": 20.566666666666666, "loss": 0.048398494720458984, "loss_ce": 1.2869089914602228e-05, "loss_iou": 0.23046875, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 450557664, "step": 4936 }, { "epoch": 20.570833333333333, "grad_norm": 1.8114682143984124, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 450649088, "step": 4937 }, { "epoch": 20.570833333333333, "loss": 0.03278595581650734, "loss_ce": 2.4474088604620192e-06, "loss_iou": 0.123046875, "loss_num": 0.006561279296875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 450649088, "step": 4937 }, { "epoch": 20.575, "grad_norm": 1.6078122221436446, "learning_rate": 5e-05, "loss": 0.0259, "num_input_tokens_seen": 450740344, "step": 4938 }, { "epoch": 20.575, "loss": 0.030597861856222153, "loss_ce": 3.990992354374612e-06, "loss_iou": 0.267578125, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 450740344, "step": 4938 }, { "epoch": 20.579166666666666, "grad_norm": 2.0393086384958923, "learning_rate": 5e-05, "loss": 0.0443, "num_input_tokens_seen": 450831416, "step": 4939 }, { "epoch": 20.579166666666666, "loss": 0.05075102299451828, "loss_ce": 2.886481524910778e-07, "loss_iou": 0.2578125, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 450831416, "step": 4939 }, { "epoch": 20.583333333333332, "grad_norm": 3.8968025371129684, "learning_rate": 5e-05, "loss": 0.0556, "num_input_tokens_seen": 450922228, "step": 4940 }, { "epoch": 20.583333333333332, "loss": 0.07820156961679459, "loss_ce": 2.7902996180273476e-07, "loss_iou": 0.1982421875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 450922228, "step": 4940 }, { "epoch": 20.5875, "grad_norm": 1.5736493006126937, "learning_rate": 5e-05, "loss": 0.0303, "num_input_tokens_seen": 451013644, "step": 4941 }, { "epoch": 20.5875, "loss": 0.033364854753017426, "loss_ce": 1.5105636066437e-06, "loss_iou": 0.16796875, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 451013644, "step": 4941 }, { "epoch": 20.591666666666665, "grad_norm": 2.6377710329881214, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 451104932, "step": 4942 }, { "epoch": 20.591666666666665, "loss": 0.06481263041496277, "loss_ce": 9.242695568900672e-07, "loss_iou": 0.1396484375, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 451104932, "step": 4942 }, { "epoch": 20.595833333333335, "grad_norm": 2.3555642663825966, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 451196380, "step": 4943 }, { "epoch": 20.595833333333335, "loss": 0.029668737202882767, "loss_ce": 1.8372354588791495e-06, "loss_iou": 0.220703125, "loss_num": 0.00592041015625, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 451196380, "step": 4943 }, { "epoch": 20.6, "grad_norm": 2.60332049955372, "learning_rate": 5e-05, "loss": 0.0327, "num_input_tokens_seen": 451288068, "step": 4944 }, { "epoch": 20.6, "loss": 0.03186202049255371, "loss_ce": 1.6669213209752343e-06, "loss_iou": 0.306640625, "loss_num": 0.006378173828125, "loss_xval": 0.03173828125, "num_input_tokens_seen": 451288068, "step": 4944 }, { "epoch": 20.604166666666668, "grad_norm": 2.567116921718554, "learning_rate": 5e-05, "loss": 0.0937, "num_input_tokens_seen": 451379516, "step": 4945 }, { "epoch": 20.604166666666668, "loss": 0.04703579470515251, "loss_ce": 5.3982519602868706e-05, "loss_iou": 0.275390625, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 451379516, "step": 4945 }, { "epoch": 20.608333333333334, "grad_norm": 1.6160182593215768, "learning_rate": 5e-05, "loss": 0.0424, "num_input_tokens_seen": 451470420, "step": 4946 }, { "epoch": 20.608333333333334, "loss": 0.027573561295866966, "loss_ce": 9.291766218666453e-07, "loss_iou": 0.10595703125, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 451470420, "step": 4946 }, { "epoch": 20.6125, "grad_norm": 1.3467015493968915, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 451561924, "step": 4947 }, { "epoch": 20.6125, "loss": 0.04347209259867668, "loss_ce": 0.0010602866532281041, "loss_iou": 0.21875, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 451561924, "step": 4947 }, { "epoch": 20.616666666666667, "grad_norm": 1.9679868243687706, "learning_rate": 5e-05, "loss": 0.0303, "num_input_tokens_seen": 451653044, "step": 4948 }, { "epoch": 20.616666666666667, "loss": 0.038005080074071884, "loss_ce": 1.0696719982661307e-05, "loss_iou": 0.32421875, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 451653044, "step": 4948 }, { "epoch": 20.620833333333334, "grad_norm": 2.5169923263187264, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 451743928, "step": 4949 }, { "epoch": 20.620833333333334, "loss": 0.027726180851459503, "loss_ce": 9.608993423171341e-07, "loss_iou": 0.275390625, "loss_num": 0.00555419921875, "loss_xval": 0.0277099609375, "num_input_tokens_seen": 451743928, "step": 4949 }, { "epoch": 20.625, "grad_norm": 2.635246367786188, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 451834636, "step": 4950 }, { "epoch": 20.625, "loss": 0.05243086814880371, "loss_ce": 1.6696777720426326e-06, "loss_iou": 0.24609375, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 451834636, "step": 4950 }, { "epoch": 20.629166666666666, "grad_norm": 4.173072825862923, "learning_rate": 5e-05, "loss": 0.0607, "num_input_tokens_seen": 451926068, "step": 4951 }, { "epoch": 20.629166666666666, "loss": 0.08744490146636963, "loss_ce": 4.417190666572424e-06, "loss_iou": 0.162109375, "loss_num": 0.017578125, "loss_xval": 0.08740234375, "num_input_tokens_seen": 451926068, "step": 4951 }, { "epoch": 20.633333333333333, "grad_norm": 2.5783744439870997, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 452017932, "step": 4952 }, { "epoch": 20.633333333333333, "loss": 0.028565822169184685, "loss_ce": 1.3682913504453609e-06, "loss_iou": 0.255859375, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 452017932, "step": 4952 }, { "epoch": 20.6375, "grad_norm": 3.316342304316025, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 452109576, "step": 4953 }, { "epoch": 20.6375, "loss": 0.030848821625113487, "loss_ce": 3.1791919354873244e-06, "loss_iou": 0.361328125, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 452109576, "step": 4953 }, { "epoch": 20.641666666666666, "grad_norm": 3.1392782822852583, "learning_rate": 5e-05, "loss": 0.0635, "num_input_tokens_seen": 452200544, "step": 4954 }, { "epoch": 20.641666666666666, "loss": 0.08412261307239532, "loss_ce": 9.096820576814935e-07, "loss_iou": 0.287109375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 452200544, "step": 4954 }, { "epoch": 20.645833333333332, "grad_norm": 2.492693871159871, "learning_rate": 5e-05, "loss": 0.0939, "num_input_tokens_seen": 452292380, "step": 4955 }, { "epoch": 20.645833333333332, "loss": 0.06159612163901329, "loss_ce": 4.017566880065715e-06, "loss_iou": 0.3125, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 452292380, "step": 4955 }, { "epoch": 20.65, "grad_norm": 2.5843001524160107, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 452384136, "step": 4956 }, { "epoch": 20.65, "loss": 0.08120512962341309, "loss_ce": 1.310615425609285e-05, "loss_iou": 0.298828125, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 452384136, "step": 4956 }, { "epoch": 20.654166666666665, "grad_norm": 2.9508343488964126, "learning_rate": 5e-05, "loss": 0.034, "num_input_tokens_seen": 452474504, "step": 4957 }, { "epoch": 20.654166666666665, "loss": 0.022920362651348114, "loss_ce": 1.6614910691714613e-06, "loss_iou": 0.271484375, "loss_num": 0.00457763671875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 452474504, "step": 4957 }, { "epoch": 20.658333333333335, "grad_norm": 3.197401309944741, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 452565788, "step": 4958 }, { "epoch": 20.658333333333335, "loss": 0.03662900626659393, "loss_ce": 6.894825492054224e-05, "loss_iou": 0.369140625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 452565788, "step": 4958 }, { "epoch": 20.6625, "grad_norm": 2.461105491930837, "learning_rate": 5e-05, "loss": 0.0721, "num_input_tokens_seen": 452657340, "step": 4959 }, { "epoch": 20.6625, "loss": 0.050036199390888214, "loss_ce": 1.0260364433634095e-05, "loss_iou": 0.1728515625, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 452657340, "step": 4959 }, { "epoch": 20.666666666666668, "grad_norm": 3.42370674644056, "learning_rate": 5e-05, "loss": 0.0566, "num_input_tokens_seen": 452748192, "step": 4960 }, { "epoch": 20.666666666666668, "loss": 0.06822985410690308, "loss_ce": 7.814576747477986e-06, "loss_iou": 0.291015625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 452748192, "step": 4960 }, { "epoch": 20.670833333333334, "grad_norm": 31.597300727880917, "learning_rate": 5e-05, "loss": 0.052, "num_input_tokens_seen": 452839420, "step": 4961 }, { "epoch": 20.670833333333334, "loss": 0.030092181637883186, "loss_ce": 3.236833435948938e-05, "loss_iou": 0.20703125, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 452839420, "step": 4961 }, { "epoch": 20.675, "grad_norm": 1.5560147417535624, "learning_rate": 5e-05, "loss": 0.0382, "num_input_tokens_seen": 452930708, "step": 4962 }, { "epoch": 20.675, "loss": 0.03558146208524704, "loss_ce": 5.5974505812628195e-06, "loss_iou": 0.265625, "loss_num": 0.007110595703125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 452930708, "step": 4962 }, { "epoch": 20.679166666666667, "grad_norm": 1.530031099095701, "learning_rate": 5e-05, "loss": 0.0508, "num_input_tokens_seen": 453022456, "step": 4963 }, { "epoch": 20.679166666666667, "loss": 0.04042421653866768, "loss_ce": 3.684350076582632e-06, "loss_iou": 0.1337890625, "loss_num": 0.008056640625, "loss_xval": 0.04052734375, "num_input_tokens_seen": 453022456, "step": 4963 }, { "epoch": 20.683333333333334, "grad_norm": 1.1910616741094349, "learning_rate": 5e-05, "loss": 0.0377, "num_input_tokens_seen": 453114192, "step": 4964 }, { "epoch": 20.683333333333334, "loss": 0.045101359486579895, "loss_ce": 4.005429673270555e-06, "loss_iou": 0.1796875, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 453114192, "step": 4964 }, { "epoch": 20.6875, "grad_norm": 1.4244696213226584, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 453205336, "step": 4965 }, { "epoch": 20.6875, "loss": 0.026782888919115067, "loss_ce": 3.7131860608496936e-06, "loss_iou": 0.25390625, "loss_num": 0.00537109375, "loss_xval": 0.0267333984375, "num_input_tokens_seen": 453205336, "step": 4965 }, { "epoch": 20.691666666666666, "grad_norm": 1.3465652173814044, "learning_rate": 5e-05, "loss": 0.0285, "num_input_tokens_seen": 453296424, "step": 4966 }, { "epoch": 20.691666666666666, "loss": 0.03415077552199364, "loss_ce": 1.6061776477727108e-06, "loss_iou": 0.232421875, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 453296424, "step": 4966 }, { "epoch": 20.695833333333333, "grad_norm": 13.981129636039054, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 453387852, "step": 4967 }, { "epoch": 20.695833333333333, "loss": 0.03573738783597946, "loss_ce": 1.3047135780652752e-06, "loss_iou": 0.1318359375, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 453387852, "step": 4967 }, { "epoch": 20.7, "grad_norm": 3.2851894633221, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 453479236, "step": 4968 }, { "epoch": 20.7, "loss": 0.049005232751369476, "loss_ce": 3.215075412299484e-05, "loss_iou": 0.1767578125, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 453479236, "step": 4968 }, { "epoch": 20.704166666666666, "grad_norm": 2.673697904962962, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 453570696, "step": 4969 }, { "epoch": 20.704166666666666, "loss": 0.05169004574418068, "loss_ce": 8.527647878509015e-06, "loss_iou": 0.27734375, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 453570696, "step": 4969 }, { "epoch": 20.708333333333332, "grad_norm": 2.1221730915444774, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 453660612, "step": 4970 }, { "epoch": 20.708333333333332, "loss": 0.03286011517047882, "loss_ce": 3.1290687729779165e-07, "loss_iou": 0.2353515625, "loss_num": 0.006561279296875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 453660612, "step": 4970 }, { "epoch": 20.7125, "grad_norm": 2.8251891286421738, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 453751436, "step": 4971 }, { "epoch": 20.7125, "loss": 0.049376361072063446, "loss_ce": 1.4179516256263014e-05, "loss_iou": 0.15625, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 453751436, "step": 4971 }, { "epoch": 20.716666666666665, "grad_norm": 2.863646490213054, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 453842632, "step": 4972 }, { "epoch": 20.716666666666665, "loss": 0.05301649123430252, "loss_ce": 0.00040418541175313294, "loss_iou": 0.1572265625, "loss_num": 0.010498046875, "loss_xval": 0.052734375, "num_input_tokens_seen": 453842632, "step": 4972 }, { "epoch": 20.720833333333335, "grad_norm": 3.3607289465878973, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 453934264, "step": 4973 }, { "epoch": 20.720833333333335, "loss": 0.043854400515556335, "loss_ce": 6.405851422641717e-07, "loss_iou": 0.294921875, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 453934264, "step": 4973 }, { "epoch": 20.725, "grad_norm": 3.2709664410734023, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 454025712, "step": 4974 }, { "epoch": 20.725, "loss": 0.030515987426042557, "loss_ce": 0.00027306523406878114, "loss_iou": 0.287109375, "loss_num": 0.00604248046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 454025712, "step": 4974 }, { "epoch": 20.729166666666668, "grad_norm": 2.5059070516777266, "learning_rate": 5e-05, "loss": 0.085, "num_input_tokens_seen": 454116892, "step": 4975 }, { "epoch": 20.729166666666668, "loss": 0.06782197952270508, "loss_ce": 1.1920225006178953e-05, "loss_iou": 0.2255859375, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 454116892, "step": 4975 }, { "epoch": 20.733333333333334, "grad_norm": 3.253961335286682, "learning_rate": 5e-05, "loss": 0.0671, "num_input_tokens_seen": 454208128, "step": 4976 }, { "epoch": 20.733333333333334, "loss": 0.0467258021235466, "loss_ce": 3.3931551115529146e-06, "loss_iou": 0.2197265625, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 454208128, "step": 4976 }, { "epoch": 20.7375, "grad_norm": 3.3823405746496076, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 454299648, "step": 4977 }, { "epoch": 20.7375, "loss": 0.058784861117601395, "loss_ce": 3.749439940747834e-07, "loss_iou": 0.2490234375, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 454299648, "step": 4977 }, { "epoch": 20.741666666666667, "grad_norm": 2.786119531230585, "learning_rate": 5e-05, "loss": 0.0294, "num_input_tokens_seen": 454390716, "step": 4978 }, { "epoch": 20.741666666666667, "loss": 0.030614566057920456, "loss_ce": 1.3063713595329318e-05, "loss_iou": 0.2294921875, "loss_num": 0.006103515625, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 454390716, "step": 4978 }, { "epoch": 20.745833333333334, "grad_norm": 2.2338281704667216, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 454481636, "step": 4979 }, { "epoch": 20.745833333333334, "loss": 0.06036851927638054, "loss_ce": 4.749166691908613e-06, "loss_iou": 0.234375, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 454481636, "step": 4979 }, { "epoch": 20.75, "grad_norm": 2.0095958006107137, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 454572604, "step": 4980 }, { "epoch": 20.75, "loss": 0.035050153732299805, "loss_ce": 7.142032245610608e-07, "loss_iou": 0.18359375, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 454572604, "step": 4980 }, { "epoch": 20.754166666666666, "grad_norm": 2.264932494350519, "learning_rate": 5e-05, "loss": 0.0429, "num_input_tokens_seen": 454663844, "step": 4981 }, { "epoch": 20.754166666666666, "loss": 0.03668418526649475, "loss_ce": 2.054937340290053e-06, "loss_iou": 0.271484375, "loss_num": 0.007354736328125, "loss_xval": 0.03662109375, "num_input_tokens_seen": 454663844, "step": 4981 }, { "epoch": 20.758333333333333, "grad_norm": 2.424681742010323, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 454755116, "step": 4982 }, { "epoch": 20.758333333333333, "loss": 0.053710393607616425, "loss_ce": 7.08766674506478e-06, "loss_iou": 0.24609375, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 454755116, "step": 4982 }, { "epoch": 20.7625, "grad_norm": 2.746761167417316, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 454846020, "step": 4983 }, { "epoch": 20.7625, "loss": 0.04286251217126846, "loss_ce": 5.733260195484036e-07, "loss_iou": 0.2314453125, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 454846020, "step": 4983 }, { "epoch": 20.766666666666666, "grad_norm": 4.3282290256004545, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 454937608, "step": 4984 }, { "epoch": 20.766666666666666, "loss": 0.026827123016119003, "loss_ce": 2.170055950045935e-06, "loss_iou": 0.2734375, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 454937608, "step": 4984 }, { "epoch": 20.770833333333332, "grad_norm": 1.8867506140848092, "learning_rate": 5e-05, "loss": 0.077, "num_input_tokens_seen": 455029116, "step": 4985 }, { "epoch": 20.770833333333332, "loss": 0.12005200982093811, "loss_ce": 3.4892395888164174e-06, "loss_iou": 0.189453125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 455029116, "step": 4985 }, { "epoch": 20.775, "grad_norm": 1.8878314678749506, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 455120496, "step": 4986 }, { "epoch": 20.775, "loss": 0.06420492380857468, "loss_ce": 5.6972850870806724e-05, "loss_iou": 0.138671875, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 455120496, "step": 4986 }, { "epoch": 20.779166666666665, "grad_norm": 2.5363373125866993, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 455211888, "step": 4987 }, { "epoch": 20.779166666666665, "loss": 0.04293936491012573, "loss_ce": 1.1320619250909658e-06, "loss_iou": 0.2177734375, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 455211888, "step": 4987 }, { "epoch": 20.783333333333335, "grad_norm": 3.6959060546808886, "learning_rate": 5e-05, "loss": 0.0936, "num_input_tokens_seen": 455302952, "step": 4988 }, { "epoch": 20.783333333333335, "loss": 0.03244100511074066, "loss_ce": 8.187475941667799e-07, "loss_iou": 0.23828125, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 455302952, "step": 4988 }, { "epoch": 20.7875, "grad_norm": 2.2800049431943354, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 455394648, "step": 4989 }, { "epoch": 20.7875, "loss": 0.03557218611240387, "loss_ce": 0.00029386812821030617, "loss_iou": 0.28125, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 455394648, "step": 4989 }, { "epoch": 20.791666666666668, "grad_norm": 4.819173410676004, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 455486140, "step": 4990 }, { "epoch": 20.791666666666668, "loss": 0.047681789845228195, "loss_ce": 8.96269193617627e-05, "loss_iou": 0.310546875, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 455486140, "step": 4990 }, { "epoch": 20.795833333333334, "grad_norm": 3.162090646689533, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 455577380, "step": 4991 }, { "epoch": 20.795833333333334, "loss": 0.046526797115802765, "loss_ce": 1.801136022550054e-05, "loss_iou": 0.333984375, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 455577380, "step": 4991 }, { "epoch": 20.8, "grad_norm": 1.782414590120586, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 455669196, "step": 4992 }, { "epoch": 20.8, "loss": 0.04718227684497833, "loss_ce": 8.602525485912338e-05, "loss_iou": 0.1435546875, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 455669196, "step": 4992 }, { "epoch": 20.804166666666667, "grad_norm": 1.6869126328682527, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 455759684, "step": 4993 }, { "epoch": 20.804166666666667, "loss": 0.029673587530851364, "loss_ce": 0.0011549104237928987, "loss_iou": 0.1728515625, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 455759684, "step": 4993 }, { "epoch": 20.808333333333334, "grad_norm": 3.5504204820461487, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 455851540, "step": 4994 }, { "epoch": 20.808333333333334, "loss": 0.04614360257983208, "loss_ce": 1.0263997864967678e-06, "loss_iou": 0.3203125, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 455851540, "step": 4994 }, { "epoch": 20.8125, "grad_norm": 7.9999161852664775, "learning_rate": 5e-05, "loss": 0.1073, "num_input_tokens_seen": 455943032, "step": 4995 }, { "epoch": 20.8125, "loss": 0.16412606835365295, "loss_ce": 0.0007807252113707364, "loss_iou": 0.171875, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 455943032, "step": 4995 }, { "epoch": 20.816666666666666, "grad_norm": 2.344526080385181, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 456034340, "step": 4996 }, { "epoch": 20.816666666666666, "loss": 0.06617549061775208, "loss_ce": 5.753272034780821e-06, "loss_iou": 0.236328125, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 456034340, "step": 4996 }, { "epoch": 20.820833333333333, "grad_norm": 0.7441476896758881, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 456125576, "step": 4997 }, { "epoch": 20.820833333333333, "loss": 0.04812666028738022, "loss_ce": 4.406510640819761e-07, "loss_iou": 0.203125, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 456125576, "step": 4997 }, { "epoch": 20.825, "grad_norm": 2.994426114133977, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 456216976, "step": 4998 }, { "epoch": 20.825, "loss": 0.03340182825922966, "loss_ce": 3.3879524607982603e-07, "loss_iou": 0.2265625, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 456216976, "step": 4998 }, { "epoch": 20.829166666666666, "grad_norm": 0.7822136255921042, "learning_rate": 5e-05, "loss": 0.0225, "num_input_tokens_seen": 456307684, "step": 4999 }, { "epoch": 20.829166666666666, "loss": 0.0229647196829319, "loss_ce": 2.422164016024908e-07, "loss_iou": 0.1669921875, "loss_num": 0.004608154296875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 456307684, "step": 4999 }, { "epoch": 20.833333333333332, "grad_norm": 1.1877380935922035, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 456398700, "step": 5000 }, { "epoch": 20.833333333333332, "eval_seeclick_CIoU": 0.24451086670160294, "eval_seeclick_GIoU": 0.21807420998811722, "eval_seeclick_IoU": 0.3399440348148346, "eval_seeclick_MAE_all": 0.10255898535251617, "eval_seeclick_MAE_h": 0.08058749511837959, "eval_seeclick_MAE_w": 0.2307920679450035, "eval_seeclick_MAE_x_boxes": 0.22138375788927078, "eval_seeclick_MAE_y_boxes": 0.0859718956053257, "eval_seeclick_NUM_probability": 0.9999991357326508, "eval_seeclick_inside_bbox": 0.5852272808551788, "eval_seeclick_loss": 0.6117884516716003, "eval_seeclick_loss_ce": 0.14957696199417114, "eval_seeclick_loss_iou": 0.45806884765625, "eval_seeclick_loss_num": 0.0897216796875, "eval_seeclick_loss_xval": 0.4483642578125, "eval_seeclick_runtime": 76.2635, "eval_seeclick_samples_per_second": 0.564, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 456398700, "step": 5000 }, { "epoch": 20.833333333333332, "eval_icons_CIoU": 0.2932829260826111, "eval_icons_GIoU": 0.28378401696681976, "eval_icons_IoU": 0.39218054711818695, "eval_icons_MAE_all": 0.07791866362094879, "eval_icons_MAE_h": 0.1548103168606758, "eval_icons_MAE_w": 0.1250578574836254, "eval_icons_MAE_x_boxes": 0.12813802063465118, "eval_icons_MAE_y_boxes": 0.1542813554406166, "eval_icons_NUM_probability": 0.9999994933605194, "eval_icons_inside_bbox": 0.5503472238779068, "eval_icons_loss": 0.3675766885280609, "eval_icons_loss_ce": 0.003656390472315252, "eval_icons_loss_iou": 0.23919677734375, "eval_icons_loss_num": 0.07466888427734375, "eval_icons_loss_xval": 0.37335205078125, "eval_icons_runtime": 87.8661, "eval_icons_samples_per_second": 0.569, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 456398700, "step": 5000 }, { "epoch": 20.833333333333332, "eval_screenspot_CIoU": 0.35886751115322113, "eval_screenspot_GIoU": 0.3485796203215917, "eval_screenspot_IoU": 0.4398085872332255, "eval_screenspot_MAE_all": 0.10286615292231242, "eval_screenspot_MAE_h": 0.09417568395535152, "eval_screenspot_MAE_w": 0.22546063363552094, "eval_screenspot_MAE_x_boxes": 0.18992613007624945, "eval_screenspot_MAE_y_boxes": 0.09073736766974132, "eval_screenspot_NUM_probability": 0.9999987681706747, "eval_screenspot_inside_bbox": 0.7012499968210856, "eval_screenspot_loss": 0.5169656276702881, "eval_screenspot_loss_ce": 0.005585619326060017, "eval_screenspot_loss_iou": 0.3651123046875, "eval_screenspot_loss_num": 0.10569254557291667, "eval_screenspot_loss_xval": 0.5284016927083334, "eval_screenspot_runtime": 149.1336, "eval_screenspot_samples_per_second": 0.597, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 456398700, "step": 5000 }, { "epoch": 20.833333333333332, "eval_compot_CIoU": 0.4774431884288788, "eval_compot_GIoU": 0.4672915041446686, "eval_compot_IoU": 0.5645192861557007, "eval_compot_MAE_all": 0.0588533990085125, "eval_compot_MAE_h": 0.06242892146110535, "eval_compot_MAE_w": 0.16087764501571655, "eval_compot_MAE_x_boxes": 0.16103634238243103, "eval_compot_MAE_y_boxes": 0.061904361471533775, "eval_compot_NUM_probability": 0.9999991953372955, "eval_compot_inside_bbox": 0.7361111044883728, "eval_compot_loss": 0.3558048605918884, "eval_compot_loss_ce": 0.0663297027349472, "eval_compot_loss_iou": 0.25738525390625, "eval_compot_loss_num": 0.05814361572265625, "eval_compot_loss_xval": 0.2906646728515625, "eval_compot_runtime": 96.7957, "eval_compot_samples_per_second": 0.517, "eval_compot_steps_per_second": 0.021, "num_input_tokens_seen": 456398700, "step": 5000 }, { "epoch": 20.833333333333332, "loss": 0.38143765926361084, "loss_ce": 0.06625210493803024, "loss_iou": 0.26171875, "loss_num": 0.06298828125, "loss_xval": 0.314453125, "num_input_tokens_seen": 456398700, "step": 5000 }, { "epoch": 20.8375, "grad_norm": 3.337976958031263, "learning_rate": 5e-05, "loss": 0.06, "num_input_tokens_seen": 456490176, "step": 5001 }, { "epoch": 20.8375, "loss": 0.0710010752081871, "loss_ce": 0.00035288077197037637, "loss_iou": 0.197265625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 456490176, "step": 5001 }, { "epoch": 20.841666666666665, "grad_norm": 1.9477035834448888, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 456581212, "step": 5002 }, { "epoch": 20.841666666666665, "loss": 0.03873321786522865, "loss_ce": 6.408430181181757e-06, "loss_iou": 0.11328125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 456581212, "step": 5002 }, { "epoch": 20.845833333333335, "grad_norm": 2.132809389200927, "learning_rate": 5e-05, "loss": 0.0859, "num_input_tokens_seen": 456671592, "step": 5003 }, { "epoch": 20.845833333333335, "loss": 0.09218017011880875, "loss_ce": 1.8222680182589102e-06, "loss_iou": 0.3671875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 456671592, "step": 5003 }, { "epoch": 20.85, "grad_norm": 4.614820135793629, "learning_rate": 5e-05, "loss": 0.0646, "num_input_tokens_seen": 456762728, "step": 5004 }, { "epoch": 20.85, "loss": 0.09170796722173691, "loss_ce": 2.6483596684556687e-06, "loss_iou": 0.1796875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 456762728, "step": 5004 }, { "epoch": 20.854166666666668, "grad_norm": 2.787794613467919, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 456853680, "step": 5005 }, { "epoch": 20.854166666666668, "loss": 0.04777923598885536, "loss_ce": 3.964900315622799e-06, "loss_iou": 0.236328125, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 456853680, "step": 5005 }, { "epoch": 20.858333333333334, "grad_norm": 2.606264263599698, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 456945360, "step": 5006 }, { "epoch": 20.858333333333334, "loss": 0.06281374394893646, "loss_ce": 1.6193531337194145e-05, "loss_iou": 0.251953125, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 456945360, "step": 5006 }, { "epoch": 20.8625, "grad_norm": 3.1024920269881573, "learning_rate": 5e-05, "loss": 0.0997, "num_input_tokens_seen": 457035500, "step": 5007 }, { "epoch": 20.8625, "loss": 0.0986967384815216, "loss_ce": 7.156594801926985e-05, "loss_iou": 0.333984375, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 457035500, "step": 5007 }, { "epoch": 20.866666666666667, "grad_norm": 2.668374767398001, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 457126620, "step": 5008 }, { "epoch": 20.866666666666667, "loss": 0.052392516285181046, "loss_ce": 1.4628599274146836e-06, "loss_iou": 0.16796875, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 457126620, "step": 5008 }, { "epoch": 20.870833333333334, "grad_norm": 2.0467538160029886, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 457217796, "step": 5009 }, { "epoch": 20.870833333333334, "loss": 0.03557741269469261, "loss_ce": 1.5464353282368393e-06, "loss_iou": 0.2109375, "loss_num": 0.007110595703125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 457217796, "step": 5009 }, { "epoch": 20.875, "grad_norm": 3.191668867345092, "learning_rate": 5e-05, "loss": 0.079, "num_input_tokens_seen": 457309084, "step": 5010 }, { "epoch": 20.875, "loss": 0.10242909938097, "loss_ce": 6.703396024931862e-07, "loss_iou": 0.27734375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 457309084, "step": 5010 }, { "epoch": 20.879166666666666, "grad_norm": 0.4186359609007298, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 457400456, "step": 5011 }, { "epoch": 20.879166666666666, "loss": 0.024034079164266586, "loss_ce": 1.4864708646200597e-06, "loss_iou": 0.302734375, "loss_num": 0.00482177734375, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 457400456, "step": 5011 }, { "epoch": 20.883333333333333, "grad_norm": 1.1425865029006985, "learning_rate": 5e-05, "loss": 0.038, "num_input_tokens_seen": 457492068, "step": 5012 }, { "epoch": 20.883333333333333, "loss": 0.05428203195333481, "loss_ce": 0.0007999764638952911, "loss_iou": 0.2265625, "loss_num": 0.0107421875, "loss_xval": 0.053466796875, "num_input_tokens_seen": 457492068, "step": 5012 }, { "epoch": 20.8875, "grad_norm": 1.4859386496117717, "learning_rate": 5e-05, "loss": 0.0343, "num_input_tokens_seen": 457583168, "step": 5013 }, { "epoch": 20.8875, "loss": 0.046580176800489426, "loss_ce": 1.7980943084694445e-05, "loss_iou": 0.25390625, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 457583168, "step": 5013 }, { "epoch": 20.891666666666666, "grad_norm": 4.079518351007812, "learning_rate": 5e-05, "loss": 0.0685, "num_input_tokens_seen": 457674756, "step": 5014 }, { "epoch": 20.891666666666666, "loss": 0.06736816465854645, "loss_ce": 6.158369956210663e-07, "loss_iou": 0.2431640625, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 457674756, "step": 5014 }, { "epoch": 20.895833333333332, "grad_norm": 4.058994239972123, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 457765868, "step": 5015 }, { "epoch": 20.895833333333332, "loss": 0.030060499906539917, "loss_ce": 6.862018153697136e-07, "loss_iou": 0.28515625, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 457765868, "step": 5015 }, { "epoch": 20.9, "grad_norm": 2.2914838035650793, "learning_rate": 5e-05, "loss": 0.0659, "num_input_tokens_seen": 457857512, "step": 5016 }, { "epoch": 20.9, "loss": 0.05039317160844803, "loss_ce": 1.0222338460152969e-06, "loss_iou": 0.228515625, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 457857512, "step": 5016 }, { "epoch": 20.904166666666665, "grad_norm": 3.7616964007946807, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 457949028, "step": 5017 }, { "epoch": 20.904166666666665, "loss": 0.028687015175819397, "loss_ce": 4.915837052976713e-07, "loss_iou": 0.236328125, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 457949028, "step": 5017 }, { "epoch": 20.908333333333335, "grad_norm": 1.4106076483959105, "learning_rate": 5e-05, "loss": 0.0278, "num_input_tokens_seen": 458040516, "step": 5018 }, { "epoch": 20.908333333333335, "loss": 0.02542763389647007, "loss_ce": 2.1749872757936828e-05, "loss_iou": 0.154296875, "loss_num": 0.00506591796875, "loss_xval": 0.025390625, "num_input_tokens_seen": 458040516, "step": 5018 }, { "epoch": 20.9125, "grad_norm": 1.5423874174158072, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 458131972, "step": 5019 }, { "epoch": 20.9125, "loss": 0.04575492814183235, "loss_ce": 1.4497619531539385e-06, "loss_iou": 0.1552734375, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 458131972, "step": 5019 }, { "epoch": 20.916666666666668, "grad_norm": 2.4457580219008053, "learning_rate": 5e-05, "loss": 0.0309, "num_input_tokens_seen": 458223872, "step": 5020 }, { "epoch": 20.916666666666668, "loss": 0.03433071821928024, "loss_ce": 2.2569920474779792e-06, "loss_iou": 0.201171875, "loss_num": 0.006866455078125, "loss_xval": 0.034423828125, "num_input_tokens_seen": 458223872, "step": 5020 }, { "epoch": 20.920833333333334, "grad_norm": 2.6501025146001673, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 458315016, "step": 5021 }, { "epoch": 20.920833333333334, "loss": 0.061783723533153534, "loss_ce": 8.834414302327787e-07, "loss_iou": 0.33984375, "loss_num": 0.01239013671875, "loss_xval": 0.061767578125, "num_input_tokens_seen": 458315016, "step": 5021 }, { "epoch": 20.925, "grad_norm": 1.8261893391247939, "learning_rate": 5e-05, "loss": 0.025, "num_input_tokens_seen": 458406876, "step": 5022 }, { "epoch": 20.925, "loss": 0.020394135266542435, "loss_ce": 7.634452003912884e-07, "loss_iou": 0.1337890625, "loss_num": 0.00408935546875, "loss_xval": 0.0203857421875, "num_input_tokens_seen": 458406876, "step": 5022 }, { "epoch": 20.929166666666667, "grad_norm": 7.652682655986564, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 458497860, "step": 5023 }, { "epoch": 20.929166666666667, "loss": 0.03658346086740494, "loss_ce": 5.112319172440039e-07, "loss_iou": 0.25, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 458497860, "step": 5023 }, { "epoch": 20.933333333333334, "grad_norm": 3.372785638348631, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 458588648, "step": 5024 }, { "epoch": 20.933333333333334, "loss": 0.07515182346105576, "loss_ce": 2.2860726858198177e-06, "loss_iou": 0.28125, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 458588648, "step": 5024 }, { "epoch": 20.9375, "grad_norm": 2.4428530832099793, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 458680288, "step": 5025 }, { "epoch": 20.9375, "loss": 0.036588169634342194, "loss_ce": 0.0004935072502121329, "loss_iou": 0.1796875, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 458680288, "step": 5025 }, { "epoch": 20.941666666666666, "grad_norm": 16.422718461859585, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 458771808, "step": 5026 }, { "epoch": 20.941666666666666, "loss": 0.062179841101169586, "loss_ce": 2.767868636510684e-07, "loss_iou": 0.337890625, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 458771808, "step": 5026 }, { "epoch": 20.945833333333333, "grad_norm": 2.947345254036747, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 458862592, "step": 5027 }, { "epoch": 20.945833333333333, "loss": 0.051636189222335815, "loss_ce": 4.4753275574294094e-07, "loss_iou": 0.244140625, "loss_num": 0.01031494140625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 458862592, "step": 5027 }, { "epoch": 20.95, "grad_norm": 3.3874837066315058, "learning_rate": 5e-05, "loss": 0.0847, "num_input_tokens_seen": 458954308, "step": 5028 }, { "epoch": 20.95, "loss": 0.034767866134643555, "loss_ce": 8.345932656084187e-06, "loss_iou": 0.3515625, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 458954308, "step": 5028 }, { "epoch": 20.954166666666666, "grad_norm": 1.9194942216754223, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 459045720, "step": 5029 }, { "epoch": 20.954166666666666, "loss": 0.06594738364219666, "loss_ce": 1.4157611985865515e-05, "loss_iou": 0.208984375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 459045720, "step": 5029 }, { "epoch": 20.958333333333332, "grad_norm": 1.9020720939002147, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 459136588, "step": 5030 }, { "epoch": 20.958333333333332, "loss": 0.035919640213251114, "loss_ce": 4.5112381030776305e-07, "loss_iou": 0.1982421875, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 459136588, "step": 5030 }, { "epoch": 20.9625, "grad_norm": 1.6016189140048587, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 459227684, "step": 5031 }, { "epoch": 20.9625, "loss": 0.036752112209796906, "loss_ce": 2.4207462047343142e-05, "loss_iou": 0.1884765625, "loss_num": 0.007354736328125, "loss_xval": 0.03662109375, "num_input_tokens_seen": 459227684, "step": 5031 }, { "epoch": 20.966666666666665, "grad_norm": 1.9618492446367335, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 459318620, "step": 5032 }, { "epoch": 20.966666666666665, "loss": 0.05786576122045517, "loss_ce": 4.430605713423574e-06, "loss_iou": 0.177734375, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 459318620, "step": 5032 }, { "epoch": 20.970833333333335, "grad_norm": 0.5597310898126626, "learning_rate": 5e-05, "loss": 0.0456, "num_input_tokens_seen": 459409972, "step": 5033 }, { "epoch": 20.970833333333335, "loss": 0.04477032646536827, "loss_ce": 1.0376323871241766e-06, "loss_iou": 0.125, "loss_num": 0.00897216796875, "loss_xval": 0.044677734375, "num_input_tokens_seen": 459409972, "step": 5033 }, { "epoch": 20.975, "grad_norm": 4.598239030189397, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 459500968, "step": 5034 }, { "epoch": 20.975, "loss": 0.042814694344997406, "loss_ce": 6.1625769376405515e-06, "loss_iou": 0.1669921875, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 459500968, "step": 5034 }, { "epoch": 20.979166666666668, "grad_norm": 4.326593137145183, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 459592724, "step": 5035 }, { "epoch": 20.979166666666668, "loss": 0.056089065968990326, "loss_ce": 6.64217586745508e-05, "loss_iou": 0.1953125, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 459592724, "step": 5035 }, { "epoch": 20.983333333333334, "grad_norm": 1.715973771888479, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 459684760, "step": 5036 }, { "epoch": 20.983333333333334, "loss": 0.06677746027708054, "loss_ce": 4.996963070880156e-06, "loss_iou": 0.2421875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 459684760, "step": 5036 }, { "epoch": 20.9875, "grad_norm": 0.9801665764126386, "learning_rate": 5e-05, "loss": 0.0231, "num_input_tokens_seen": 459775892, "step": 5037 }, { "epoch": 20.9875, "loss": 0.02707877941429615, "loss_ce": 0.00038352797855623066, "loss_iou": 0.028076171875, "loss_num": 0.005340576171875, "loss_xval": 0.0267333984375, "num_input_tokens_seen": 459775892, "step": 5037 }, { "epoch": 20.991666666666667, "grad_norm": 1.6502273748838339, "learning_rate": 5e-05, "loss": 0.0463, "num_input_tokens_seen": 459867180, "step": 5038 }, { "epoch": 20.991666666666667, "loss": 0.04678413271903992, "loss_ce": 6.85528220856213e-07, "loss_iou": 0.25, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 459867180, "step": 5038 }, { "epoch": 20.995833333333334, "grad_norm": 13.8492837561597, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 459958980, "step": 5039 }, { "epoch": 20.995833333333334, "loss": 0.03167743235826492, "loss_ce": 1.8604114870868216e-07, "loss_iou": 0.193359375, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 459958980, "step": 5039 }, { "epoch": 21.0, "grad_norm": 2.997662396936798, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 460050472, "step": 5040 }, { "epoch": 21.0, "loss": 0.051897190511226654, "loss_ce": 2.0506997771008173e-06, "loss_iou": 0.23828125, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 460050472, "step": 5040 }, { "epoch": 21.004166666666666, "grad_norm": 2.6900583583643867, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 460142104, "step": 5041 }, { "epoch": 21.004166666666666, "loss": 0.032296109944581985, "loss_ce": 5.4287651437334716e-05, "loss_iou": 0.26953125, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 460142104, "step": 5041 }, { "epoch": 21.008333333333333, "grad_norm": 6.367181829234212, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 460232776, "step": 5042 }, { "epoch": 21.008333333333333, "loss": 0.023956449702382088, "loss_ce": 1.5054619950660708e-07, "loss_iou": 0.244140625, "loss_num": 0.004791259765625, "loss_xval": 0.02392578125, "num_input_tokens_seen": 460232776, "step": 5042 }, { "epoch": 21.0125, "grad_norm": 2.7300525317927953, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 460324156, "step": 5043 }, { "epoch": 21.0125, "loss": 0.04496552795171738, "loss_ce": 2.0766037778230384e-05, "loss_iou": 0.2578125, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 460324156, "step": 5043 }, { "epoch": 21.016666666666666, "grad_norm": 3.6869204388176096, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 460415312, "step": 5044 }, { "epoch": 21.016666666666666, "loss": 0.036822736263275146, "loss_ce": 3.2774910323496442e-06, "loss_iou": 0.158203125, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 460415312, "step": 5044 }, { "epoch": 21.020833333333332, "grad_norm": 2.5416651150304377, "learning_rate": 5e-05, "loss": 0.0953, "num_input_tokens_seen": 460506688, "step": 5045 }, { "epoch": 21.020833333333332, "loss": 0.13226720690727234, "loss_ce": 4.033006462123012e-06, "loss_iou": 0.384765625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 460506688, "step": 5045 }, { "epoch": 21.025, "grad_norm": 1.3640919934626132, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 460597880, "step": 5046 }, { "epoch": 21.025, "loss": 0.026312153786420822, "loss_ce": 6.000811481499113e-06, "loss_iou": 0.007080078125, "loss_num": 0.0052490234375, "loss_xval": 0.0263671875, "num_input_tokens_seen": 460597880, "step": 5046 }, { "epoch": 21.029166666666665, "grad_norm": 2.111172573092908, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 460688988, "step": 5047 }, { "epoch": 21.029166666666665, "loss": 0.031969036906957626, "loss_ce": 1.8739243614618317e-06, "loss_iou": 0.1328125, "loss_num": 0.006378173828125, "loss_xval": 0.031982421875, "num_input_tokens_seen": 460688988, "step": 5047 }, { "epoch": 21.033333333333335, "grad_norm": 2.483429770162464, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 460780876, "step": 5048 }, { "epoch": 21.033333333333335, "loss": 0.038086287677288055, "loss_ce": 1.560991586302407e-05, "loss_iou": 0.271484375, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 460780876, "step": 5048 }, { "epoch": 21.0375, "grad_norm": 2.667579986903645, "learning_rate": 5e-05, "loss": 0.0318, "num_input_tokens_seen": 460872044, "step": 5049 }, { "epoch": 21.0375, "loss": 0.028536254540085793, "loss_ce": 3.283719706814736e-05, "loss_iou": 0.173828125, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 460872044, "step": 5049 }, { "epoch": 21.041666666666668, "grad_norm": 2.1337628588389506, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 460963572, "step": 5050 }, { "epoch": 21.041666666666668, "loss": 0.02425198256969452, "loss_ce": 0.00021938963618595153, "loss_iou": 0.158203125, "loss_num": 0.00482177734375, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 460963572, "step": 5050 }, { "epoch": 21.045833333333334, "grad_norm": 1.825125178904793, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 461054480, "step": 5051 }, { "epoch": 21.045833333333334, "loss": 0.05502419173717499, "loss_ce": 1.0019826959251077e-06, "loss_iou": 0.2294921875, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 461054480, "step": 5051 }, { "epoch": 21.05, "grad_norm": 3.647261849290983, "learning_rate": 5e-05, "loss": 0.0316, "num_input_tokens_seen": 461145168, "step": 5052 }, { "epoch": 21.05, "loss": 0.038705211132764816, "loss_ce": 2.4182521883631125e-05, "loss_iou": 0.26171875, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 461145168, "step": 5052 }, { "epoch": 21.054166666666667, "grad_norm": 1.985815327131711, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 461236644, "step": 5053 }, { "epoch": 21.054166666666667, "loss": 0.0394623801112175, "loss_ce": 3.151773398712976e-06, "loss_iou": 0.265625, "loss_num": 0.00787353515625, "loss_xval": 0.03955078125, "num_input_tokens_seen": 461236644, "step": 5053 }, { "epoch": 21.058333333333334, "grad_norm": 2.5777082297701255, "learning_rate": 5e-05, "loss": 0.0762, "num_input_tokens_seen": 461327936, "step": 5054 }, { "epoch": 21.058333333333334, "loss": 0.03418642282485962, "loss_ce": 6.014150858391076e-05, "loss_iou": 0.1318359375, "loss_num": 0.006805419921875, "loss_xval": 0.0341796875, "num_input_tokens_seen": 461327936, "step": 5054 }, { "epoch": 21.0625, "grad_norm": 2.1285702336933796, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 461418936, "step": 5055 }, { "epoch": 21.0625, "loss": 0.0940098986029625, "loss_ce": 4.946746230416466e-07, "loss_iou": 0.2578125, "loss_num": 0.018798828125, "loss_xval": 0.09423828125, "num_input_tokens_seen": 461418936, "step": 5055 }, { "epoch": 21.066666666666666, "grad_norm": 2.5402293879248803, "learning_rate": 5e-05, "loss": 0.0596, "num_input_tokens_seen": 461509892, "step": 5056 }, { "epoch": 21.066666666666666, "loss": 0.09463571012020111, "loss_ce": 7.034442432996002e-07, "loss_iou": 0.1826171875, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 461509892, "step": 5056 }, { "epoch": 21.070833333333333, "grad_norm": 3.144230652175914, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 461601260, "step": 5057 }, { "epoch": 21.070833333333333, "loss": 0.0379909984767437, "loss_ce": 4.2440492507012095e-06, "loss_iou": 0.375, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 461601260, "step": 5057 }, { "epoch": 21.075, "grad_norm": 3.121256612516521, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 461692564, "step": 5058 }, { "epoch": 21.075, "loss": 0.04023890942335129, "loss_ce": 1.4835345609753858e-06, "loss_iou": 0.3359375, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 461692564, "step": 5058 }, { "epoch": 21.079166666666666, "grad_norm": 2.355240613160966, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 461784752, "step": 5059 }, { "epoch": 21.079166666666666, "loss": 0.024186529219150543, "loss_ce": 1.348738805972971e-06, "loss_iou": 0.224609375, "loss_num": 0.004852294921875, "loss_xval": 0.024169921875, "num_input_tokens_seen": 461784752, "step": 5059 }, { "epoch": 21.083333333333332, "grad_norm": 2.8721899574505234, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 461874012, "step": 5060 }, { "epoch": 21.083333333333332, "loss": 0.02969268709421158, "loss_ce": 4.485932004172355e-05, "loss_iou": 0.3359375, "loss_num": 0.00592041015625, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 461874012, "step": 5060 }, { "epoch": 21.0875, "grad_norm": 2.34567439799414, "learning_rate": 5e-05, "loss": 0.0345, "num_input_tokens_seen": 461965424, "step": 5061 }, { "epoch": 21.0875, "loss": 0.03739079833030701, "loss_ce": 6.762803877791157e-06, "loss_iou": 0.2060546875, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 461965424, "step": 5061 }, { "epoch": 21.091666666666665, "grad_norm": 2.147127909839684, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 462057056, "step": 5062 }, { "epoch": 21.091666666666665, "loss": 0.030382130295038223, "loss_ce": 1.7140035197371617e-05, "loss_iou": 0.1376953125, "loss_num": 0.006072998046875, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 462057056, "step": 5062 }, { "epoch": 21.095833333333335, "grad_norm": 2.5408218475613866, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 462148068, "step": 5063 }, { "epoch": 21.095833333333335, "loss": 0.03337986022233963, "loss_ce": 5.466506991069764e-05, "loss_iou": 0.2177734375, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 462148068, "step": 5063 }, { "epoch": 21.1, "grad_norm": 3.4630009665204855, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 462239468, "step": 5064 }, { "epoch": 21.1, "loss": 0.04306018725037575, "loss_ce": 7.514630851801485e-06, "loss_iou": 0.283203125, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 462239468, "step": 5064 }, { "epoch": 21.104166666666668, "grad_norm": 2.7646771813700397, "learning_rate": 5e-05, "loss": 0.0316, "num_input_tokens_seen": 462329652, "step": 5065 }, { "epoch": 21.104166666666668, "loss": 0.03456904739141464, "loss_ce": 2.5871258912957273e-07, "loss_iou": 0.2041015625, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 462329652, "step": 5065 }, { "epoch": 21.108333333333334, "grad_norm": 2.55066057684379, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 462421144, "step": 5066 }, { "epoch": 21.108333333333334, "loss": 0.029943151399493217, "loss_ce": 5.405986030382337e-06, "loss_iou": 0.267578125, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 462421144, "step": 5066 }, { "epoch": 21.1125, "grad_norm": 2.8795739152285855, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 462512932, "step": 5067 }, { "epoch": 21.1125, "loss": 0.0422324612736702, "loss_ce": 1.1392266060283873e-05, "loss_iou": 0.3515625, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 462512932, "step": 5067 }, { "epoch": 21.116666666666667, "grad_norm": 2.163544161580569, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 462603004, "step": 5068 }, { "epoch": 21.116666666666667, "loss": 0.030031124129891396, "loss_ce": 1.8264663594891317e-06, "loss_iou": 0.185546875, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 462603004, "step": 5068 }, { "epoch": 21.120833333333334, "grad_norm": 1.964811021290909, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 462694484, "step": 5069 }, { "epoch": 21.120833333333334, "loss": 0.022997736930847168, "loss_ce": 2.7417595447332133e-06, "loss_iou": 0.2294921875, "loss_num": 0.004608154296875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 462694484, "step": 5069 }, { "epoch": 21.125, "grad_norm": 1.3945575648462891, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 462785588, "step": 5070 }, { "epoch": 21.125, "loss": 0.036909572780132294, "loss_ce": 9.011285874294117e-05, "loss_iou": 0.30859375, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 462785588, "step": 5070 }, { "epoch": 21.129166666666666, "grad_norm": 0.7354333195966727, "learning_rate": 5e-05, "loss": 0.0295, "num_input_tokens_seen": 462877428, "step": 5071 }, { "epoch": 21.129166666666666, "loss": 0.023640796542167664, "loss_ce": 2.7821044568554498e-05, "loss_iou": 0.2578125, "loss_num": 0.004730224609375, "loss_xval": 0.0235595703125, "num_input_tokens_seen": 462877428, "step": 5071 }, { "epoch": 21.133333333333333, "grad_norm": 2.465280101647072, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 462969388, "step": 5072 }, { "epoch": 21.133333333333333, "loss": 0.02642808109521866, "loss_ce": 0.00011429937876528129, "loss_iou": 0.162109375, "loss_num": 0.0052490234375, "loss_xval": 0.0263671875, "num_input_tokens_seen": 462969388, "step": 5072 }, { "epoch": 21.1375, "grad_norm": 1.3225705648155508, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 463061024, "step": 5073 }, { "epoch": 21.1375, "loss": 0.052613116800785065, "loss_ce": 8.112248224279028e-07, "loss_iou": 0.1376953125, "loss_num": 0.010498046875, "loss_xval": 0.052734375, "num_input_tokens_seen": 463061024, "step": 5073 }, { "epoch": 21.141666666666666, "grad_norm": 4.000357639254447, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 463152004, "step": 5074 }, { "epoch": 21.141666666666666, "loss": 0.02655082568526268, "loss_ce": 5.327280518940825e-07, "loss_iou": 0.2421875, "loss_num": 0.00531005859375, "loss_xval": 0.026611328125, "num_input_tokens_seen": 463152004, "step": 5074 }, { "epoch": 21.145833333333332, "grad_norm": 3.28702933174913, "learning_rate": 5e-05, "loss": 0.086, "num_input_tokens_seen": 463243256, "step": 5075 }, { "epoch": 21.145833333333332, "loss": 0.13034138083457947, "loss_ce": 8.039830845518736e-07, "loss_iou": 0.26953125, "loss_num": 0.026123046875, "loss_xval": 0.1298828125, "num_input_tokens_seen": 463243256, "step": 5075 }, { "epoch": 21.15, "grad_norm": 5.610082202684336, "learning_rate": 5e-05, "loss": 0.0598, "num_input_tokens_seen": 463334320, "step": 5076 }, { "epoch": 21.15, "loss": 0.07361648231744766, "loss_ce": 4.5481851884687785e-07, "loss_iou": 0.2890625, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 463334320, "step": 5076 }, { "epoch": 21.154166666666665, "grad_norm": 4.048968495288943, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 463426408, "step": 5077 }, { "epoch": 21.154166666666665, "loss": 0.039019372314214706, "loss_ce": 4.842506314162165e-05, "loss_iou": 0.2255859375, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 463426408, "step": 5077 }, { "epoch": 21.158333333333335, "grad_norm": 2.9163708565496, "learning_rate": 5e-05, "loss": 0.0575, "num_input_tokens_seen": 463516684, "step": 5078 }, { "epoch": 21.158333333333335, "loss": 0.07347186654806137, "loss_ce": 8.00481871010561e-07, "loss_iou": 0.1669921875, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 463516684, "step": 5078 }, { "epoch": 21.1625, "grad_norm": 2.568020065248772, "learning_rate": 5e-05, "loss": 0.0374, "num_input_tokens_seen": 463607868, "step": 5079 }, { "epoch": 21.1625, "loss": 0.03308640792965889, "loss_ce": 5.353571395971812e-06, "loss_iou": 0.2431640625, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 463607868, "step": 5079 }, { "epoch": 21.166666666666668, "grad_norm": 2.019850801933071, "learning_rate": 5e-05, "loss": 0.0295, "num_input_tokens_seen": 463699692, "step": 5080 }, { "epoch": 21.166666666666668, "loss": 0.023806005716323853, "loss_ce": 2.295150807185564e-06, "loss_iou": 0.05126953125, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 463699692, "step": 5080 }, { "epoch": 21.170833333333334, "grad_norm": 2.1500917435027826, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 463791052, "step": 5081 }, { "epoch": 21.170833333333334, "loss": 0.039952442049980164, "loss_ce": 4.93404695589561e-06, "loss_iou": 0.23046875, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 463791052, "step": 5081 }, { "epoch": 21.175, "grad_norm": 2.976406847478439, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 463882336, "step": 5082 }, { "epoch": 21.175, "loss": 0.0614861361682415, "loss_ce": 8.455392048745125e-07, "loss_iou": 0.20703125, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 463882336, "step": 5082 }, { "epoch": 21.179166666666667, "grad_norm": 3.8415896939854655, "learning_rate": 5e-05, "loss": 0.049, "num_input_tokens_seen": 463973180, "step": 5083 }, { "epoch": 21.179166666666667, "loss": 0.034776244312524796, "loss_ce": 1.4637113281423808e-06, "loss_iou": 0.3515625, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 463973180, "step": 5083 }, { "epoch": 21.183333333333334, "grad_norm": 3.0208003017066174, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 464064584, "step": 5084 }, { "epoch": 21.183333333333334, "loss": 0.06564892083406448, "loss_ce": 3.6127748899161816e-05, "loss_iou": 0.265625, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 464064584, "step": 5084 }, { "epoch": 21.1875, "grad_norm": 3.241014229091648, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 464156084, "step": 5085 }, { "epoch": 21.1875, "loss": 0.04939756914973259, "loss_ce": 0.0018359236419200897, "loss_iou": 0.216796875, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 464156084, "step": 5085 }, { "epoch": 21.191666666666666, "grad_norm": 3.468386060828746, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 464246820, "step": 5086 }, { "epoch": 21.191666666666666, "loss": 0.05452055484056473, "loss_ce": 9.02461238183605e-07, "loss_iou": 0.240234375, "loss_num": 0.01092529296875, "loss_xval": 0.054443359375, "num_input_tokens_seen": 464246820, "step": 5086 }, { "epoch": 21.195833333333333, "grad_norm": 3.8644889622630334, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 464337828, "step": 5087 }, { "epoch": 21.195833333333333, "loss": 0.031357407569885254, "loss_ce": 5.93802496950957e-07, "loss_iou": 0.171875, "loss_num": 0.00628662109375, "loss_xval": 0.03125, "num_input_tokens_seen": 464337828, "step": 5087 }, { "epoch": 21.2, "grad_norm": 3.08540439088849, "learning_rate": 5e-05, "loss": 0.072, "num_input_tokens_seen": 464429808, "step": 5088 }, { "epoch": 21.2, "loss": 0.08263568580150604, "loss_ce": 1.715917846922821e-06, "loss_iou": 0.193359375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 464429808, "step": 5088 }, { "epoch": 21.204166666666666, "grad_norm": 2.1076324500298864, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 464521652, "step": 5089 }, { "epoch": 21.204166666666666, "loss": 0.024250905960798264, "loss_ce": 4.6885875235602725e-06, "loss_iou": 0.2216796875, "loss_num": 0.004852294921875, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 464521652, "step": 5089 }, { "epoch": 21.208333333333332, "grad_norm": 2.1363318465816445, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 464613240, "step": 5090 }, { "epoch": 21.208333333333332, "loss": 0.023094555363059044, "loss_ce": 1.56367605086416e-05, "loss_iou": 0.1923828125, "loss_num": 0.004608154296875, "loss_xval": 0.0230712890625, "num_input_tokens_seen": 464613240, "step": 5090 }, { "epoch": 21.2125, "grad_norm": 1.4078491064513639, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 464704340, "step": 5091 }, { "epoch": 21.2125, "loss": 0.07828094810247421, "loss_ce": 3.3591459214221686e-06, "loss_iou": 0.185546875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 464704340, "step": 5091 }, { "epoch": 21.216666666666665, "grad_norm": 0.7291619373870915, "learning_rate": 5e-05, "loss": 0.0176, "num_input_tokens_seen": 464795532, "step": 5092 }, { "epoch": 21.216666666666665, "loss": 0.014080582186579704, "loss_ce": 0.00014930842735338956, "loss_iou": 0.181640625, "loss_num": 0.0027923583984375, "loss_xval": 0.013916015625, "num_input_tokens_seen": 464795532, "step": 5092 }, { "epoch": 21.220833333333335, "grad_norm": 1.0392975169344065, "learning_rate": 5e-05, "loss": 0.0837, "num_input_tokens_seen": 464885692, "step": 5093 }, { "epoch": 21.220833333333335, "loss": 0.14798246324062347, "loss_ce": 0.00018581724725663662, "loss_iou": 0.1298828125, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 464885692, "step": 5093 }, { "epoch": 21.225, "grad_norm": 1.6224562431284506, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 464977256, "step": 5094 }, { "epoch": 21.225, "loss": 0.06149175018072128, "loss_ce": 6.458715688495431e-06, "loss_iou": 0.2314453125, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 464977256, "step": 5094 }, { "epoch": 21.229166666666668, "grad_norm": 1.2327032327636886, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 465068456, "step": 5095 }, { "epoch": 21.229166666666668, "loss": 0.04151744768023491, "loss_ce": 2.880004103644751e-05, "loss_iou": 0.1708984375, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 465068456, "step": 5095 }, { "epoch": 21.233333333333334, "grad_norm": 0.8941970332041576, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 465160128, "step": 5096 }, { "epoch": 21.233333333333334, "loss": 0.03685451298952103, "loss_ce": 0.00037075072759762406, "loss_iou": 0.16796875, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 465160128, "step": 5096 }, { "epoch": 21.2375, "grad_norm": 0.6661944717074832, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 465250648, "step": 5097 }, { "epoch": 21.2375, "loss": 0.016508635133504868, "loss_ce": 3.295803981018253e-05, "loss_iou": 0.25, "loss_num": 0.0032958984375, "loss_xval": 0.0164794921875, "num_input_tokens_seen": 465250648, "step": 5097 }, { "epoch": 21.241666666666667, "grad_norm": 0.8383924992028557, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 465342480, "step": 5098 }, { "epoch": 21.241666666666667, "loss": 0.023276329040527344, "loss_ce": 4.482225267565809e-05, "loss_iou": 0.203125, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 465342480, "step": 5098 }, { "epoch": 21.245833333333334, "grad_norm": 1.5745019835901888, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 465434396, "step": 5099 }, { "epoch": 21.245833333333334, "loss": 0.04514005407691002, "loss_ce": 4.554187398753129e-06, "loss_iou": 0.1728515625, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 465434396, "step": 5099 }, { "epoch": 21.25, "grad_norm": 6.255873712161377, "learning_rate": 5e-05, "loss": 0.0669, "num_input_tokens_seen": 465525368, "step": 5100 }, { "epoch": 21.25, "loss": 0.10162433236837387, "loss_ce": 7.985177603586635e-07, "loss_iou": 0.2431640625, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 465525368, "step": 5100 }, { "epoch": 21.254166666666666, "grad_norm": 1.161366941581007, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 465616504, "step": 5101 }, { "epoch": 21.254166666666666, "loss": 0.0856485515832901, "loss_ce": 9.648006198403891e-07, "loss_iou": 0.265625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 465616504, "step": 5101 }, { "epoch": 21.258333333333333, "grad_norm": 1.1708937248271916, "learning_rate": 5e-05, "loss": 0.0253, "num_input_tokens_seen": 465708368, "step": 5102 }, { "epoch": 21.258333333333333, "loss": 0.02742135338485241, "loss_ce": 6.234511965885758e-05, "loss_iou": 0.2041015625, "loss_num": 0.005462646484375, "loss_xval": 0.02734375, "num_input_tokens_seen": 465708368, "step": 5102 }, { "epoch": 21.2625, "grad_norm": 4.447723485840417, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 465799968, "step": 5103 }, { "epoch": 21.2625, "loss": 0.07481688261032104, "loss_ce": 3.0401115509448573e-06, "loss_iou": 0.2734375, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 465799968, "step": 5103 }, { "epoch": 21.266666666666666, "grad_norm": 1.3703411906643668, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 465890928, "step": 5104 }, { "epoch": 21.266666666666666, "loss": 0.05223878473043442, "loss_ce": 3.2282886763823626e-07, "loss_iou": 0.18359375, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 465890928, "step": 5104 }, { "epoch": 21.270833333333332, "grad_norm": 2.769525755927495, "learning_rate": 5e-05, "loss": 0.0284, "num_input_tokens_seen": 465981804, "step": 5105 }, { "epoch": 21.270833333333332, "loss": 0.03247120976448059, "loss_ce": 5.079807010588411e-07, "loss_iou": 0.2236328125, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 465981804, "step": 5105 }, { "epoch": 21.275, "grad_norm": 1.7096892427478472, "learning_rate": 5e-05, "loss": 0.0317, "num_input_tokens_seen": 466073436, "step": 5106 }, { "epoch": 21.275, "loss": 0.041040971875190735, "loss_ce": 0.00025422926410101354, "loss_iou": 0.1650390625, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 466073436, "step": 5106 }, { "epoch": 21.279166666666665, "grad_norm": 3.939910670255322, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 466164960, "step": 5107 }, { "epoch": 21.279166666666665, "loss": 0.05779968202114105, "loss_ce": 7.568756700493395e-05, "loss_iou": 0.28125, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 466164960, "step": 5107 }, { "epoch": 21.283333333333335, "grad_norm": 2.9411654891285806, "learning_rate": 5e-05, "loss": 0.0741, "num_input_tokens_seen": 466256052, "step": 5108 }, { "epoch": 21.283333333333335, "loss": 0.04657086730003357, "loss_ce": 1.0450962690811139e-06, "loss_iou": 0.251953125, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 466256052, "step": 5108 }, { "epoch": 21.2875, "grad_norm": 3.239933849340362, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 466347372, "step": 5109 }, { "epoch": 21.2875, "loss": 0.042045846581459045, "loss_ce": 2.5268442982451234e-07, "loss_iou": 0.25390625, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 466347372, "step": 5109 }, { "epoch": 21.291666666666668, "grad_norm": 6.448067717519509, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 466438272, "step": 5110 }, { "epoch": 21.291666666666668, "loss": 0.03699462115764618, "loss_ce": 3.0206821975298226e-05, "loss_iou": 0.1806640625, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 466438272, "step": 5110 }, { "epoch": 21.295833333333334, "grad_norm": 1.5650711524552718, "learning_rate": 5e-05, "loss": 0.0487, "num_input_tokens_seen": 466529504, "step": 5111 }, { "epoch": 21.295833333333334, "loss": 0.027462124824523926, "loss_ce": 3.932990239263745e-06, "loss_iou": 0.162109375, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 466529504, "step": 5111 }, { "epoch": 21.3, "grad_norm": 3.5636719032526685, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 466621060, "step": 5112 }, { "epoch": 21.3, "loss": 0.05067984759807587, "loss_ce": 0.00010840390314115211, "loss_iou": 0.275390625, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 466621060, "step": 5112 }, { "epoch": 21.304166666666667, "grad_norm": 2.7456772689630182, "learning_rate": 5e-05, "loss": 0.0445, "num_input_tokens_seen": 466712584, "step": 5113 }, { "epoch": 21.304166666666667, "loss": 0.04448344558477402, "loss_ce": 4.072552201250801e-06, "loss_iou": 0.2197265625, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 466712584, "step": 5113 }, { "epoch": 21.308333333333334, "grad_norm": 4.233691763444781, "learning_rate": 5e-05, "loss": 0.0626, "num_input_tokens_seen": 466803620, "step": 5114 }, { "epoch": 21.308333333333334, "loss": 0.026803674176335335, "loss_ce": 1.6103215330076637e-06, "loss_iou": 0.21484375, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 466803620, "step": 5114 }, { "epoch": 21.3125, "grad_norm": 3.9309097782444935, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 466895120, "step": 5115 }, { "epoch": 21.3125, "loss": 0.033508796244859695, "loss_ce": 1.5754052583361045e-05, "loss_iou": 0.31640625, "loss_num": 0.0067138671875, "loss_xval": 0.033447265625, "num_input_tokens_seen": 466895120, "step": 5115 }, { "epoch": 21.316666666666666, "grad_norm": 1.9957723175132391, "learning_rate": 5e-05, "loss": 0.0686, "num_input_tokens_seen": 466986304, "step": 5116 }, { "epoch": 21.316666666666666, "loss": 0.034279532730579376, "loss_ce": 8.292201528092846e-06, "loss_iou": 0.32421875, "loss_num": 0.006866455078125, "loss_xval": 0.0341796875, "num_input_tokens_seen": 466986304, "step": 5116 }, { "epoch": 21.320833333333333, "grad_norm": 1.605300799367561, "learning_rate": 5e-05, "loss": 0.0417, "num_input_tokens_seen": 467077756, "step": 5117 }, { "epoch": 21.320833333333333, "loss": 0.027537034824490547, "loss_ce": 1.0179230230278336e-05, "loss_iou": 0.232421875, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 467077756, "step": 5117 }, { "epoch": 21.325, "grad_norm": 1.4889248178904293, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 467168436, "step": 5118 }, { "epoch": 21.325, "loss": 0.06738781929016113, "loss_ce": 0.00047039767378009856, "loss_iou": 0.1953125, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 467168436, "step": 5118 }, { "epoch": 21.329166666666666, "grad_norm": 1.6569154713614649, "learning_rate": 5e-05, "loss": 0.0625, "num_input_tokens_seen": 467260136, "step": 5119 }, { "epoch": 21.329166666666666, "loss": 0.089745432138443, "loss_ce": 8.497641829308122e-06, "loss_iou": 0.20703125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 467260136, "step": 5119 }, { "epoch": 21.333333333333332, "grad_norm": 2.7731899562486255, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 467351804, "step": 5120 }, { "epoch": 21.333333333333332, "loss": 0.04865720868110657, "loss_ce": 5.033303386881016e-05, "loss_iou": 0.3671875, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 467351804, "step": 5120 }, { "epoch": 21.3375, "grad_norm": 2.712222274695321, "learning_rate": 5e-05, "loss": 0.0313, "num_input_tokens_seen": 467443352, "step": 5121 }, { "epoch": 21.3375, "loss": 0.026291847229003906, "loss_ce": 9.549963806421147e-07, "loss_iou": 0.265625, "loss_num": 0.0052490234375, "loss_xval": 0.0262451171875, "num_input_tokens_seen": 467443352, "step": 5121 }, { "epoch": 21.341666666666665, "grad_norm": 2.6302096628943454, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 467534672, "step": 5122 }, { "epoch": 21.341666666666665, "loss": 0.06893706321716309, "loss_ce": 5.48449042980792e-06, "loss_iou": 0.2236328125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 467534672, "step": 5122 }, { "epoch": 21.345833333333335, "grad_norm": 6.87071556194301, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 467625644, "step": 5123 }, { "epoch": 21.345833333333335, "loss": 0.04277816414833069, "loss_ce": 7.777657629048917e-06, "loss_iou": 0.271484375, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 467625644, "step": 5123 }, { "epoch": 21.35, "grad_norm": 2.9986205193387017, "learning_rate": 5e-05, "loss": 0.0373, "num_input_tokens_seen": 467716840, "step": 5124 }, { "epoch": 21.35, "loss": 0.034434035420417786, "loss_ce": 0.0005137492553330958, "loss_iou": 0.3125, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 467716840, "step": 5124 }, { "epoch": 21.354166666666668, "grad_norm": 2.778658504696439, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 467808148, "step": 5125 }, { "epoch": 21.354166666666668, "loss": 0.02985912188887596, "loss_ce": 4.344867920735851e-05, "loss_iou": 0.314453125, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 467808148, "step": 5125 }, { "epoch": 21.358333333333334, "grad_norm": 2.6175877227407347, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 467899308, "step": 5126 }, { "epoch": 21.358333333333334, "loss": 0.030777405947446823, "loss_ce": 8.059006177063566e-06, "loss_iou": 0.302734375, "loss_num": 0.00616455078125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 467899308, "step": 5126 }, { "epoch": 21.3625, "grad_norm": 2.3286229772663187, "learning_rate": 5e-05, "loss": 0.0528, "num_input_tokens_seen": 467990460, "step": 5127 }, { "epoch": 21.3625, "loss": 0.05989307537674904, "loss_ce": 2.329000380996149e-06, "loss_iou": 0.15234375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 467990460, "step": 5127 }, { "epoch": 21.366666666666667, "grad_norm": 2.6920855529969043, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 468081116, "step": 5128 }, { "epoch": 21.366666666666667, "loss": 0.041651129722595215, "loss_ce": 2.2659719434159342e-06, "loss_iou": 0.263671875, "loss_num": 0.00830078125, "loss_xval": 0.041748046875, "num_input_tokens_seen": 468081116, "step": 5128 }, { "epoch": 21.370833333333334, "grad_norm": 0.9620627133383356, "learning_rate": 5e-05, "loss": 0.0224, "num_input_tokens_seen": 468172632, "step": 5129 }, { "epoch": 21.370833333333334, "loss": 0.02263781800866127, "loss_ce": 1.4041656868357677e-06, "loss_iou": 0.314453125, "loss_num": 0.0045166015625, "loss_xval": 0.0225830078125, "num_input_tokens_seen": 468172632, "step": 5129 }, { "epoch": 21.375, "grad_norm": 4.4940261797059895, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 468263924, "step": 5130 }, { "epoch": 21.375, "loss": 0.08159472048282623, "loss_ce": 0.00016237185627687722, "loss_iou": 0.095703125, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 468263924, "step": 5130 }, { "epoch": 21.379166666666666, "grad_norm": 2.425644312036774, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 468355576, "step": 5131 }, { "epoch": 21.379166666666666, "loss": 0.06586514413356781, "loss_ce": 5.811029950564262e-07, "loss_iou": 0.203125, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 468355576, "step": 5131 }, { "epoch": 21.383333333333333, "grad_norm": 3.169692159749731, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 468446672, "step": 5132 }, { "epoch": 21.383333333333333, "loss": 0.06761285662651062, "loss_ce": 1.161280351880123e-06, "loss_iou": 0.275390625, "loss_num": 0.0135498046875, "loss_xval": 0.0673828125, "num_input_tokens_seen": 468446672, "step": 5132 }, { "epoch": 21.3875, "grad_norm": 4.388810851070836, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 468537072, "step": 5133 }, { "epoch": 21.3875, "loss": 0.033356256783008575, "loss_ce": 5.455306109070079e-07, "loss_iou": 0.267578125, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 468537072, "step": 5133 }, { "epoch": 21.391666666666666, "grad_norm": 2.4028206194039163, "learning_rate": 5e-05, "loss": 0.0721, "num_input_tokens_seen": 468627996, "step": 5134 }, { "epoch": 21.391666666666666, "loss": 0.04357944428920746, "loss_ce": 3.4140907700930256e-07, "loss_iou": 0.28515625, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 468627996, "step": 5134 }, { "epoch": 21.395833333333332, "grad_norm": 2.5505019179703026, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 468716964, "step": 5135 }, { "epoch": 21.395833333333332, "loss": 0.026316307485103607, "loss_ce": 2.5413703042431735e-05, "loss_iou": 0.1455078125, "loss_num": 0.0052490234375, "loss_xval": 0.0262451171875, "num_input_tokens_seen": 468716964, "step": 5135 }, { "epoch": 21.4, "grad_norm": 1.4135616379362372, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 468807892, "step": 5136 }, { "epoch": 21.4, "loss": 0.05752602219581604, "loss_ce": 0.001999289495870471, "loss_iou": 0.212890625, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 468807892, "step": 5136 }, { "epoch": 21.404166666666665, "grad_norm": 2.6192946966806017, "learning_rate": 5e-05, "loss": 0.0746, "num_input_tokens_seen": 468898704, "step": 5137 }, { "epoch": 21.404166666666665, "loss": 0.07630203664302826, "loss_ce": 4.6984385448922694e-07, "loss_iou": 0.1435546875, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 468898704, "step": 5137 }, { "epoch": 21.408333333333335, "grad_norm": 2.2379969434569134, "learning_rate": 5e-05, "loss": 0.0747, "num_input_tokens_seen": 468990852, "step": 5138 }, { "epoch": 21.408333333333335, "loss": 0.06957028061151505, "loss_ce": 0.0005471492768265307, "loss_iou": 0.265625, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 468990852, "step": 5138 }, { "epoch": 21.4125, "grad_norm": 1.5048102658186053, "learning_rate": 5e-05, "loss": 0.0762, "num_input_tokens_seen": 469082548, "step": 5139 }, { "epoch": 21.4125, "loss": 0.11329013854265213, "loss_ce": 8.887142939784098e-06, "loss_iou": 0.29296875, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 469082548, "step": 5139 }, { "epoch": 21.416666666666668, "grad_norm": 19.14512999510595, "learning_rate": 5e-05, "loss": 0.0853, "num_input_tokens_seen": 469173712, "step": 5140 }, { "epoch": 21.416666666666668, "loss": 0.11006104946136475, "loss_ce": 6.806710007367656e-05, "loss_iou": 0.26171875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 469173712, "step": 5140 }, { "epoch": 21.420833333333334, "grad_norm": 1.0371820529257796, "learning_rate": 5e-05, "loss": 0.0611, "num_input_tokens_seen": 469264932, "step": 5141 }, { "epoch": 21.420833333333334, "loss": 0.07569563388824463, "loss_ce": 0.000256184081081301, "loss_iou": 0.25, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 469264932, "step": 5141 }, { "epoch": 21.425, "grad_norm": 0.8318537160887409, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 469355808, "step": 5142 }, { "epoch": 21.425, "loss": 0.023618394508957863, "loss_ce": 9.231413059751503e-06, "loss_iou": 0.14453125, "loss_num": 0.004730224609375, "loss_xval": 0.0235595703125, "num_input_tokens_seen": 469355808, "step": 5142 }, { "epoch": 21.429166666666667, "grad_norm": 1.7916804024785156, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 469447440, "step": 5143 }, { "epoch": 21.429166666666667, "loss": 0.048645682632923126, "loss_ce": 6.652858246525284e-07, "loss_iou": 0.17578125, "loss_num": 0.009765625, "loss_xval": 0.048583984375, "num_input_tokens_seen": 469447440, "step": 5143 }, { "epoch": 21.433333333333334, "grad_norm": 4.676872959945136, "learning_rate": 5e-05, "loss": 0.0404, "num_input_tokens_seen": 469538496, "step": 5144 }, { "epoch": 21.433333333333334, "loss": 0.03317292779684067, "loss_ce": 3.225621298952319e-07, "loss_iou": 0.26171875, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 469538496, "step": 5144 }, { "epoch": 21.4375, "grad_norm": 2.8883211934646775, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 469629716, "step": 5145 }, { "epoch": 21.4375, "loss": 0.04455077648162842, "loss_ce": 2.5629222363932058e-05, "loss_iou": 0.33203125, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 469629716, "step": 5145 }, { "epoch": 21.441666666666666, "grad_norm": 3.172159232706883, "learning_rate": 5e-05, "loss": 0.0716, "num_input_tokens_seen": 469721268, "step": 5146 }, { "epoch": 21.441666666666666, "loss": 0.07887370884418488, "loss_ce": 2.3914546545711346e-05, "loss_iou": 0.24609375, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 469721268, "step": 5146 }, { "epoch": 21.445833333333333, "grad_norm": 1.941963955509079, "learning_rate": 5e-05, "loss": 0.1117, "num_input_tokens_seen": 469813208, "step": 5147 }, { "epoch": 21.445833333333333, "loss": 0.14985010027885437, "loss_ce": 8.781100405030884e-06, "loss_iou": 0.12109375, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 469813208, "step": 5147 }, { "epoch": 21.45, "grad_norm": 5.756794171636228, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 469903704, "step": 5148 }, { "epoch": 21.45, "loss": 0.05248153209686279, "loss_ce": 6.558901532116579e-06, "loss_iou": 0.310546875, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 469903704, "step": 5148 }, { "epoch": 21.454166666666666, "grad_norm": 2.0219404994608374, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 469994624, "step": 5149 }, { "epoch": 21.454166666666666, "loss": 0.042460788041353226, "loss_ce": 3.2075922717922367e-06, "loss_iou": 0.14453125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 469994624, "step": 5149 }, { "epoch": 21.458333333333332, "grad_norm": 4.55508797806237, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 470085964, "step": 5150 }, { "epoch": 21.458333333333332, "loss": 0.05022701993584633, "loss_ce": 0.000483365380205214, "loss_iou": 0.2197265625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 470085964, "step": 5150 }, { "epoch": 21.4625, "grad_norm": 2.80946260721684, "learning_rate": 5e-05, "loss": 0.0517, "num_input_tokens_seen": 470177768, "step": 5151 }, { "epoch": 21.4625, "loss": 0.03056851401925087, "loss_ce": 5.1591487135738134e-06, "loss_iou": 0.31640625, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 470177768, "step": 5151 }, { "epoch": 21.466666666666665, "grad_norm": 6.489473341280429, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 470268748, "step": 5152 }, { "epoch": 21.466666666666665, "loss": 0.07733218371868134, "loss_ce": 6.388052042893833e-07, "loss_iou": 0.255859375, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 470268748, "step": 5152 }, { "epoch": 21.470833333333335, "grad_norm": 2.385341172996109, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 470360844, "step": 5153 }, { "epoch": 21.470833333333335, "loss": 0.06412360072135925, "loss_ce": 6.167654191813199e-06, "loss_iou": 0.294921875, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 470360844, "step": 5153 }, { "epoch": 21.475, "grad_norm": 2.7186507731781706, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 470452448, "step": 5154 }, { "epoch": 21.475, "loss": 0.07098732888698578, "loss_ce": 3.439177817199379e-06, "loss_iou": 0.3359375, "loss_num": 0.01422119140625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 470452448, "step": 5154 }, { "epoch": 21.479166666666668, "grad_norm": 3.7008464093250946, "learning_rate": 5e-05, "loss": 0.04, "num_input_tokens_seen": 470544008, "step": 5155 }, { "epoch": 21.479166666666668, "loss": 0.04185812547802925, "loss_ce": 0.00233786110766232, "loss_iou": 0.376953125, "loss_num": 0.00787353515625, "loss_xval": 0.03955078125, "num_input_tokens_seen": 470544008, "step": 5155 }, { "epoch": 21.483333333333334, "grad_norm": 2.4514042894558172, "learning_rate": 5e-05, "loss": 0.034, "num_input_tokens_seen": 470635716, "step": 5156 }, { "epoch": 21.483333333333334, "loss": 0.029755450785160065, "loss_ce": 8.143604190991027e-07, "loss_iou": 0.306640625, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 470635716, "step": 5156 }, { "epoch": 21.4875, "grad_norm": 1.7616124288679285, "learning_rate": 5e-05, "loss": 0.0848, "num_input_tokens_seen": 470727076, "step": 5157 }, { "epoch": 21.4875, "loss": 0.06504976749420166, "loss_ce": 4.733014793600887e-05, "loss_iou": 0.2109375, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 470727076, "step": 5157 }, { "epoch": 21.491666666666667, "grad_norm": 2.6863159119286926, "learning_rate": 5e-05, "loss": 0.0518, "num_input_tokens_seen": 470818544, "step": 5158 }, { "epoch": 21.491666666666667, "loss": 0.03866743668913841, "loss_ce": 0.003724808106198907, "loss_iou": 0.302734375, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 470818544, "step": 5158 }, { "epoch": 21.495833333333334, "grad_norm": 3.538664901062215, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 470910092, "step": 5159 }, { "epoch": 21.495833333333334, "loss": 0.05011036992073059, "loss_ce": 5.07782260683598e-07, "loss_iou": 0.29296875, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 470910092, "step": 5159 }, { "epoch": 21.5, "grad_norm": 2.51570375481264, "learning_rate": 5e-05, "loss": 0.0612, "num_input_tokens_seen": 471001484, "step": 5160 }, { "epoch": 21.5, "loss": 0.06976377964019775, "loss_ce": 5.932008662057342e-07, "loss_iou": 0.1162109375, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 471001484, "step": 5160 }, { "epoch": 21.504166666666666, "grad_norm": 1.4899894367983637, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 471093124, "step": 5161 }, { "epoch": 21.504166666666666, "loss": 0.05978493392467499, "loss_ce": 6.203370139701292e-05, "loss_iou": 0.2294921875, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 471093124, "step": 5161 }, { "epoch": 21.508333333333333, "grad_norm": 2.804098382159329, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 471184200, "step": 5162 }, { "epoch": 21.508333333333333, "loss": 0.034136831760406494, "loss_ce": 2.918685140684829e-06, "loss_iou": 0.3203125, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 471184200, "step": 5162 }, { "epoch": 21.5125, "grad_norm": 2.4871477512168187, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 471274320, "step": 5163 }, { "epoch": 21.5125, "loss": 0.0459405779838562, "loss_ce": 1.1626463674474508e-05, "loss_iou": 0.32421875, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 471274320, "step": 5163 }, { "epoch": 21.516666666666666, "grad_norm": 2.0333966820572096, "learning_rate": 5e-05, "loss": 0.0737, "num_input_tokens_seen": 471365360, "step": 5164 }, { "epoch": 21.516666666666666, "loss": 0.06339624524116516, "loss_ce": 0.0007589179440401495, "loss_iou": 0.3125, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 471365360, "step": 5164 }, { "epoch": 21.520833333333332, "grad_norm": 2.6889053137143564, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 471457112, "step": 5165 }, { "epoch": 21.520833333333332, "loss": 0.051147811114788055, "loss_ce": 3.4876194376920466e-07, "loss_iou": 0.3125, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 471457112, "step": 5165 }, { "epoch": 21.525, "grad_norm": 4.306832416324348, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 471548696, "step": 5166 }, { "epoch": 21.525, "loss": 0.03406589478254318, "loss_ce": 4.4640055421041325e-06, "loss_iou": 0.283203125, "loss_num": 0.006805419921875, "loss_xval": 0.0341796875, "num_input_tokens_seen": 471548696, "step": 5166 }, { "epoch": 21.529166666666665, "grad_norm": 3.313246129449017, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 471640760, "step": 5167 }, { "epoch": 21.529166666666665, "loss": 0.09329698234796524, "loss_ce": 0.00018022381118498743, "loss_iou": 0.3203125, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 471640760, "step": 5167 }, { "epoch": 21.533333333333335, "grad_norm": 2.6613088108665552, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 471731924, "step": 5168 }, { "epoch": 21.533333333333335, "loss": 0.03726760298013687, "loss_ce": 8.193208486773074e-05, "loss_iou": 0.236328125, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 471731924, "step": 5168 }, { "epoch": 21.5375, "grad_norm": 6.013018668852461, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 471823292, "step": 5169 }, { "epoch": 21.5375, "loss": 0.03623223304748535, "loss_ce": 7.870154149713926e-06, "loss_iou": 0.162109375, "loss_num": 0.00726318359375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 471823292, "step": 5169 }, { "epoch": 21.541666666666668, "grad_norm": 3.1403542077463613, "learning_rate": 5e-05, "loss": 0.0352, "num_input_tokens_seen": 471914652, "step": 5170 }, { "epoch": 21.541666666666668, "loss": 0.026538927108049393, "loss_ce": 1.152195363829378e-05, "loss_iou": 0.248046875, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 471914652, "step": 5170 }, { "epoch": 21.545833333333334, "grad_norm": 2.5965410496723247, "learning_rate": 5e-05, "loss": 0.028, "num_input_tokens_seen": 472005660, "step": 5171 }, { "epoch": 21.545833333333334, "loss": 0.02297055907547474, "loss_ce": 6.082138497731648e-06, "loss_iou": 0.248046875, "loss_num": 0.00457763671875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 472005660, "step": 5171 }, { "epoch": 21.55, "grad_norm": 2.488738984890954, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 472097340, "step": 5172 }, { "epoch": 21.55, "loss": 0.0831460952758789, "loss_ce": 9.540294740872923e-07, "loss_iou": 0.29296875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 472097340, "step": 5172 }, { "epoch": 21.554166666666667, "grad_norm": 2.1226264315021828, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 472189100, "step": 5173 }, { "epoch": 21.554166666666667, "loss": 0.13382770121097565, "loss_ce": 3.100679168710485e-05, "loss_iou": 0.1337890625, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 472189100, "step": 5173 }, { "epoch": 21.558333333333334, "grad_norm": 1.9685445410059124, "learning_rate": 5e-05, "loss": 0.0377, "num_input_tokens_seen": 472280176, "step": 5174 }, { "epoch": 21.558333333333334, "loss": 0.04735033959150314, "loss_ce": 2.315889787496417e-06, "loss_iou": 0.2578125, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 472280176, "step": 5174 }, { "epoch": 21.5625, "grad_norm": 2.124853590179741, "learning_rate": 5e-05, "loss": 0.0336, "num_input_tokens_seen": 472371624, "step": 5175 }, { "epoch": 21.5625, "loss": 0.0341351218521595, "loss_ce": 1.2107645943615353e-06, "loss_iou": 0.193359375, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 472371624, "step": 5175 }, { "epoch": 21.566666666666666, "grad_norm": 7.927331329297304, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 472463452, "step": 5176 }, { "epoch": 21.566666666666666, "loss": 0.03894485533237457, "loss_ce": 0.00223983614705503, "loss_iou": 0.2119140625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 472463452, "step": 5176 }, { "epoch": 21.570833333333333, "grad_norm": 2.168471099832549, "learning_rate": 5e-05, "loss": 0.0331, "num_input_tokens_seen": 472554896, "step": 5177 }, { "epoch": 21.570833333333333, "loss": 0.034134577959775925, "loss_ce": 5.407243952504359e-05, "loss_iou": 0.2353515625, "loss_num": 0.006805419921875, "loss_xval": 0.0341796875, "num_input_tokens_seen": 472554896, "step": 5177 }, { "epoch": 21.575, "grad_norm": 2.28263867475468, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 472646320, "step": 5178 }, { "epoch": 21.575, "loss": 0.03484075516462326, "loss_ce": 4.938349775329698e-06, "loss_iou": 0.36328125, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 472646320, "step": 5178 }, { "epoch": 21.579166666666666, "grad_norm": 2.6364224922774007, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 472737792, "step": 5179 }, { "epoch": 21.579166666666666, "loss": 0.04767080396413803, "loss_ce": 2.346263499930501e-06, "loss_iou": 0.265625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 472737792, "step": 5179 }, { "epoch": 21.583333333333332, "grad_norm": 2.8503069499383784, "learning_rate": 5e-05, "loss": 0.0699, "num_input_tokens_seen": 472829404, "step": 5180 }, { "epoch": 21.583333333333332, "loss": 0.10118187963962555, "loss_ce": 0.00010765744809759781, "loss_iou": 0.2578125, "loss_num": 0.0201416015625, "loss_xval": 0.10107421875, "num_input_tokens_seen": 472829404, "step": 5180 }, { "epoch": 21.5875, "grad_norm": 1.8857280247450574, "learning_rate": 5e-05, "loss": 0.034, "num_input_tokens_seen": 472921276, "step": 5181 }, { "epoch": 21.5875, "loss": 0.039288513362407684, "loss_ce": 7.342467870330438e-05, "loss_iou": 0.39453125, "loss_num": 0.0078125, "loss_xval": 0.039306640625, "num_input_tokens_seen": 472921276, "step": 5181 }, { "epoch": 21.591666666666665, "grad_norm": 1.4468703705584696, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 473012032, "step": 5182 }, { "epoch": 21.591666666666665, "loss": 0.036988455802202225, "loss_ce": 1.1513138815644197e-06, "loss_iou": 0.1767578125, "loss_num": 0.00738525390625, "loss_xval": 0.037109375, "num_input_tokens_seen": 473012032, "step": 5182 }, { "epoch": 21.595833333333335, "grad_norm": 1.1441987157474223, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 473103984, "step": 5183 }, { "epoch": 21.595833333333335, "loss": 0.08971662819385529, "loss_ce": 0.0003306442522443831, "loss_iou": 0.28515625, "loss_num": 0.0179443359375, "loss_xval": 0.08935546875, "num_input_tokens_seen": 473103984, "step": 5183 }, { "epoch": 21.6, "grad_norm": 1.1025974467681459, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 473195300, "step": 5184 }, { "epoch": 21.6, "loss": 0.04473863169550896, "loss_ce": 3.8011618016753346e-05, "loss_iou": 0.1904296875, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 473195300, "step": 5184 }, { "epoch": 21.604166666666668, "grad_norm": 1.6782143259054236, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 473286108, "step": 5185 }, { "epoch": 21.604166666666668, "loss": 0.03215426951646805, "loss_ce": 4.004207312391372e-06, "loss_iou": 0.19140625, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 473286108, "step": 5185 }, { "epoch": 21.608333333333334, "grad_norm": 2.202930522774745, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 473377196, "step": 5186 }, { "epoch": 21.608333333333334, "loss": 0.040970880538225174, "loss_ce": 1.0322871730750194e-06, "loss_iou": 0.29296875, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 473377196, "step": 5186 }, { "epoch": 21.6125, "grad_norm": 2.8559950712554056, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 473468276, "step": 5187 }, { "epoch": 21.6125, "loss": 0.03946885094046593, "loss_ce": 1.992463467104244e-06, "loss_iou": 0.1943359375, "loss_num": 0.00787353515625, "loss_xval": 0.03955078125, "num_input_tokens_seen": 473468276, "step": 5187 }, { "epoch": 21.616666666666667, "grad_norm": 3.2077730781119507, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 473559784, "step": 5188 }, { "epoch": 21.616666666666667, "loss": 0.032352715730667114, "loss_ce": 0.00032451938022859395, "loss_iou": 0.216796875, "loss_num": 0.00640869140625, "loss_xval": 0.031982421875, "num_input_tokens_seen": 473559784, "step": 5188 }, { "epoch": 21.620833333333334, "grad_norm": 2.899727186482045, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 473651204, "step": 5189 }, { "epoch": 21.620833333333334, "loss": 0.0342266820371151, "loss_ce": 1.2193077054689638e-06, "loss_iou": 0.28515625, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 473651204, "step": 5189 }, { "epoch": 21.625, "grad_norm": 2.4169527212246447, "learning_rate": 5e-05, "loss": 0.0961, "num_input_tokens_seen": 473742684, "step": 5190 }, { "epoch": 21.625, "loss": 0.15376363694667816, "loss_ce": 8.107833195936109e-07, "loss_iou": 0.287109375, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 473742684, "step": 5190 }, { "epoch": 21.629166666666666, "grad_norm": 1.5503498073536341, "learning_rate": 5e-05, "loss": 0.0225, "num_input_tokens_seen": 473833896, "step": 5191 }, { "epoch": 21.629166666666666, "loss": 0.022525303065776825, "loss_ce": 3.3305796023341827e-06, "loss_iou": 0.216796875, "loss_num": 0.0045166015625, "loss_xval": 0.0224609375, "num_input_tokens_seen": 473833896, "step": 5191 }, { "epoch": 21.633333333333333, "grad_norm": 1.127646011377556, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 473924504, "step": 5192 }, { "epoch": 21.633333333333333, "loss": 0.015396122820675373, "loss_ce": 7.63364732847549e-06, "loss_iou": 0.1796875, "loss_num": 0.003082275390625, "loss_xval": 0.015380859375, "num_input_tokens_seen": 473924504, "step": 5192 }, { "epoch": 21.6375, "grad_norm": 1.3417370517909932, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 474016108, "step": 5193 }, { "epoch": 21.6375, "loss": 0.04691285640001297, "loss_ce": 1.4966522030590568e-05, "loss_iou": 0.2255859375, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 474016108, "step": 5193 }, { "epoch": 21.641666666666666, "grad_norm": 2.760065897453089, "learning_rate": 5e-05, "loss": 0.0611, "num_input_tokens_seen": 474107736, "step": 5194 }, { "epoch": 21.641666666666666, "loss": 0.031467683613300323, "loss_ce": 4.0595964492240455e-06, "loss_iou": 0.29296875, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 474107736, "step": 5194 }, { "epoch": 21.645833333333332, "grad_norm": 3.8417209646728834, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 474198916, "step": 5195 }, { "epoch": 21.645833333333332, "loss": 0.054826926440000534, "loss_ce": 2.099684479617281e-06, "loss_iou": 0.421875, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 474198916, "step": 5195 }, { "epoch": 21.65, "grad_norm": 2.5788512954742857, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 474290460, "step": 5196 }, { "epoch": 21.65, "loss": 0.041484106332063675, "loss_ce": 3.085830030613579e-06, "loss_iou": 0.2392578125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 474290460, "step": 5196 }, { "epoch": 21.654166666666665, "grad_norm": 2.7839116889670095, "learning_rate": 5e-05, "loss": 0.0531, "num_input_tokens_seen": 474381736, "step": 5197 }, { "epoch": 21.654166666666665, "loss": 0.04018375277519226, "loss_ce": 2.2618718503508717e-05, "loss_iou": 0.2392578125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 474381736, "step": 5197 }, { "epoch": 21.658333333333335, "grad_norm": 2.394371038678764, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 474473196, "step": 5198 }, { "epoch": 21.658333333333335, "loss": 0.025610364973545074, "loss_ce": 6.117667908256408e-06, "loss_iou": 0.279296875, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 474473196, "step": 5198 }, { "epoch": 21.6625, "grad_norm": 1.5772029071222384, "learning_rate": 5e-05, "loss": 0.0403, "num_input_tokens_seen": 474564788, "step": 5199 }, { "epoch": 21.6625, "loss": 0.03406330198049545, "loss_ce": 5.686005806637695e-06, "loss_iou": 0.2734375, "loss_num": 0.006805419921875, "loss_xval": 0.0341796875, "num_input_tokens_seen": 474564788, "step": 5199 }, { "epoch": 21.666666666666668, "grad_norm": 2.271853023782968, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 474655668, "step": 5200 }, { "epoch": 21.666666666666668, "loss": 0.03489246591925621, "loss_ce": 3.245364950998919e-06, "loss_iou": 0.263671875, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 474655668, "step": 5200 }, { "epoch": 21.670833333333334, "grad_norm": 1.1929891383052986, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 474747052, "step": 5201 }, { "epoch": 21.670833333333334, "loss": 0.03987661376595497, "loss_ce": 5.399557267082855e-06, "loss_iou": 0.2421875, "loss_num": 0.00799560546875, "loss_xval": 0.039794921875, "num_input_tokens_seen": 474747052, "step": 5201 }, { "epoch": 21.675, "grad_norm": 1.3799547850853573, "learning_rate": 5e-05, "loss": 0.0856, "num_input_tokens_seen": 474838368, "step": 5202 }, { "epoch": 21.675, "loss": 0.12406538426876068, "loss_ce": 2.6694284315453842e-05, "loss_iou": 0.2353515625, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 474838368, "step": 5202 }, { "epoch": 21.679166666666667, "grad_norm": 2.284261544884045, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 474927804, "step": 5203 }, { "epoch": 21.679166666666667, "loss": 0.06884060055017471, "loss_ce": 8.208784493035637e-06, "loss_iou": 0.220703125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 474927804, "step": 5203 }, { "epoch": 21.683333333333334, "grad_norm": 1.127675302187502, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 475019008, "step": 5204 }, { "epoch": 21.683333333333334, "loss": 0.04536312445998192, "loss_ce": 9.02963220141828e-05, "loss_iou": 0.3125, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 475019008, "step": 5204 }, { "epoch": 21.6875, "grad_norm": 0.5740189289806529, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 475110416, "step": 5205 }, { "epoch": 21.6875, "loss": 0.049877770245075226, "loss_ce": 3.493142867228016e-05, "loss_iou": 0.1796875, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 475110416, "step": 5205 }, { "epoch": 21.691666666666666, "grad_norm": 3.3850143021747465, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 475201784, "step": 5206 }, { "epoch": 21.691666666666666, "loss": 0.054459817707538605, "loss_ce": 8.827035344438627e-06, "loss_iou": 0.1787109375, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 475201784, "step": 5206 }, { "epoch": 21.695833333333333, "grad_norm": 4.522311779324683, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 475293092, "step": 5207 }, { "epoch": 21.695833333333333, "loss": 0.03339429944753647, "loss_ce": 4.3943214222963434e-07, "loss_iou": 0.26953125, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 475293092, "step": 5207 }, { "epoch": 21.7, "grad_norm": 1.5613983690941637, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 475385228, "step": 5208 }, { "epoch": 21.7, "loss": 0.03437087684869766, "loss_ce": 4.623156928573735e-05, "loss_iou": 0.271484375, "loss_num": 0.006866455078125, "loss_xval": 0.034423828125, "num_input_tokens_seen": 475385228, "step": 5208 }, { "epoch": 21.704166666666666, "grad_norm": 2.2465253992130014, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 475476016, "step": 5209 }, { "epoch": 21.704166666666666, "loss": 0.040314361453056335, "loss_ce": 6.393887019839894e-07, "loss_iou": 0.31640625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 475476016, "step": 5209 }, { "epoch": 21.708333333333332, "grad_norm": 2.8540832434061927, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 475566704, "step": 5210 }, { "epoch": 21.708333333333332, "loss": 0.037071388214826584, "loss_ce": 1.6171455285984848e-07, "loss_iou": 0.140625, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 475566704, "step": 5210 }, { "epoch": 21.7125, "grad_norm": 2.0271835737644843, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 475657748, "step": 5211 }, { "epoch": 21.7125, "loss": 0.07204379886388779, "loss_ce": 1.4682226719742175e-05, "loss_iou": 0.267578125, "loss_num": 0.014404296875, "loss_xval": 0.072265625, "num_input_tokens_seen": 475657748, "step": 5211 }, { "epoch": 21.716666666666665, "grad_norm": 2.4003885949869166, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 475748644, "step": 5212 }, { "epoch": 21.716666666666665, "loss": 0.05843600630760193, "loss_ce": 1.0106807167176157e-05, "loss_iou": 0.30078125, "loss_num": 0.01171875, "loss_xval": 0.058349609375, "num_input_tokens_seen": 475748644, "step": 5212 }, { "epoch": 21.720833333333335, "grad_norm": 2.513923371140344, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 475838924, "step": 5213 }, { "epoch": 21.720833333333335, "loss": 0.034256190061569214, "loss_ce": 2.093961768423469e-07, "loss_iou": 0.228515625, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 475838924, "step": 5213 }, { "epoch": 21.725, "grad_norm": 3.3437388752474724, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 475930388, "step": 5214 }, { "epoch": 21.725, "loss": 0.05761764198541641, "loss_ce": 4.5206579102341493e-07, "loss_iou": 0.2060546875, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 475930388, "step": 5214 }, { "epoch": 21.729166666666668, "grad_norm": 2.113248422801573, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 476021372, "step": 5215 }, { "epoch": 21.729166666666668, "loss": 0.06953422725200653, "loss_ce": 0.0004042856162413955, "loss_iou": 0.1533203125, "loss_num": 0.0137939453125, "loss_xval": 0.0693359375, "num_input_tokens_seen": 476021372, "step": 5215 }, { "epoch": 21.733333333333334, "grad_norm": 1.6024094805556266, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 476112972, "step": 5216 }, { "epoch": 21.733333333333334, "loss": 0.05392562970519066, "loss_ce": 1.070283929038851e-06, "loss_iou": 0.310546875, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 476112972, "step": 5216 }, { "epoch": 21.7375, "grad_norm": 1.7687476494316916, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 476204388, "step": 5217 }, { "epoch": 21.7375, "loss": 0.022738073021173477, "loss_ce": 2.478513351888978e-06, "loss_iou": 0.23828125, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 476204388, "step": 5217 }, { "epoch": 21.741666666666667, "grad_norm": 2.358736586141072, "learning_rate": 5e-05, "loss": 0.0426, "num_input_tokens_seen": 476295792, "step": 5218 }, { "epoch": 21.741666666666667, "loss": 0.060945916920900345, "loss_ce": 1.7571270291227847e-05, "loss_iou": 0.189453125, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 476295792, "step": 5218 }, { "epoch": 21.745833333333334, "grad_norm": 2.7285288322163357, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 476387128, "step": 5219 }, { "epoch": 21.745833333333334, "loss": 0.02875593677163124, "loss_ce": 1.600810901436489e-05, "loss_iou": 0.333984375, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 476387128, "step": 5219 }, { "epoch": 21.75, "grad_norm": 5.473026437297767, "learning_rate": 5e-05, "loss": 0.0824, "num_input_tokens_seen": 476478060, "step": 5220 }, { "epoch": 21.75, "loss": 0.07223550975322723, "loss_ce": 3.9653684780205367e-07, "loss_iou": 0.25, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 476478060, "step": 5220 }, { "epoch": 21.754166666666666, "grad_norm": 2.6463556427619648, "learning_rate": 5e-05, "loss": 0.1077, "num_input_tokens_seen": 476567608, "step": 5221 }, { "epoch": 21.754166666666666, "loss": 0.06919453293085098, "loss_ce": 0.009914140217006207, "loss_iou": 0.234375, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 476567608, "step": 5221 }, { "epoch": 21.758333333333333, "grad_norm": 4.524436723885793, "learning_rate": 5e-05, "loss": 0.1222, "num_input_tokens_seen": 476658892, "step": 5222 }, { "epoch": 21.758333333333333, "loss": 0.1302042156457901, "loss_ce": 0.0603799931704998, "loss_iou": 0.326171875, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 476658892, "step": 5222 }, { "epoch": 21.7625, "grad_norm": 16.378174829214576, "learning_rate": 5e-05, "loss": 0.3835, "num_input_tokens_seen": 476750800, "step": 5223 }, { "epoch": 21.7625, "loss": 0.47329944372177124, "loss_ce": 0.29362720251083374, "loss_iou": 0.1796875, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 476750800, "step": 5223 }, { "epoch": 21.766666666666666, "grad_norm": 12.95783818151927, "learning_rate": 5e-05, "loss": 1.3604, "num_input_tokens_seen": 476841908, "step": 5224 }, { "epoch": 21.766666666666666, "loss": 1.3603050708770752, "loss_ce": 1.3277199268341064, "loss_iou": 0.1806640625, "loss_num": 0.00653076171875, "loss_xval": 0.032470703125, "num_input_tokens_seen": 476841908, "step": 5224 }, { "epoch": 21.770833333333332, "grad_norm": 38.876350421328716, "learning_rate": 5e-05, "loss": 0.3373, "num_input_tokens_seen": 476933316, "step": 5225 }, { "epoch": 21.770833333333332, "loss": 0.3426739275455475, "loss_ce": 0.26179471611976624, "loss_iou": 0.1728515625, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 476933316, "step": 5225 }, { "epoch": 21.775, "grad_norm": 72.00053397847842, "learning_rate": 5e-05, "loss": 3.4274, "num_input_tokens_seen": 477024284, "step": 5226 }, { "epoch": 21.775, "loss": 3.5369696617126465, "loss_ce": 3.5133261680603027, "loss_iou": 0.140625, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 477024284, "step": 5226 }, { "epoch": 21.779166666666665, "grad_norm": 2.655551455701337, "learning_rate": 5e-05, "loss": 0.0686, "num_input_tokens_seen": 477114208, "step": 5227 }, { "epoch": 21.779166666666665, "loss": 0.06098110228776932, "loss_ce": 0.0073006837628781796, "loss_iou": 0.26953125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 477114208, "step": 5227 }, { "epoch": 21.783333333333335, "grad_norm": 0.8326858351138243, "learning_rate": 5e-05, "loss": 0.0345, "num_input_tokens_seen": 477205596, "step": 5228 }, { "epoch": 21.783333333333335, "loss": 0.041718218475580215, "loss_ce": 0.004334184341132641, "loss_iou": 0.2421875, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 477205596, "step": 5228 }, { "epoch": 21.7875, "grad_norm": 4.0888741279726695, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 477296764, "step": 5229 }, { "epoch": 21.7875, "loss": 0.06158842518925667, "loss_ce": 0.000614304793998599, "loss_iou": 0.1181640625, "loss_num": 0.01214599609375, "loss_xval": 0.06103515625, "num_input_tokens_seen": 477296764, "step": 5229 }, { "epoch": 21.791666666666668, "grad_norm": 1.460208471676983, "learning_rate": 5e-05, "loss": 0.04, "num_input_tokens_seen": 477388312, "step": 5230 }, { "epoch": 21.791666666666668, "loss": 0.03421544283628464, "loss_ce": 0.0005087743629701436, "loss_iou": 0.193359375, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 477388312, "step": 5230 }, { "epoch": 21.795833333333334, "grad_norm": 4.024651774496485, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 477479928, "step": 5231 }, { "epoch": 21.795833333333334, "loss": 0.021592549979686737, "loss_ce": 0.003411701647564769, "loss_iou": 0.12060546875, "loss_num": 0.003631591796875, "loss_xval": 0.0181884765625, "num_input_tokens_seen": 477479928, "step": 5231 }, { "epoch": 21.8, "grad_norm": 3.102519437165367, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 477571288, "step": 5232 }, { "epoch": 21.8, "loss": 0.046392329037189484, "loss_ce": 0.0008143266895785928, "loss_iou": 0.263671875, "loss_num": 0.00909423828125, "loss_xval": 0.045654296875, "num_input_tokens_seen": 477571288, "step": 5232 }, { "epoch": 21.804166666666667, "grad_norm": 2.26852830905551, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 477662128, "step": 5233 }, { "epoch": 21.804166666666667, "loss": 0.03912534564733505, "loss_ce": 0.0004214280634187162, "loss_iou": 0.2578125, "loss_num": 0.007720947265625, "loss_xval": 0.038818359375, "num_input_tokens_seen": 477662128, "step": 5233 }, { "epoch": 21.808333333333334, "grad_norm": 1.2881546218242232, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 477753112, "step": 5234 }, { "epoch": 21.808333333333334, "loss": 0.04148241505026817, "loss_ce": 0.0005964894080534577, "loss_iou": 0.201171875, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 477753112, "step": 5234 }, { "epoch": 21.8125, "grad_norm": 1.392795251198521, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 477844560, "step": 5235 }, { "epoch": 21.8125, "loss": 0.03164687007665634, "loss_ce": 0.00039305430254898965, "loss_iou": 0.1630859375, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 477844560, "step": 5235 }, { "epoch": 21.816666666666666, "grad_norm": 2.1300994137265494, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 477935896, "step": 5236 }, { "epoch": 21.816666666666666, "loss": 0.03342318534851074, "loss_ce": 0.00029635560349561274, "loss_iou": 0.287109375, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 477935896, "step": 5236 }, { "epoch": 21.820833333333333, "grad_norm": 4.482202232344798, "learning_rate": 5e-05, "loss": 0.0275, "num_input_tokens_seen": 478026960, "step": 5237 }, { "epoch": 21.820833333333333, "loss": 0.028818076476454735, "loss_ce": 0.0006961278268136084, "loss_iou": 0.25, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 478026960, "step": 5237 }, { "epoch": 21.825, "grad_norm": 2.2595071349267837, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 478118188, "step": 5238 }, { "epoch": 21.825, "loss": 0.026751350611448288, "loss_ce": 0.00017816826584748924, "loss_iou": 0.21875, "loss_num": 0.00531005859375, "loss_xval": 0.026611328125, "num_input_tokens_seen": 478118188, "step": 5238 }, { "epoch": 21.829166666666666, "grad_norm": 2.7136863235245485, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 478209636, "step": 5239 }, { "epoch": 21.829166666666666, "loss": 0.047790057957172394, "loss_ce": 0.00024366873549297452, "loss_iou": 0.232421875, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 478209636, "step": 5239 }, { "epoch": 21.833333333333332, "grad_norm": 2.423196777012122, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 478300992, "step": 5240 }, { "epoch": 21.833333333333332, "loss": 0.030891956761479378, "loss_ce": 0.00019127382256556302, "loss_iou": 0.23046875, "loss_num": 0.006134033203125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 478300992, "step": 5240 }, { "epoch": 21.8375, "grad_norm": 2.7051208449677895, "learning_rate": 5e-05, "loss": 0.0293, "num_input_tokens_seen": 478391728, "step": 5241 }, { "epoch": 21.8375, "loss": 0.024106372147798538, "loss_ce": 0.00021873789955861866, "loss_iou": 0.1943359375, "loss_num": 0.004791259765625, "loss_xval": 0.02392578125, "num_input_tokens_seen": 478391728, "step": 5241 }, { "epoch": 21.841666666666665, "grad_norm": 2.582985556850991, "learning_rate": 5e-05, "loss": 0.0318, "num_input_tokens_seen": 478483932, "step": 5242 }, { "epoch": 21.841666666666665, "loss": 0.041258759796619415, "loss_ce": 0.000525420589838177, "loss_iou": 0.265625, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 478483932, "step": 5242 }, { "epoch": 21.845833333333335, "grad_norm": 2.8424877418042307, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 478575216, "step": 5243 }, { "epoch": 21.845833333333335, "loss": 0.03894955664873123, "loss_ce": 0.000184601143701002, "loss_iou": 0.2578125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 478575216, "step": 5243 }, { "epoch": 21.85, "grad_norm": 2.235698937848587, "learning_rate": 5e-05, "loss": 0.0266, "num_input_tokens_seen": 478665472, "step": 5244 }, { "epoch": 21.85, "loss": 0.0273615550249815, "loss_ce": 0.00024668616242706776, "loss_iou": 0.25390625, "loss_num": 0.00543212890625, "loss_xval": 0.027099609375, "num_input_tokens_seen": 478665472, "step": 5244 }, { "epoch": 21.854166666666668, "grad_norm": 2.869722444238756, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 478756948, "step": 5245 }, { "epoch": 21.854166666666668, "loss": 0.09617424011230469, "loss_ce": 0.003171928459778428, "loss_iou": 0.255859375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 478756948, "step": 5245 }, { "epoch": 21.858333333333334, "grad_norm": 2.883646852211987, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 478848536, "step": 5246 }, { "epoch": 21.858333333333334, "loss": 0.06147945672273636, "loss_ce": 0.0002077911631204188, "loss_iou": 0.26953125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 478848536, "step": 5246 }, { "epoch": 21.8625, "grad_norm": 2.4193694224975797, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 478940084, "step": 5247 }, { "epoch": 21.8625, "loss": 0.05228853598237038, "loss_ce": 0.008594990707933903, "loss_iou": 0.189453125, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 478940084, "step": 5247 }, { "epoch": 21.866666666666667, "grad_norm": 1.2736474236894773, "learning_rate": 5e-05, "loss": 0.03, "num_input_tokens_seen": 479031040, "step": 5248 }, { "epoch": 21.866666666666667, "loss": 0.03317740187048912, "loss_ce": 0.0001726409827824682, "loss_iou": 0.134765625, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 479031040, "step": 5248 }, { "epoch": 21.870833333333334, "grad_norm": 1.7156603280658627, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 479122160, "step": 5249 }, { "epoch": 21.870833333333334, "loss": 0.049606695771217346, "loss_ce": 0.00019873742712661624, "loss_iou": 0.236328125, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 479122160, "step": 5249 }, { "epoch": 21.875, "grad_norm": 2.9339600092716287, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 479213764, "step": 5250 }, { "epoch": 21.875, "eval_seeclick_CIoU": 0.22049980238080025, "eval_seeclick_GIoU": 0.21388709545135498, "eval_seeclick_IoU": 0.32379044592380524, "eval_seeclick_MAE_all": 0.10728560388088226, "eval_seeclick_MAE_h": 0.07920366153120995, "eval_seeclick_MAE_w": 0.25277023017406464, "eval_seeclick_MAE_x_boxes": 0.22806067764759064, "eval_seeclick_MAE_y_boxes": 0.08363592252135277, "eval_seeclick_NUM_probability": 0.9999969601631165, "eval_seeclick_inside_bbox": 0.5866477340459824, "eval_seeclick_loss": 0.582019031047821, "eval_seeclick_loss_ce": 0.09686426445841789, "eval_seeclick_loss_iou": 0.4454345703125, "eval_seeclick_loss_num": 0.0921783447265625, "eval_seeclick_loss_xval": 0.4608154296875, "eval_seeclick_runtime": 76.6074, "eval_seeclick_samples_per_second": 0.561, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 479213764, "step": 5250 }, { "epoch": 21.875, "eval_icons_CIoU": 0.2262982428073883, "eval_icons_GIoU": 0.15895729139447212, "eval_icons_IoU": 0.3449597507715225, "eval_icons_MAE_all": 0.09179431200027466, "eval_icons_MAE_h": 0.1736046001315117, "eval_icons_MAE_w": 0.16428311169147491, "eval_icons_MAE_x_boxes": 0.16489966958761215, "eval_icons_MAE_y_boxes": 0.17304036766290665, "eval_icons_NUM_probability": 0.9999924004077911, "eval_icons_inside_bbox": 0.4878472238779068, "eval_icons_loss": 0.45651158690452576, "eval_icons_loss_ce": 0.008816860150545835, "eval_icons_loss_iou": 0.177001953125, "eval_icons_loss_num": 0.093536376953125, "eval_icons_loss_xval": 0.4674072265625, "eval_icons_runtime": 86.8027, "eval_icons_samples_per_second": 0.576, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 479213764, "step": 5250 }, { "epoch": 21.875, "eval_screenspot_CIoU": 0.3330252965291341, "eval_screenspot_GIoU": 0.3239004115263621, "eval_screenspot_IoU": 0.4180097281932831, "eval_screenspot_MAE_all": 0.09967080752054851, "eval_screenspot_MAE_h": 0.09926832715670268, "eval_screenspot_MAE_w": 0.21363018453121185, "eval_screenspot_MAE_x_boxes": 0.1893785446882248, "eval_screenspot_MAE_y_boxes": 0.09871842215458553, "eval_screenspot_NUM_probability": 0.9999570647875468, "eval_screenspot_inside_bbox": 0.6804166634877523, "eval_screenspot_loss": 0.5017403364181519, "eval_screenspot_loss_ce": 0.005790580064058304, "eval_screenspot_loss_iou": 0.3467203776041667, "eval_screenspot_loss_num": 0.10290018717447917, "eval_screenspot_loss_xval": 0.5142008463541666, "eval_screenspot_runtime": 149.4622, "eval_screenspot_samples_per_second": 0.595, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 479213764, "step": 5250 }, { "epoch": 21.875, "eval_compot_CIoU": 0.45339295268058777, "eval_compot_GIoU": 0.45081885159015656, "eval_compot_IoU": 0.5354850888252258, "eval_compot_MAE_all": 0.06074054725468159, "eval_compot_MAE_h": 0.04861530102789402, "eval_compot_MAE_w": 0.17756661027669907, "eval_compot_MAE_x_boxes": 0.1776425689458847, "eval_compot_MAE_y_boxes": 0.047350864857435226, "eval_compot_NUM_probability": 0.9999347925186157, "eval_compot_inside_bbox": 0.7361111044883728, "eval_compot_loss": 0.35404831171035767, "eval_compot_loss_ce": 0.04850252345204353, "eval_compot_loss_iou": 0.2886962890625, "eval_compot_loss_num": 0.059711456298828125, "eval_compot_loss_xval": 0.298583984375, "eval_compot_runtime": 86.1966, "eval_compot_samples_per_second": 0.58, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 479213764, "step": 5250 }, { "epoch": 21.875, "loss": 0.35134467482566833, "loss_ce": 0.05233343690633774, "loss_iou": 0.31640625, "loss_num": 0.059814453125, "loss_xval": 0.298828125, "num_input_tokens_seen": 479213764, "step": 5250 }, { "epoch": 21.879166666666666, "grad_norm": 4.492691268409018, "learning_rate": 5e-05, "loss": 0.0857, "num_input_tokens_seen": 479304724, "step": 5251 }, { "epoch": 21.879166666666666, "loss": 0.11784595996141434, "loss_ce": 0.00026172742946073413, "loss_iou": 0.294921875, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 479304724, "step": 5251 }, { "epoch": 21.883333333333333, "grad_norm": 5.380219994049467, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 479395364, "step": 5252 }, { "epoch": 21.883333333333333, "loss": 0.04840845614671707, "loss_ce": 0.0003127541858702898, "loss_iou": 0.359375, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 479395364, "step": 5252 }, { "epoch": 21.8875, "grad_norm": 2.4134467975947613, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 479486420, "step": 5253 }, { "epoch": 21.8875, "loss": 0.04481302201747894, "loss_ce": 0.0002573596721049398, "loss_iou": 0.23828125, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 479486420, "step": 5253 }, { "epoch": 21.891666666666666, "grad_norm": 2.3544784211872374, "learning_rate": 5e-05, "loss": 0.0224, "num_input_tokens_seen": 479577596, "step": 5254 }, { "epoch": 21.891666666666666, "loss": 0.020729443058371544, "loss_ce": 0.00013770633086096495, "loss_iou": 0.1787109375, "loss_num": 0.004119873046875, "loss_xval": 0.0206298828125, "num_input_tokens_seen": 479577596, "step": 5254 }, { "epoch": 21.895833333333332, "grad_norm": 2.0921788964058807, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 479669088, "step": 5255 }, { "epoch": 21.895833333333332, "loss": 0.04369882494211197, "loss_ce": 0.0002646785578690469, "loss_iou": 0.2080078125, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 479669088, "step": 5255 }, { "epoch": 21.9, "grad_norm": 1.389998062638131, "learning_rate": 5e-05, "loss": 0.0492, "num_input_tokens_seen": 479760124, "step": 5256 }, { "epoch": 21.9, "loss": 0.027485912665724754, "loss_ce": 0.0001269038039026782, "loss_iou": 0.216796875, "loss_num": 0.005462646484375, "loss_xval": 0.02734375, "num_input_tokens_seen": 479760124, "step": 5256 }, { "epoch": 21.904166666666665, "grad_norm": 1.559914252826017, "learning_rate": 5e-05, "loss": 0.0404, "num_input_tokens_seen": 479851468, "step": 5257 }, { "epoch": 21.904166666666665, "loss": 0.02916884422302246, "loss_ce": 0.00010847946396097541, "loss_iou": 0.26171875, "loss_num": 0.00579833984375, "loss_xval": 0.029052734375, "num_input_tokens_seen": 479851468, "step": 5257 }, { "epoch": 21.908333333333335, "grad_norm": 0.6527247533851892, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 479942660, "step": 5258 }, { "epoch": 21.908333333333335, "loss": 0.052520204335451126, "loss_ce": 0.00015203985094558448, "loss_iou": 0.27734375, "loss_num": 0.010498046875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 479942660, "step": 5258 }, { "epoch": 21.9125, "grad_norm": 2.4036418366319356, "learning_rate": 5e-05, "loss": 0.0292, "num_input_tokens_seen": 480033712, "step": 5259 }, { "epoch": 21.9125, "loss": 0.026913050562143326, "loss_ce": 8.81015439517796e-05, "loss_iou": 0.220703125, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 480033712, "step": 5259 }, { "epoch": 21.916666666666668, "grad_norm": 1.5052202432782815, "learning_rate": 5e-05, "loss": 0.0426, "num_input_tokens_seen": 480124500, "step": 5260 }, { "epoch": 21.916666666666668, "loss": 0.0278320275247097, "loss_ce": 9.917754505295306e-05, "loss_iou": 0.224609375, "loss_num": 0.00555419921875, "loss_xval": 0.0277099609375, "num_input_tokens_seen": 480124500, "step": 5260 }, { "epoch": 21.920833333333334, "grad_norm": 7.039157047532622, "learning_rate": 5e-05, "loss": 0.0355, "num_input_tokens_seen": 480215860, "step": 5261 }, { "epoch": 21.920833333333334, "loss": 0.03863038867712021, "loss_ce": 0.001780410297214985, "loss_iou": 0.2080078125, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 480215860, "step": 5261 }, { "epoch": 21.925, "grad_norm": 3.226045222418645, "learning_rate": 5e-05, "loss": 0.038, "num_input_tokens_seen": 480307224, "step": 5262 }, { "epoch": 21.925, "loss": 0.04644927382469177, "loss_ce": 0.0002304044901393354, "loss_iou": 0.298828125, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 480307224, "step": 5262 }, { "epoch": 21.929166666666667, "grad_norm": 3.41641248782928, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 480398828, "step": 5263 }, { "epoch": 21.929166666666667, "loss": 0.054157473146915436, "loss_ce": 0.00015662264195270836, "loss_iou": 0.265625, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 480398828, "step": 5263 }, { "epoch": 21.933333333333334, "grad_norm": 2.2687866903164013, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 480490488, "step": 5264 }, { "epoch": 21.933333333333334, "loss": 0.047733329236507416, "loss_ce": 0.0015907498309388757, "loss_iou": 0.162109375, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 480490488, "step": 5264 }, { "epoch": 21.9375, "grad_norm": 2.2036409399472876, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 480582308, "step": 5265 }, { "epoch": 21.9375, "loss": 0.0360955074429512, "loss_ce": 0.00023354000586550683, "loss_iou": 0.1708984375, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 480582308, "step": 5265 }, { "epoch": 21.941666666666666, "grad_norm": 1.5157849644907473, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 480673540, "step": 5266 }, { "epoch": 21.941666666666666, "loss": 0.02454659342765808, "loss_ce": 0.0029325177893042564, "loss_iou": 0.2353515625, "loss_num": 0.00433349609375, "loss_xval": 0.0216064453125, "num_input_tokens_seen": 480673540, "step": 5266 }, { "epoch": 21.945833333333333, "grad_norm": 2.283724280107787, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 480764408, "step": 5267 }, { "epoch": 21.945833333333333, "loss": 0.05202634632587433, "loss_ce": 0.00010068815026897937, "loss_iou": 0.216796875, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 480764408, "step": 5267 }, { "epoch": 21.95, "grad_norm": 2.406683325736589, "learning_rate": 5e-05, "loss": 0.0656, "num_input_tokens_seen": 480855996, "step": 5268 }, { "epoch": 21.95, "loss": 0.09315572679042816, "loss_ce": 0.0002907381858676672, "loss_iou": 0.3515625, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 480855996, "step": 5268 }, { "epoch": 21.954166666666666, "grad_norm": 2.810274551612445, "learning_rate": 5e-05, "loss": 0.0376, "num_input_tokens_seen": 480947560, "step": 5269 }, { "epoch": 21.954166666666666, "loss": 0.05150197818875313, "loss_ce": 0.00014089501928538084, "loss_iou": 0.271484375, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 480947560, "step": 5269 }, { "epoch": 21.958333333333332, "grad_norm": 3.7126604800620457, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 481039152, "step": 5270 }, { "epoch": 21.958333333333332, "loss": 0.044563956558704376, "loss_ce": 0.00020665646297857165, "loss_iou": 0.2734375, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 481039152, "step": 5270 }, { "epoch": 21.9625, "grad_norm": 3.3842874869357606, "learning_rate": 5e-05, "loss": 0.0332, "num_input_tokens_seen": 481130816, "step": 5271 }, { "epoch": 21.9625, "loss": 0.03572816029191017, "loss_ce": 0.00011414707114454359, "loss_iou": 0.275390625, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 481130816, "step": 5271 }, { "epoch": 21.966666666666665, "grad_norm": 2.8120471497391386, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 481222128, "step": 5272 }, { "epoch": 21.966666666666665, "loss": 0.043471939861774445, "loss_ce": 9.120319009525701e-05, "loss_iou": 0.35546875, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 481222128, "step": 5272 }, { "epoch": 21.970833333333335, "grad_norm": 1.8730839606126495, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 481313140, "step": 5273 }, { "epoch": 21.970833333333335, "loss": 0.027745647355914116, "loss_ce": 0.00014249965897761285, "loss_iou": 0.16796875, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 481313140, "step": 5273 }, { "epoch": 21.975, "grad_norm": 1.8566497064524343, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 481404928, "step": 5274 }, { "epoch": 21.975, "loss": 0.05320358648896217, "loss_ce": 0.0001945517724379897, "loss_iou": 0.33984375, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 481404928, "step": 5274 }, { "epoch": 21.979166666666668, "grad_norm": 1.6505782038848729, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 481496404, "step": 5275 }, { "epoch": 21.979166666666668, "loss": 0.06193459406495094, "loss_ce": 0.0005713765858672559, "loss_iou": 0.2099609375, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 481496404, "step": 5275 }, { "epoch": 21.983333333333334, "grad_norm": 2.0673108497867694, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 481588516, "step": 5276 }, { "epoch": 21.983333333333334, "loss": 0.0274334903806448, "loss_ce": 0.00366029585711658, "loss_iou": 0.244140625, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 481588516, "step": 5276 }, { "epoch": 21.9875, "grad_norm": 0.6926522013161461, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 481680176, "step": 5277 }, { "epoch": 21.9875, "loss": 0.03269730508327484, "loss_ce": 0.00021134436246939003, "loss_iou": 0.2431640625, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 481680176, "step": 5277 }, { "epoch": 21.991666666666667, "grad_norm": 2.836791567215157, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 481771536, "step": 5278 }, { "epoch": 21.991666666666667, "loss": 0.031540993601083755, "loss_ce": 0.00033677060855552554, "loss_iou": 0.201171875, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 481771536, "step": 5278 }, { "epoch": 21.995833333333334, "grad_norm": 4.867023021726077, "learning_rate": 5e-05, "loss": 0.028, "num_input_tokens_seen": 481861428, "step": 5279 }, { "epoch": 21.995833333333334, "loss": 0.025719735771417618, "loss_ce": 6.208197009982541e-05, "loss_iou": 0.2490234375, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 481861428, "step": 5279 }, { "epoch": 22.0, "grad_norm": 1.02537337810124, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 481951132, "step": 5280 }, { "epoch": 22.0, "loss": 0.032459720969200134, "loss_ce": 0.0001110858574975282, "loss_iou": 0.26171875, "loss_num": 0.0064697265625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 481951132, "step": 5280 }, { "epoch": 22.004166666666666, "grad_norm": 1.0512527663811588, "learning_rate": 5e-05, "loss": 0.0581, "num_input_tokens_seen": 482042020, "step": 5281 }, { "epoch": 22.004166666666666, "loss": 0.043644338846206665, "loss_ce": 7.286606705747545e-05, "loss_iou": 0.189453125, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 482042020, "step": 5281 }, { "epoch": 22.008333333333333, "grad_norm": 2.0710280576409925, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 482133376, "step": 5282 }, { "epoch": 22.008333333333333, "loss": 0.030969956889748573, "loss_ce": 0.0001472034491598606, "loss_iou": 0.09912109375, "loss_num": 0.00616455078125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 482133376, "step": 5282 }, { "epoch": 22.0125, "grad_norm": 2.4110739556831646, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 482224396, "step": 5283 }, { "epoch": 22.0125, "loss": 0.041511379182338715, "loss_ce": 9.139893518295139e-05, "loss_iou": 0.12890625, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 482224396, "step": 5283 }, { "epoch": 22.016666666666666, "grad_norm": 1.5776653945280796, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 482315864, "step": 5284 }, { "epoch": 22.016666666666666, "loss": 0.02530059777200222, "loss_ce": 0.0001388565287925303, "loss_iou": 0.1220703125, "loss_num": 0.005035400390625, "loss_xval": 0.025146484375, "num_input_tokens_seen": 482315864, "step": 5284 }, { "epoch": 22.020833333333332, "grad_norm": 2.6097619209688183, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 482408016, "step": 5285 }, { "epoch": 22.020833333333332, "loss": 0.025224104523658752, "loss_ce": 0.00033701941720210016, "loss_iou": 0.2294921875, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 482408016, "step": 5285 }, { "epoch": 22.025, "grad_norm": 2.4991762130925825, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 482499076, "step": 5286 }, { "epoch": 22.025, "loss": 0.024774424731731415, "loss_ce": 0.0004290279175620526, "loss_iou": 0.294921875, "loss_num": 0.0048828125, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 482499076, "step": 5286 }, { "epoch": 22.029166666666665, "grad_norm": 1.711039056793413, "learning_rate": 5e-05, "loss": 0.0215, "num_input_tokens_seen": 482591584, "step": 5287 }, { "epoch": 22.029166666666665, "loss": 0.021085284650325775, "loss_ce": 0.00011970890773227438, "loss_iou": 0.1640625, "loss_num": 0.004180908203125, "loss_xval": 0.02099609375, "num_input_tokens_seen": 482591584, "step": 5287 }, { "epoch": 22.033333333333335, "grad_norm": 1.9039882747765806, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 482681820, "step": 5288 }, { "epoch": 22.033333333333335, "loss": 0.023020733147859573, "loss_ce": 0.00024699015193618834, "loss_iou": 0.1865234375, "loss_num": 0.004547119140625, "loss_xval": 0.0228271484375, "num_input_tokens_seen": 482681820, "step": 5288 }, { "epoch": 22.0375, "grad_norm": 1.5749168952865242, "learning_rate": 5e-05, "loss": 0.0204, "num_input_tokens_seen": 482773252, "step": 5289 }, { "epoch": 22.0375, "loss": 0.0190866831690073, "loss_ce": 0.0002344487002119422, "loss_iou": 0.12060546875, "loss_num": 0.0037689208984375, "loss_xval": 0.018798828125, "num_input_tokens_seen": 482773252, "step": 5289 }, { "epoch": 22.041666666666668, "grad_norm": 1.716734322199081, "learning_rate": 5e-05, "loss": 0.0838, "num_input_tokens_seen": 482864240, "step": 5290 }, { "epoch": 22.041666666666668, "loss": 0.13151633739471436, "loss_ce": 0.00013816305727232248, "loss_iou": 0.310546875, "loss_num": 0.0262451171875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 482864240, "step": 5290 }, { "epoch": 22.045833333333334, "grad_norm": 3.0159664173797176, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 482955372, "step": 5291 }, { "epoch": 22.045833333333334, "loss": 0.031092027202248573, "loss_ce": 0.00011668415390886366, "loss_iou": 0.2490234375, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 482955372, "step": 5291 }, { "epoch": 22.05, "grad_norm": 4.501999040234554, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 483046876, "step": 5292 }, { "epoch": 22.05, "loss": 0.05462396889925003, "loss_ce": 0.00011957163223996758, "loss_iou": 0.34765625, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 483046876, "step": 5292 }, { "epoch": 22.054166666666667, "grad_norm": 2.982610396408003, "learning_rate": 5e-05, "loss": 0.0703, "num_input_tokens_seen": 483138532, "step": 5293 }, { "epoch": 22.054166666666667, "loss": 0.050793588161468506, "loss_ce": 0.0008363127708435059, "loss_iou": 0.30859375, "loss_num": 0.00994873046875, "loss_xval": 0.050048828125, "num_input_tokens_seen": 483138532, "step": 5293 }, { "epoch": 22.058333333333334, "grad_norm": 2.5559913977150157, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 483229876, "step": 5294 }, { "epoch": 22.058333333333334, "loss": 0.03200722858309746, "loss_ce": 0.00010110236325999722, "loss_iou": 0.296875, "loss_num": 0.006378173828125, "loss_xval": 0.031982421875, "num_input_tokens_seen": 483229876, "step": 5294 }, { "epoch": 22.0625, "grad_norm": 1.6884618816929389, "learning_rate": 5e-05, "loss": 0.0781, "num_input_tokens_seen": 483321328, "step": 5295 }, { "epoch": 22.0625, "loss": 0.0712161660194397, "loss_ce": 7.969448051881045e-05, "loss_iou": 0.21875, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 483321328, "step": 5295 }, { "epoch": 22.066666666666666, "grad_norm": 1.5093449135600019, "learning_rate": 5e-05, "loss": 0.0293, "num_input_tokens_seen": 483412264, "step": 5296 }, { "epoch": 22.066666666666666, "loss": 0.015982337296009064, "loss_ce": 7.504779932787642e-05, "loss_iou": 0.04052734375, "loss_num": 0.0031890869140625, "loss_xval": 0.015869140625, "num_input_tokens_seen": 483412264, "step": 5296 }, { "epoch": 22.070833333333333, "grad_norm": 1.485904538435143, "learning_rate": 5e-05, "loss": 0.0218, "num_input_tokens_seen": 483503844, "step": 5297 }, { "epoch": 22.070833333333333, "loss": 0.02632717601954937, "loss_ce": 9.731962927617133e-05, "loss_iou": 0.1826171875, "loss_num": 0.0052490234375, "loss_xval": 0.0262451171875, "num_input_tokens_seen": 483503844, "step": 5297 }, { "epoch": 22.075, "grad_norm": 0.9224941304114999, "learning_rate": 5e-05, "loss": 0.0834, "num_input_tokens_seen": 483594920, "step": 5298 }, { "epoch": 22.075, "loss": 0.14659483730793, "loss_ce": 6.469509389717132e-05, "loss_iou": 0.1484375, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 483594920, "step": 5298 }, { "epoch": 22.079166666666666, "grad_norm": 2.282638840202788, "learning_rate": 5e-05, "loss": 0.0252, "num_input_tokens_seen": 483685976, "step": 5299 }, { "epoch": 22.079166666666666, "loss": 0.022921577095985413, "loss_ce": 6.391090573742986e-05, "loss_iou": 0.2177734375, "loss_num": 0.00457763671875, "loss_xval": 0.0228271484375, "num_input_tokens_seen": 483685976, "step": 5299 }, { "epoch": 22.083333333333332, "grad_norm": 2.6521355470734473, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 483777272, "step": 5300 }, { "epoch": 22.083333333333332, "loss": 0.046746619045734406, "loss_ce": 8.52411612868309e-05, "loss_iou": 0.2314453125, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 483777272, "step": 5300 }, { "epoch": 22.0875, "grad_norm": 2.7199186660563672, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 483868844, "step": 5301 }, { "epoch": 22.0875, "loss": 0.059269554913043976, "loss_ce": 0.00018752555479295552, "loss_iou": 0.06884765625, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 483868844, "step": 5301 }, { "epoch": 22.091666666666665, "grad_norm": 6.386667306569148, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 483959484, "step": 5302 }, { "epoch": 22.091666666666665, "loss": 0.035696543753147125, "loss_ce": 3.675363404909149e-05, "loss_iou": 0.193359375, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 483959484, "step": 5302 }, { "epoch": 22.095833333333335, "grad_norm": 2.8292560563918876, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 484050572, "step": 5303 }, { "epoch": 22.095833333333335, "loss": 0.024900998920202255, "loss_ce": 0.0009294397314079106, "loss_iou": 0.2470703125, "loss_num": 0.004791259765625, "loss_xval": 0.02392578125, "num_input_tokens_seen": 484050572, "step": 5303 }, { "epoch": 22.1, "grad_norm": 1.6401184329935514, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 484142108, "step": 5304 }, { "epoch": 22.1, "loss": 0.06375754624605179, "loss_ce": 5.2097351726843044e-05, "loss_iou": 0.251953125, "loss_num": 0.01275634765625, "loss_xval": 0.0634765625, "num_input_tokens_seen": 484142108, "step": 5304 }, { "epoch": 22.104166666666668, "grad_norm": 1.3663876458902557, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 484233692, "step": 5305 }, { "epoch": 22.104166666666668, "loss": 0.03514707088470459, "loss_ce": 0.0007537580095231533, "loss_iou": 0.12060546875, "loss_num": 0.006866455078125, "loss_xval": 0.034423828125, "num_input_tokens_seen": 484233692, "step": 5305 }, { "epoch": 22.108333333333334, "grad_norm": 2.06616645999263, "learning_rate": 5e-05, "loss": 0.0274, "num_input_tokens_seen": 484323416, "step": 5306 }, { "epoch": 22.108333333333334, "loss": 0.024102866649627686, "loss_ce": 8.55340767884627e-05, "loss_iou": 0.162109375, "loss_num": 0.004791259765625, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 484323416, "step": 5306 }, { "epoch": 22.1125, "grad_norm": 1.2662155080798614, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 484414608, "step": 5307 }, { "epoch": 22.1125, "loss": 0.033745765686035156, "loss_ce": 7.724766328465194e-05, "loss_iou": 0.130859375, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 484414608, "step": 5307 }, { "epoch": 22.116666666666667, "grad_norm": 1.852131239125287, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 484506084, "step": 5308 }, { "epoch": 22.116666666666667, "loss": 0.028312429785728455, "loss_ce": 7.985768024809659e-05, "loss_iou": 0.13671875, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 484506084, "step": 5308 }, { "epoch": 22.120833333333334, "grad_norm": 1.9954084619623214, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 484597320, "step": 5309 }, { "epoch": 22.120833333333334, "loss": 0.08587288111448288, "loss_ce": 0.00010322533489670604, "loss_iou": 0.126953125, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 484597320, "step": 5309 }, { "epoch": 22.125, "grad_norm": 12.733430054841147, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 484688572, "step": 5310 }, { "epoch": 22.125, "loss": 0.06032126024365425, "loss_ce": 4.141611134400591e-05, "loss_iou": 0.2109375, "loss_num": 0.01202392578125, "loss_xval": 0.060302734375, "num_input_tokens_seen": 484688572, "step": 5310 }, { "epoch": 22.129166666666666, "grad_norm": 1.793970054204257, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 484780296, "step": 5311 }, { "epoch": 22.129166666666666, "loss": 0.02716052532196045, "loss_ce": 0.0001372115802951157, "loss_iou": 0.1328125, "loss_num": 0.005401611328125, "loss_xval": 0.0269775390625, "num_input_tokens_seen": 484780296, "step": 5311 }, { "epoch": 22.133333333333333, "grad_norm": 2.2711533570209013, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 484872084, "step": 5312 }, { "epoch": 22.133333333333333, "loss": 0.07867635786533356, "loss_ce": 0.0001851472770795226, "loss_iou": 0.291015625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 484872084, "step": 5312 }, { "epoch": 22.1375, "grad_norm": 2.4990669253833677, "learning_rate": 5e-05, "loss": 0.0233, "num_input_tokens_seen": 484963592, "step": 5313 }, { "epoch": 22.1375, "loss": 0.026571379974484444, "loss_ce": 0.00012789816537406296, "loss_iou": 0.236328125, "loss_num": 0.005279541015625, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 484963592, "step": 5313 }, { "epoch": 22.141666666666666, "grad_norm": 7.084602588469983, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 485055388, "step": 5314 }, { "epoch": 22.141666666666666, "loss": 0.04131896421313286, "loss_ce": 0.0003033394750673324, "loss_iou": 0.337890625, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 485055388, "step": 5314 }, { "epoch": 22.145833333333332, "grad_norm": 2.734101352423799, "learning_rate": 5e-05, "loss": 0.0262, "num_input_tokens_seen": 485146524, "step": 5315 }, { "epoch": 22.145833333333332, "loss": 0.025176800787448883, "loss_ce": 7.60941329644993e-05, "loss_iou": 0.279296875, "loss_num": 0.0050048828125, "loss_xval": 0.025146484375, "num_input_tokens_seen": 485146524, "step": 5315 }, { "epoch": 22.15, "grad_norm": 3.0314632006075444, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 485237700, "step": 5316 }, { "epoch": 22.15, "loss": 0.05775919556617737, "loss_ce": 0.0045517971739172935, "loss_iou": 0.27734375, "loss_num": 0.0106201171875, "loss_xval": 0.05322265625, "num_input_tokens_seen": 485237700, "step": 5316 }, { "epoch": 22.154166666666665, "grad_norm": 2.742873805341264, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 485328580, "step": 5317 }, { "epoch": 22.154166666666665, "loss": 0.038473501801490784, "loss_ce": 5.186857742955908e-05, "loss_iou": 0.27734375, "loss_num": 0.0076904296875, "loss_xval": 0.038330078125, "num_input_tokens_seen": 485328580, "step": 5317 }, { "epoch": 22.158333333333335, "grad_norm": 3.0947148946477605, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 485419876, "step": 5318 }, { "epoch": 22.158333333333335, "loss": 0.054676301777362823, "loss_ce": 0.0001185036962851882, "loss_iou": 0.17578125, "loss_num": 0.01092529296875, "loss_xval": 0.054443359375, "num_input_tokens_seen": 485419876, "step": 5318 }, { "epoch": 22.1625, "grad_norm": 1.2289292236498561, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 485511428, "step": 5319 }, { "epoch": 22.1625, "loss": 0.03030611202120781, "loss_ce": 0.00012422689178492874, "loss_iou": 0.130859375, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 485511428, "step": 5319 }, { "epoch": 22.166666666666668, "grad_norm": 0.7004978757823226, "learning_rate": 5e-05, "loss": 0.1065, "num_input_tokens_seen": 485602360, "step": 5320 }, { "epoch": 22.166666666666668, "loss": 0.05163067951798439, "loss_ce": 0.0003230020229239017, "loss_iou": 0.1875, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 485602360, "step": 5320 }, { "epoch": 22.170833333333334, "grad_norm": 1.0268657155267547, "learning_rate": 5e-05, "loss": 0.0298, "num_input_tokens_seen": 485693464, "step": 5321 }, { "epoch": 22.170833333333334, "loss": 0.04323270916938782, "loss_ce": 5.0333379476796836e-05, "loss_iou": 0.193359375, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 485693464, "step": 5321 }, { "epoch": 22.175, "grad_norm": 0.5279806498577583, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 485784804, "step": 5322 }, { "epoch": 22.175, "loss": 0.017814230173826218, "loss_ce": 6.062957982067019e-05, "loss_iou": 0.125, "loss_num": 0.0035552978515625, "loss_xval": 0.0177001953125, "num_input_tokens_seen": 485784804, "step": 5322 }, { "epoch": 22.179166666666667, "grad_norm": 4.115271450580411, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 485876136, "step": 5323 }, { "epoch": 22.179166666666667, "loss": 0.023923374712467194, "loss_ce": 5.8628491387935355e-05, "loss_iou": 0.201171875, "loss_num": 0.004791259765625, "loss_xval": 0.02392578125, "num_input_tokens_seen": 485876136, "step": 5323 }, { "epoch": 22.183333333333334, "grad_norm": 1.1259019805751649, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 485967676, "step": 5324 }, { "epoch": 22.183333333333334, "loss": 0.05798065662384033, "loss_ce": 3.92239453503862e-05, "loss_iou": 0.2080078125, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 485967676, "step": 5324 }, { "epoch": 22.1875, "grad_norm": 1.8439616652645208, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 486059168, "step": 5325 }, { "epoch": 22.1875, "loss": 0.04111999273300171, "loss_ce": 3.951697362936102e-05, "loss_iou": 0.045166015625, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 486059168, "step": 5325 }, { "epoch": 22.191666666666666, "grad_norm": 1.4991446665938253, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 486151080, "step": 5326 }, { "epoch": 22.191666666666666, "loss": 0.05851975455880165, "loss_ce": 0.00019684791914187372, "loss_iou": 0.1357421875, "loss_num": 0.01171875, "loss_xval": 0.058349609375, "num_input_tokens_seen": 486151080, "step": 5326 }, { "epoch": 22.195833333333333, "grad_norm": 12.291938403856095, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 486242284, "step": 5327 }, { "epoch": 22.195833333333333, "loss": 0.0813756138086319, "loss_ce": 4.627187809091993e-05, "loss_iou": 0.248046875, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 486242284, "step": 5327 }, { "epoch": 22.2, "grad_norm": 1.4562714436129898, "learning_rate": 5e-05, "loss": 0.0304, "num_input_tokens_seen": 486334084, "step": 5328 }, { "epoch": 22.2, "loss": 0.02499345690011978, "loss_ce": 8.348390110768378e-05, "loss_iou": 0.2265625, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 486334084, "step": 5328 }, { "epoch": 22.204166666666666, "grad_norm": 1.3089633299319143, "learning_rate": 5e-05, "loss": 0.0292, "num_input_tokens_seen": 486426152, "step": 5329 }, { "epoch": 22.204166666666666, "loss": 0.016233263537287712, "loss_ce": 0.00012379758118186146, "loss_iou": 0.205078125, "loss_num": 0.0032196044921875, "loss_xval": 0.01611328125, "num_input_tokens_seen": 486426152, "step": 5329 }, { "epoch": 22.208333333333332, "grad_norm": 2.426740851370266, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 486517208, "step": 5330 }, { "epoch": 22.208333333333332, "loss": 0.05923628434538841, "loss_ce": 5.5069765949156135e-05, "loss_iou": 0.1484375, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 486517208, "step": 5330 }, { "epoch": 22.2125, "grad_norm": 3.210597140596656, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 486609036, "step": 5331 }, { "epoch": 22.2125, "loss": 0.054034460335969925, "loss_ce": 0.002108802553266287, "loss_iou": 0.171875, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 486609036, "step": 5331 }, { "epoch": 22.216666666666665, "grad_norm": 2.111383574118387, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 486700124, "step": 5332 }, { "epoch": 22.216666666666665, "loss": 0.07857811450958252, "loss_ce": 5.637863068841398e-05, "loss_iou": 0.3046875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 486700124, "step": 5332 }, { "epoch": 22.220833333333335, "grad_norm": 2.978790690703692, "learning_rate": 5e-05, "loss": 0.037, "num_input_tokens_seen": 486791124, "step": 5333 }, { "epoch": 22.220833333333335, "loss": 0.024580243974924088, "loss_ce": 4.4111726310802624e-05, "loss_iou": 0.1494140625, "loss_num": 0.004913330078125, "loss_xval": 0.0245361328125, "num_input_tokens_seen": 486791124, "step": 5333 }, { "epoch": 22.225, "grad_norm": 4.959953751790821, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 486882660, "step": 5334 }, { "epoch": 22.225, "loss": 0.054794326424598694, "loss_ce": 7.630858453921974e-05, "loss_iou": 0.25390625, "loss_num": 0.010986328125, "loss_xval": 0.0546875, "num_input_tokens_seen": 486882660, "step": 5334 }, { "epoch": 22.229166666666668, "grad_norm": 1.4963218782355763, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 486973948, "step": 5335 }, { "epoch": 22.229166666666668, "loss": 0.04119858145713806, "loss_ce": 5.3257987019605935e-05, "loss_iou": 0.1728515625, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 486973948, "step": 5335 }, { "epoch": 22.233333333333334, "grad_norm": 4.227896015406954, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 487065208, "step": 5336 }, { "epoch": 22.233333333333334, "loss": 0.04797931760549545, "loss_ce": 0.0002650836540851742, "loss_iou": 0.275390625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 487065208, "step": 5336 }, { "epoch": 22.2375, "grad_norm": 2.962284188425735, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 487156896, "step": 5337 }, { "epoch": 22.2375, "loss": 0.04217856377363205, "loss_ce": 6.430511712096632e-05, "loss_iou": 0.158203125, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 487156896, "step": 5337 }, { "epoch": 22.241666666666667, "grad_norm": 2.3401394567648683, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 487248404, "step": 5338 }, { "epoch": 22.241666666666667, "loss": 0.029043810442090034, "loss_ce": 2.9222243028925732e-05, "loss_iou": 0.16796875, "loss_num": 0.00579833984375, "loss_xval": 0.029052734375, "num_input_tokens_seen": 487248404, "step": 5338 }, { "epoch": 22.245833333333334, "grad_norm": 1.8049075424901455, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 487339940, "step": 5339 }, { "epoch": 22.245833333333334, "loss": 0.030754217877984047, "loss_ce": 3.827477485174313e-05, "loss_iou": 0.2021484375, "loss_num": 0.006134033203125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 487339940, "step": 5339 }, { "epoch": 22.25, "grad_norm": 1.8560570430837169, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 487430952, "step": 5340 }, { "epoch": 22.25, "loss": 0.04377035051584244, "loss_ce": 2.3403106752084568e-05, "loss_iou": 0.265625, "loss_num": 0.0087890625, "loss_xval": 0.043701171875, "num_input_tokens_seen": 487430952, "step": 5340 }, { "epoch": 22.254166666666666, "grad_norm": 2.220258487143648, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 487521912, "step": 5341 }, { "epoch": 22.254166666666666, "loss": 0.03143194317817688, "loss_ce": 2.9357390303630382e-05, "loss_iou": 0.31640625, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 487521912, "step": 5341 }, { "epoch": 22.258333333333333, "grad_norm": 2.406366107649946, "learning_rate": 5e-05, "loss": 0.0648, "num_input_tokens_seen": 487612848, "step": 5342 }, { "epoch": 22.258333333333333, "loss": 0.10091523826122284, "loss_ce": 3.9381353417411447e-05, "loss_iou": 0.2001953125, "loss_num": 0.0201416015625, "loss_xval": 0.10107421875, "num_input_tokens_seen": 487612848, "step": 5342 }, { "epoch": 22.2625, "grad_norm": 2.050396271138617, "learning_rate": 5e-05, "loss": 0.0598, "num_input_tokens_seen": 487704392, "step": 5343 }, { "epoch": 22.2625, "loss": 0.02968679741024971, "loss_ce": 0.0001762998872436583, "loss_iou": 0.1787109375, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 487704392, "step": 5343 }, { "epoch": 22.266666666666666, "grad_norm": 1.3513413531259062, "learning_rate": 5e-05, "loss": 0.0734, "num_input_tokens_seen": 487795284, "step": 5344 }, { "epoch": 22.266666666666666, "loss": 0.057062018662691116, "loss_ce": 0.00023829156998544931, "loss_iou": 0.267578125, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 487795284, "step": 5344 }, { "epoch": 22.270833333333332, "grad_norm": 2.863963273524733, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 487886308, "step": 5345 }, { "epoch": 22.270833333333332, "loss": 0.05691341683268547, "loss_ce": 2.864954512915574e-05, "loss_iou": 0.1513671875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 487886308, "step": 5345 }, { "epoch": 22.275, "grad_norm": 1.9127195605234466, "learning_rate": 5e-05, "loss": 0.0256, "num_input_tokens_seen": 487978188, "step": 5346 }, { "epoch": 22.275, "loss": 0.02111220732331276, "loss_ce": 6.270705489441752e-05, "loss_iou": 0.2265625, "loss_num": 0.00421142578125, "loss_xval": 0.02099609375, "num_input_tokens_seen": 487978188, "step": 5346 }, { "epoch": 22.279166666666665, "grad_norm": 2.39834707809313, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 488068596, "step": 5347 }, { "epoch": 22.279166666666665, "loss": 0.03273743391036987, "loss_ce": 6.836632383055985e-05, "loss_iou": 0.2578125, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 488068596, "step": 5347 }, { "epoch": 22.283333333333335, "grad_norm": 3.0382686069871174, "learning_rate": 5e-05, "loss": 0.0321, "num_input_tokens_seen": 488160336, "step": 5348 }, { "epoch": 22.283333333333335, "loss": 0.04009832814335823, "loss_ce": 5.926272933720611e-05, "loss_iou": 0.25390625, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 488160336, "step": 5348 }, { "epoch": 22.2875, "grad_norm": 3.3178942224571095, "learning_rate": 5e-05, "loss": 0.0805, "num_input_tokens_seen": 488252200, "step": 5349 }, { "epoch": 22.2875, "loss": 0.04742604121565819, "loss_ce": 0.0003603041695896536, "loss_iou": 0.240234375, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 488252200, "step": 5349 }, { "epoch": 22.291666666666668, "grad_norm": 3.3946229074438197, "learning_rate": 5e-05, "loss": 0.0732, "num_input_tokens_seen": 488343676, "step": 5350 }, { "epoch": 22.291666666666668, "loss": 0.040834080427885056, "loss_ce": 0.00010837269655894488, "loss_iou": 0.3125, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 488343676, "step": 5350 }, { "epoch": 22.295833333333334, "grad_norm": 3.3166911799541503, "learning_rate": 5e-05, "loss": 0.0722, "num_input_tokens_seen": 488434972, "step": 5351 }, { "epoch": 22.295833333333334, "loss": 0.10436109453439713, "loss_ce": 9.779349784366786e-05, "loss_iou": 0.203125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 488434972, "step": 5351 }, { "epoch": 22.3, "grad_norm": 3.277188400787632, "learning_rate": 5e-05, "loss": 0.0294, "num_input_tokens_seen": 488527088, "step": 5352 }, { "epoch": 22.3, "loss": 0.03265918046236038, "loss_ce": 2.063011925201863e-05, "loss_iou": 0.3046875, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 488527088, "step": 5352 }, { "epoch": 22.304166666666667, "grad_norm": 3.244957587048781, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 488618212, "step": 5353 }, { "epoch": 22.304166666666667, "loss": 0.03455101326107979, "loss_ce": 8.140966383507475e-05, "loss_iou": 0.212890625, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 488618212, "step": 5353 }, { "epoch": 22.308333333333334, "grad_norm": 2.133454868668326, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 488709552, "step": 5354 }, { "epoch": 22.308333333333334, "loss": 0.027545509859919548, "loss_ce": 0.001201209961436689, "loss_iou": 0.03173828125, "loss_num": 0.005279541015625, "loss_xval": 0.0263671875, "num_input_tokens_seen": 488709552, "step": 5354 }, { "epoch": 22.3125, "grad_norm": 2.270121558074414, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 488800540, "step": 5355 }, { "epoch": 22.3125, "loss": 0.02278338000178337, "loss_ce": 0.00011644795449683443, "loss_iou": 0.1669921875, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 488800540, "step": 5355 }, { "epoch": 22.316666666666666, "grad_norm": 3.5211375058871357, "learning_rate": 5e-05, "loss": 0.0587, "num_input_tokens_seen": 488892136, "step": 5356 }, { "epoch": 22.316666666666666, "loss": 0.08479742705821991, "loss_ce": 0.00018743482360150665, "loss_iou": 0.298828125, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 488892136, "step": 5356 }, { "epoch": 22.320833333333333, "grad_norm": 2.5951780334016963, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 488983396, "step": 5357 }, { "epoch": 22.320833333333333, "loss": 0.030574705451726913, "loss_ce": 3.42391176673118e-05, "loss_iou": 0.1474609375, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 488983396, "step": 5357 }, { "epoch": 22.325, "grad_norm": 2.2765668696194092, "learning_rate": 5e-05, "loss": 0.0894, "num_input_tokens_seen": 489075212, "step": 5358 }, { "epoch": 22.325, "loss": 0.11566958576440811, "loss_ce": 0.004524565767496824, "loss_iou": 0.06494140625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 489075212, "step": 5358 }, { "epoch": 22.329166666666666, "grad_norm": 2.426511575607964, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 489166424, "step": 5359 }, { "epoch": 22.329166666666666, "loss": 0.022829484194517136, "loss_ce": 2.522457180020865e-05, "loss_iou": 0.189453125, "loss_num": 0.004547119140625, "loss_xval": 0.0228271484375, "num_input_tokens_seen": 489166424, "step": 5359 }, { "epoch": 22.333333333333332, "grad_norm": 2.384897569870611, "learning_rate": 5e-05, "loss": 0.0243, "num_input_tokens_seen": 489258044, "step": 5360 }, { "epoch": 22.333333333333332, "loss": 0.018354296684265137, "loss_ce": 2.086133827106096e-05, "loss_iou": 0.2041015625, "loss_num": 0.003662109375, "loss_xval": 0.018310546875, "num_input_tokens_seen": 489258044, "step": 5360 }, { "epoch": 22.3375, "grad_norm": 3.4264892694382643, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 489349168, "step": 5361 }, { "epoch": 22.3375, "loss": 0.034981995820999146, "loss_ce": 0.00010040501365438104, "loss_iou": 0.271484375, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 489349168, "step": 5361 }, { "epoch": 22.341666666666665, "grad_norm": 35.093081007506875, "learning_rate": 5e-05, "loss": 0.0561, "num_input_tokens_seen": 489440432, "step": 5362 }, { "epoch": 22.341666666666665, "loss": 0.0685751810669899, "loss_ce": 1.7436836060369387e-05, "loss_iou": 0.1884765625, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 489440432, "step": 5362 }, { "epoch": 22.345833333333335, "grad_norm": 2.7972034028546755, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 489531972, "step": 5363 }, { "epoch": 22.345833333333335, "loss": 0.042357951402664185, "loss_ce": 0.002158672781661153, "loss_iou": 0.173828125, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 489531972, "step": 5363 }, { "epoch": 22.35, "grad_norm": 3.494472550638683, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 489623632, "step": 5364 }, { "epoch": 22.35, "loss": 0.0324832946062088, "loss_ce": 2.78517218248453e-05, "loss_iou": 0.1806640625, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 489623632, "step": 5364 }, { "epoch": 22.354166666666668, "grad_norm": 4.362105699287279, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 489714560, "step": 5365 }, { "epoch": 22.354166666666668, "loss": 0.043050382286310196, "loss_ce": 3.585560625651851e-05, "loss_iou": 0.240234375, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 489714560, "step": 5365 }, { "epoch": 22.358333333333334, "grad_norm": 2.8929406516760863, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 489805968, "step": 5366 }, { "epoch": 22.358333333333334, "loss": 0.035020582377910614, "loss_ce": 1.691956822469365e-05, "loss_iou": 0.23828125, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 489805968, "step": 5366 }, { "epoch": 22.3625, "grad_norm": 3.0977638120639153, "learning_rate": 5e-05, "loss": 0.0456, "num_input_tokens_seen": 489897024, "step": 5367 }, { "epoch": 22.3625, "loss": 0.02139360085129738, "loss_ce": 3.8926802517380565e-05, "loss_iou": 0.25390625, "loss_num": 0.0042724609375, "loss_xval": 0.0213623046875, "num_input_tokens_seen": 489897024, "step": 5367 }, { "epoch": 22.366666666666667, "grad_norm": 1.3422920759104453, "learning_rate": 5e-05, "loss": 0.0302, "num_input_tokens_seen": 489988420, "step": 5368 }, { "epoch": 22.366666666666667, "loss": 0.03387787193059921, "loss_ce": 3.388000914128497e-05, "loss_iou": 0.10302734375, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 489988420, "step": 5368 }, { "epoch": 22.370833333333334, "grad_norm": 1.0428391477939085, "learning_rate": 5e-05, "loss": 0.0702, "num_input_tokens_seen": 490079600, "step": 5369 }, { "epoch": 22.370833333333334, "loss": 0.07174451649188995, "loss_ce": 4.346769128460437e-05, "loss_iou": 0.1943359375, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 490079600, "step": 5369 }, { "epoch": 22.375, "grad_norm": 1.0664256751003562, "learning_rate": 5e-05, "loss": 0.0257, "num_input_tokens_seen": 490171280, "step": 5370 }, { "epoch": 22.375, "loss": 0.025925656780600548, "loss_ce": 0.000985165941528976, "loss_iou": 0.2373046875, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 490171280, "step": 5370 }, { "epoch": 22.379166666666666, "grad_norm": 1.5563494410398024, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 490262924, "step": 5371 }, { "epoch": 22.379166666666666, "loss": 0.034245528280735016, "loss_ce": 6.583852518815547e-05, "loss_iou": 0.15234375, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 490262924, "step": 5371 }, { "epoch": 22.383333333333333, "grad_norm": 2.5664351197816786, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 490354332, "step": 5372 }, { "epoch": 22.383333333333333, "loss": 0.033131957054138184, "loss_ce": 2.8015738280373625e-05, "loss_iou": 0.26953125, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 490354332, "step": 5372 }, { "epoch": 22.3875, "grad_norm": 1.916318718588456, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 490445200, "step": 5373 }, { "epoch": 22.3875, "loss": 0.025140874087810516, "loss_ce": 7.068323611747473e-05, "loss_iou": 0.185546875, "loss_num": 0.0050048828125, "loss_xval": 0.0250244140625, "num_input_tokens_seen": 490445200, "step": 5373 }, { "epoch": 22.391666666666666, "grad_norm": 3.1213070784766765, "learning_rate": 5e-05, "loss": 0.0535, "num_input_tokens_seen": 490536484, "step": 5374 }, { "epoch": 22.391666666666666, "loss": 0.037068236619234085, "loss_ce": 0.00018774466298054904, "loss_iou": 0.298828125, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 490536484, "step": 5374 }, { "epoch": 22.395833333333332, "grad_norm": 2.529762377102089, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 490628100, "step": 5375 }, { "epoch": 22.395833333333332, "loss": 0.03373111039400101, "loss_ce": 7.02215766068548e-05, "loss_iou": 0.1484375, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 490628100, "step": 5375 }, { "epoch": 22.4, "grad_norm": 19.242992938124434, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 490719064, "step": 5376 }, { "epoch": 22.4, "loss": 0.03175321966409683, "loss_ce": 0.00010648959869286045, "loss_iou": 0.154296875, "loss_num": 0.006317138671875, "loss_xval": 0.03173828125, "num_input_tokens_seen": 490719064, "step": 5376 }, { "epoch": 22.404166666666665, "grad_norm": 2.3804535970551663, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 490809884, "step": 5377 }, { "epoch": 22.404166666666665, "loss": 0.02702101692557335, "loss_ce": 8.544004231225699e-05, "loss_iou": 0.169921875, "loss_num": 0.005401611328125, "loss_xval": 0.0269775390625, "num_input_tokens_seen": 490809884, "step": 5377 }, { "epoch": 22.408333333333335, "grad_norm": 2.6108490382580496, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 490901520, "step": 5378 }, { "epoch": 22.408333333333335, "loss": 0.031930726021528244, "loss_ce": 8.563094161218032e-05, "loss_iou": 0.294921875, "loss_num": 0.006378173828125, "loss_xval": 0.03173828125, "num_input_tokens_seen": 490901520, "step": 5378 }, { "epoch": 22.4125, "grad_norm": 2.5894466488834946, "learning_rate": 5e-05, "loss": 0.0626, "num_input_tokens_seen": 490992648, "step": 5379 }, { "epoch": 22.4125, "loss": 0.0888570249080658, "loss_ce": 0.0005544152809306979, "loss_iou": 0.228515625, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 490992648, "step": 5379 }, { "epoch": 22.416666666666668, "grad_norm": 2.78634108448832, "learning_rate": 5e-05, "loss": 0.0662, "num_input_tokens_seen": 491083320, "step": 5380 }, { "epoch": 22.416666666666668, "loss": 0.08157380670309067, "loss_ce": 6.13602896919474e-05, "loss_iou": 0.185546875, "loss_num": 0.0162353515625, "loss_xval": 0.08154296875, "num_input_tokens_seen": 491083320, "step": 5380 }, { "epoch": 22.420833333333334, "grad_norm": 1.6569327710068393, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 491174316, "step": 5381 }, { "epoch": 22.420833333333334, "loss": 0.019204962998628616, "loss_ce": 7.044081576168537e-05, "loss_iou": 0.080078125, "loss_num": 0.0038299560546875, "loss_xval": 0.0191650390625, "num_input_tokens_seen": 491174316, "step": 5381 }, { "epoch": 22.425, "grad_norm": 1.7105774926903508, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 491265744, "step": 5382 }, { "epoch": 22.425, "loss": 0.04312212020158768, "loss_ce": 3.1299583497457206e-05, "loss_iou": 0.27734375, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 491265744, "step": 5382 }, { "epoch": 22.429166666666667, "grad_norm": 1.7403705972722234, "learning_rate": 5e-05, "loss": 0.0698, "num_input_tokens_seen": 491354352, "step": 5383 }, { "epoch": 22.429166666666667, "loss": 0.05720948055386543, "loss_ce": 1.9535385945346206e-05, "loss_iou": 0.2421875, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 491354352, "step": 5383 }, { "epoch": 22.433333333333334, "grad_norm": 2.00698516413751, "learning_rate": 5e-05, "loss": 0.0886, "num_input_tokens_seen": 491445484, "step": 5384 }, { "epoch": 22.433333333333334, "loss": 0.14696118235588074, "loss_ce": 3.430706055951305e-05, "loss_iou": 0.1962890625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 491445484, "step": 5384 }, { "epoch": 22.4375, "grad_norm": 1.4529458763998768, "learning_rate": 5e-05, "loss": 0.0322, "num_input_tokens_seen": 491536328, "step": 5385 }, { "epoch": 22.4375, "loss": 0.03684793412685394, "loss_ce": 1.3217177183832973e-05, "loss_iou": 0.2353515625, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 491536328, "step": 5385 }, { "epoch": 22.441666666666666, "grad_norm": 2.2941956647046395, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 491627740, "step": 5386 }, { "epoch": 22.441666666666666, "loss": 0.052660562098026276, "loss_ce": 0.00010929418203886598, "loss_iou": 0.2158203125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 491627740, "step": 5386 }, { "epoch": 22.445833333333333, "grad_norm": 2.2595644852220897, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 491718420, "step": 5387 }, { "epoch": 22.445833333333333, "loss": 0.020403403788805008, "loss_ce": 8.632548997411504e-05, "loss_iou": 0.107421875, "loss_num": 0.004058837890625, "loss_xval": 0.020263671875, "num_input_tokens_seen": 491718420, "step": 5387 }, { "epoch": 22.45, "grad_norm": 2.665623036596892, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 491809752, "step": 5388 }, { "epoch": 22.45, "loss": 0.03691437840461731, "loss_ce": 3.388620825717226e-05, "loss_iou": 0.27734375, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 491809752, "step": 5388 }, { "epoch": 22.454166666666666, "grad_norm": 1.7622978237673568, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 491901876, "step": 5389 }, { "epoch": 22.454166666666666, "loss": 0.03598965331912041, "loss_ce": 0.00012005640019197017, "loss_iou": 0.1787109375, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 491901876, "step": 5389 }, { "epoch": 22.458333333333332, "grad_norm": 0.7490084016911978, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 491993576, "step": 5390 }, { "epoch": 22.458333333333332, "loss": 0.05927729979157448, "loss_ce": 6.5567423007451e-05, "loss_iou": 0.310546875, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 491993576, "step": 5390 }, { "epoch": 22.4625, "grad_norm": 0.47811007001813804, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 492085012, "step": 5391 }, { "epoch": 22.4625, "loss": 0.02164047211408615, "loss_ce": 2.6398767658974975e-05, "loss_iou": 0.1962890625, "loss_num": 0.00433349609375, "loss_xval": 0.0216064453125, "num_input_tokens_seen": 492085012, "step": 5391 }, { "epoch": 22.466666666666665, "grad_norm": 0.7928415294582661, "learning_rate": 5e-05, "loss": 0.0207, "num_input_tokens_seen": 492176244, "step": 5392 }, { "epoch": 22.466666666666665, "loss": 0.022790245711803436, "loss_ce": 2.7946376576437615e-05, "loss_iou": 0.201171875, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 492176244, "step": 5392 }, { "epoch": 22.470833333333335, "grad_norm": 1.4785762565522547, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 492267676, "step": 5393 }, { "epoch": 22.470833333333335, "loss": 0.06214084103703499, "loss_ce": 2.2309055566438474e-05, "loss_iou": 0.28515625, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 492267676, "step": 5393 }, { "epoch": 22.475, "grad_norm": 2.0332821119261713, "learning_rate": 5e-05, "loss": 0.0243, "num_input_tokens_seen": 492359032, "step": 5394 }, { "epoch": 22.475, "loss": 0.02958713099360466, "loss_ce": 1.5598343452438712e-05, "loss_iou": 0.2255859375, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 492359032, "step": 5394 }, { "epoch": 22.479166666666668, "grad_norm": 3.236142234487662, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 492450032, "step": 5395 }, { "epoch": 22.479166666666668, "loss": 0.02547776699066162, "loss_ce": 4.136636562179774e-05, "loss_iou": 0.333984375, "loss_num": 0.005096435546875, "loss_xval": 0.025390625, "num_input_tokens_seen": 492450032, "step": 5395 }, { "epoch": 22.483333333333334, "grad_norm": 3.099703672782828, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 492539916, "step": 5396 }, { "epoch": 22.483333333333334, "loss": 0.03732884302735329, "loss_ce": 2.1101570382597856e-05, "loss_iou": 0.267578125, "loss_num": 0.0074462890625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 492539916, "step": 5396 }, { "epoch": 22.4875, "grad_norm": 3.3406444811373293, "learning_rate": 5e-05, "loss": 0.0543, "num_input_tokens_seen": 492631208, "step": 5397 }, { "epoch": 22.4875, "loss": 0.07744846493005753, "loss_ce": 9.403174044564366e-05, "loss_iou": 0.2001953125, "loss_num": 0.0155029296875, "loss_xval": 0.0771484375, "num_input_tokens_seen": 492631208, "step": 5397 }, { "epoch": 22.491666666666667, "grad_norm": 2.500264706930889, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 492722220, "step": 5398 }, { "epoch": 22.491666666666667, "loss": 0.03508803993463516, "loss_ce": 0.0004963674582540989, "loss_iou": 0.1357421875, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 492722220, "step": 5398 }, { "epoch": 22.495833333333334, "grad_norm": 2.923014487782051, "learning_rate": 5e-05, "loss": 0.0809, "num_input_tokens_seen": 492813116, "step": 5399 }, { "epoch": 22.495833333333334, "loss": 0.0822400376200676, "loss_ce": 5.620271258521825e-05, "loss_iou": 0.2353515625, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 492813116, "step": 5399 }, { "epoch": 22.5, "grad_norm": 1.9591952052807597, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 492904024, "step": 5400 }, { "epoch": 22.5, "loss": 0.05392465740442276, "loss_ce": 4.587280272971839e-05, "loss_iou": 0.24609375, "loss_num": 0.0107421875, "loss_xval": 0.053955078125, "num_input_tokens_seen": 492904024, "step": 5400 }, { "epoch": 22.504166666666666, "grad_norm": 2.1966628956720307, "learning_rate": 5e-05, "loss": 0.0528, "num_input_tokens_seen": 492996044, "step": 5401 }, { "epoch": 22.504166666666666, "loss": 0.0637560486793518, "loss_ce": 6.586193921975791e-05, "loss_iou": 0.1982421875, "loss_num": 0.01275634765625, "loss_xval": 0.0634765625, "num_input_tokens_seen": 492996044, "step": 5401 }, { "epoch": 22.508333333333333, "grad_norm": 1.8024880556166163, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 493087680, "step": 5402 }, { "epoch": 22.508333333333333, "loss": 0.021951181814074516, "loss_ce": 0.0002074065268971026, "loss_iou": 0.1396484375, "loss_num": 0.00433349609375, "loss_xval": 0.021728515625, "num_input_tokens_seen": 493087680, "step": 5402 }, { "epoch": 22.5125, "grad_norm": 2.6832177202867578, "learning_rate": 5e-05, "loss": 0.0234, "num_input_tokens_seen": 493179756, "step": 5403 }, { "epoch": 22.5125, "loss": 0.018739566206932068, "loss_ce": 1.703085217741318e-05, "loss_iou": 0.248046875, "loss_num": 0.0037384033203125, "loss_xval": 0.0186767578125, "num_input_tokens_seen": 493179756, "step": 5403 }, { "epoch": 22.516666666666666, "grad_norm": 11.651569876024217, "learning_rate": 5e-05, "loss": 0.0643, "num_input_tokens_seen": 493271120, "step": 5404 }, { "epoch": 22.516666666666666, "loss": 0.025263365358114243, "loss_ce": 7.873350114095956e-05, "loss_iou": 0.275390625, "loss_num": 0.005035400390625, "loss_xval": 0.025146484375, "num_input_tokens_seen": 493271120, "step": 5404 }, { "epoch": 22.520833333333332, "grad_norm": 1.6422518445865064, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 493361644, "step": 5405 }, { "epoch": 22.520833333333332, "loss": 0.024423548951745033, "loss_ce": 3.237510100007057e-05, "loss_iou": 0.25390625, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 493361644, "step": 5405 }, { "epoch": 22.525, "grad_norm": 2.780010890769678, "learning_rate": 5e-05, "loss": 0.0674, "num_input_tokens_seen": 493453124, "step": 5406 }, { "epoch": 22.525, "loss": 0.11650041490793228, "loss_ce": 2.2444699425250292e-05, "loss_iou": 0.1279296875, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 493453124, "step": 5406 }, { "epoch": 22.529166666666665, "grad_norm": 1.7272088537396273, "learning_rate": 5e-05, "loss": 0.0691, "num_input_tokens_seen": 493544756, "step": 5407 }, { "epoch": 22.529166666666665, "loss": 0.05183388292789459, "loss_ce": 4.5554290409199893e-05, "loss_iou": 0.11669921875, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 493544756, "step": 5407 }, { "epoch": 22.533333333333335, "grad_norm": 2.4838237798667437, "learning_rate": 5e-05, "loss": 0.0204, "num_input_tokens_seen": 493635696, "step": 5408 }, { "epoch": 22.533333333333335, "loss": 0.01650792546570301, "loss_ce": 5.8951460232492536e-05, "loss_iou": 0.171875, "loss_num": 0.0032958984375, "loss_xval": 0.0164794921875, "num_input_tokens_seen": 493635696, "step": 5408 }, { "epoch": 22.5375, "grad_norm": 2.680248921270045, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 493727152, "step": 5409 }, { "epoch": 22.5375, "loss": 0.043390579521656036, "loss_ce": 5.5617347243241966e-05, "loss_iou": 0.1728515625, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 493727152, "step": 5409 }, { "epoch": 22.541666666666668, "grad_norm": 3.1224713410805434, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 493818056, "step": 5410 }, { "epoch": 22.541666666666668, "loss": 0.034009747207164764, "loss_ce": 4.368008376332e-05, "loss_iou": 0.109375, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 493818056, "step": 5410 }, { "epoch": 22.545833333333334, "grad_norm": 1.9609990516597686, "learning_rate": 5e-05, "loss": 0.0247, "num_input_tokens_seen": 493908748, "step": 5411 }, { "epoch": 22.545833333333334, "loss": 0.028770219534635544, "loss_ce": 3.792079951381311e-05, "loss_iou": 0.19140625, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 493908748, "step": 5411 }, { "epoch": 22.55, "grad_norm": 2.1888017020400845, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 493999912, "step": 5412 }, { "epoch": 22.55, "loss": 0.04256013035774231, "loss_ce": 0.0007510512950830162, "loss_iou": 0.271484375, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 493999912, "step": 5412 }, { "epoch": 22.554166666666667, "grad_norm": 2.142711327252004, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 494091008, "step": 5413 }, { "epoch": 22.554166666666667, "loss": 0.10330962389707565, "loss_ce": 0.000221242691623047, "loss_iou": 0.26953125, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 494091008, "step": 5413 }, { "epoch": 22.558333333333334, "grad_norm": 2.894368719760623, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 494182104, "step": 5414 }, { "epoch": 22.558333333333334, "loss": 0.02766202762722969, "loss_ce": 0.0001428009127266705, "loss_iou": 0.248046875, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 494182104, "step": 5414 }, { "epoch": 22.5625, "grad_norm": 2.7283296438572817, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 494273712, "step": 5415 }, { "epoch": 22.5625, "loss": 0.05128024145960808, "loss_ce": 0.00020907694124616683, "loss_iou": 0.19140625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 494273712, "step": 5415 }, { "epoch": 22.566666666666666, "grad_norm": 3.605747533364917, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 494364492, "step": 5416 }, { "epoch": 22.566666666666666, "loss": 0.046604275703430176, "loss_ce": 1.9191516912542284e-05, "loss_iou": 0.1767578125, "loss_num": 0.00927734375, "loss_xval": 0.046630859375, "num_input_tokens_seen": 494364492, "step": 5416 }, { "epoch": 22.570833333333333, "grad_norm": 2.559940130277489, "learning_rate": 5e-05, "loss": 0.0548, "num_input_tokens_seen": 494455692, "step": 5417 }, { "epoch": 22.570833333333333, "loss": 0.0370146818459034, "loss_ce": 1.2118907761760056e-05, "loss_iou": 0.2197265625, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 494455692, "step": 5417 }, { "epoch": 22.575, "grad_norm": 1.9876258889721863, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 494546932, "step": 5418 }, { "epoch": 22.575, "loss": 0.043613698333501816, "loss_ce": 0.00012615090236067772, "loss_iou": 0.0634765625, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 494546932, "step": 5418 }, { "epoch": 22.579166666666666, "grad_norm": 2.014097885386239, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 494638788, "step": 5419 }, { "epoch": 22.579166666666666, "loss": 0.049494411796331406, "loss_ce": 0.00042214401764795184, "loss_iou": 0.265625, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 494638788, "step": 5419 }, { "epoch": 22.583333333333332, "grad_norm": 1.3184404900712061, "learning_rate": 5e-05, "loss": 0.0292, "num_input_tokens_seen": 494730208, "step": 5420 }, { "epoch": 22.583333333333332, "loss": 0.028579875826835632, "loss_ce": 2.3050801246427e-05, "loss_iou": 0.2001953125, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 494730208, "step": 5420 }, { "epoch": 22.5875, "grad_norm": 4.326848124366721, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 494821976, "step": 5421 }, { "epoch": 22.5875, "loss": 0.04439885914325714, "loss_ce": 0.00027044268790632486, "loss_iou": 0.3125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 494821976, "step": 5421 }, { "epoch": 22.591666666666665, "grad_norm": 1.9149965433139318, "learning_rate": 5e-05, "loss": 0.028, "num_input_tokens_seen": 494913644, "step": 5422 }, { "epoch": 22.591666666666665, "loss": 0.03406630456447601, "loss_ce": 3.1575760658597574e-05, "loss_iou": 0.1962890625, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 494913644, "step": 5422 }, { "epoch": 22.595833333333335, "grad_norm": 2.4941618530637837, "learning_rate": 5e-05, "loss": 0.0676, "num_input_tokens_seen": 495005188, "step": 5423 }, { "epoch": 22.595833333333335, "loss": 0.018450569361448288, "loss_ce": 0.00010950414434773847, "loss_iou": 0.19921875, "loss_num": 0.003662109375, "loss_xval": 0.018310546875, "num_input_tokens_seen": 495005188, "step": 5423 }, { "epoch": 22.6, "grad_norm": 3.29433751743764, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 495096512, "step": 5424 }, { "epoch": 22.6, "loss": 0.05725552886724472, "loss_ce": 3.507136716507375e-05, "loss_iou": 0.28125, "loss_num": 0.011474609375, "loss_xval": 0.05712890625, "num_input_tokens_seen": 495096512, "step": 5424 }, { "epoch": 22.604166666666668, "grad_norm": 2.8359036528199524, "learning_rate": 5e-05, "loss": 0.0667, "num_input_tokens_seen": 495187732, "step": 5425 }, { "epoch": 22.604166666666668, "loss": 0.07588600367307663, "loss_ce": 1.166831498267129e-05, "loss_iou": 0.2001953125, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 495187732, "step": 5425 }, { "epoch": 22.608333333333334, "grad_norm": 1.358567931342101, "learning_rate": 5e-05, "loss": 0.0261, "num_input_tokens_seen": 495279620, "step": 5426 }, { "epoch": 22.608333333333334, "loss": 0.01989165134727955, "loss_ce": 2.47081188717857e-05, "loss_iou": 0.171875, "loss_num": 0.00396728515625, "loss_xval": 0.0198974609375, "num_input_tokens_seen": 495279620, "step": 5426 }, { "epoch": 22.6125, "grad_norm": 1.2792343501441592, "learning_rate": 5e-05, "loss": 0.0432, "num_input_tokens_seen": 495371628, "step": 5427 }, { "epoch": 22.6125, "loss": 0.016980396583676338, "loss_ce": 5.839836740051396e-05, "loss_iou": 0.1865234375, "loss_num": 0.003387451171875, "loss_xval": 0.0169677734375, "num_input_tokens_seen": 495371628, "step": 5427 }, { "epoch": 22.616666666666667, "grad_norm": 14.962869241023144, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 495462292, "step": 5428 }, { "epoch": 22.616666666666667, "loss": 0.03393097221851349, "loss_ce": 1.0683897926355712e-05, "loss_iou": 0.296875, "loss_num": 0.00677490234375, "loss_xval": 0.033935546875, "num_input_tokens_seen": 495462292, "step": 5428 }, { "epoch": 22.620833333333334, "grad_norm": 1.7903859967395788, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 495553636, "step": 5429 }, { "epoch": 22.620833333333334, "loss": 0.06721779704093933, "loss_ce": 0.0002851189929060638, "loss_iou": 0.283203125, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 495553636, "step": 5429 }, { "epoch": 22.625, "grad_norm": 2.733708686422691, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 495645176, "step": 5430 }, { "epoch": 22.625, "loss": 0.03249605372548103, "loss_ce": 2.1535624910029583e-05, "loss_iou": 0.1923828125, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 495645176, "step": 5430 }, { "epoch": 22.629166666666666, "grad_norm": 3.2757873666002766, "learning_rate": 5e-05, "loss": 0.0528, "num_input_tokens_seen": 495736032, "step": 5431 }, { "epoch": 22.629166666666666, "loss": 0.07405970990657806, "loss_ce": 0.0018932658713310957, "loss_iou": 0.3046875, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 495736032, "step": 5431 }, { "epoch": 22.633333333333333, "grad_norm": 3.369393966718524, "learning_rate": 5e-05, "loss": 0.0293, "num_input_tokens_seen": 495827532, "step": 5432 }, { "epoch": 22.633333333333333, "loss": 0.023361414670944214, "loss_ce": 5.3613319323631003e-05, "loss_iou": 0.2890625, "loss_num": 0.004669189453125, "loss_xval": 0.0233154296875, "num_input_tokens_seen": 495827532, "step": 5432 }, { "epoch": 22.6375, "grad_norm": 2.362708376690048, "learning_rate": 5e-05, "loss": 0.0265, "num_input_tokens_seen": 495918936, "step": 5433 }, { "epoch": 22.6375, "loss": 0.023834653198719025, "loss_ce": 0.00024456530809402466, "loss_iou": 0.2373046875, "loss_num": 0.00469970703125, "loss_xval": 0.0235595703125, "num_input_tokens_seen": 495918936, "step": 5433 }, { "epoch": 22.641666666666666, "grad_norm": 2.785947410744313, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 496010332, "step": 5434 }, { "epoch": 22.641666666666666, "loss": 0.03664480894804001, "loss_ce": 0.00014578511763829738, "loss_iou": 0.212890625, "loss_num": 0.007293701171875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 496010332, "step": 5434 }, { "epoch": 22.645833333333332, "grad_norm": 4.351682632944168, "learning_rate": 5e-05, "loss": 0.1063, "num_input_tokens_seen": 496101500, "step": 5435 }, { "epoch": 22.645833333333332, "loss": 0.17546984553337097, "loss_ce": 0.00040574927697889507, "loss_iou": 0.44921875, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 496101500, "step": 5435 }, { "epoch": 22.65, "grad_norm": 3.1939984607540053, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 496193140, "step": 5436 }, { "epoch": 22.65, "loss": 0.026585184037685394, "loss_ce": 6.540679169120267e-05, "loss_iou": 0.1796875, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 496193140, "step": 5436 }, { "epoch": 22.654166666666665, "grad_norm": 3.436034185560342, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 496284496, "step": 5437 }, { "epoch": 22.654166666666665, "loss": 0.04690772294998169, "loss_ce": 0.0001090200967155397, "loss_iou": 0.265625, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 496284496, "step": 5437 }, { "epoch": 22.658333333333335, "grad_norm": 1.6985662007924813, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 496375820, "step": 5438 }, { "epoch": 22.658333333333335, "loss": 0.03478825092315674, "loss_ce": 0.00031864611082710326, "loss_iou": 0.255859375, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 496375820, "step": 5438 }, { "epoch": 22.6625, "grad_norm": 2.257397167145903, "learning_rate": 5e-05, "loss": 0.0423, "num_input_tokens_seen": 496466412, "step": 5439 }, { "epoch": 22.6625, "loss": 0.05808237940073013, "loss_ce": 0.00018290436128154397, "loss_iou": 0.201171875, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 496466412, "step": 5439 }, { "epoch": 22.666666666666668, "grad_norm": 3.766741952029807, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 496557896, "step": 5440 }, { "epoch": 22.666666666666668, "loss": 0.026158884167671204, "loss_ce": 2.0578299881890416e-05, "loss_iou": 0.224609375, "loss_num": 0.005218505859375, "loss_xval": 0.026123046875, "num_input_tokens_seen": 496557896, "step": 5440 }, { "epoch": 22.670833333333334, "grad_norm": 2.7989902007061924, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 496649280, "step": 5441 }, { "epoch": 22.670833333333334, "loss": 0.06078026816248894, "loss_ce": 1.9770006474573165e-05, "loss_iou": 0.1953125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 496649280, "step": 5441 }, { "epoch": 22.675, "grad_norm": 3.8692101114858515, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 496739608, "step": 5442 }, { "epoch": 22.675, "loss": 0.0622529461979866, "loss_ce": 1.2350255929050036e-05, "loss_iou": 0.173828125, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 496739608, "step": 5442 }, { "epoch": 22.679166666666667, "grad_norm": 2.8602886566309023, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 496831084, "step": 5443 }, { "epoch": 22.679166666666667, "loss": 0.039982639253139496, "loss_ce": 1.987188807106577e-05, "loss_iou": 0.2236328125, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 496831084, "step": 5443 }, { "epoch": 22.683333333333334, "grad_norm": 2.6429848934326396, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 496922348, "step": 5444 }, { "epoch": 22.683333333333334, "loss": 0.029625367373228073, "loss_ce": 0.00011868414003401995, "loss_iou": 0.1435546875, "loss_num": 0.005889892578125, "loss_xval": 0.029541015625, "num_input_tokens_seen": 496922348, "step": 5444 }, { "epoch": 22.6875, "grad_norm": 5.833692611461288, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 497013648, "step": 5445 }, { "epoch": 22.6875, "loss": 0.04049752280116081, "loss_ce": 1.595700450707227e-05, "loss_iou": 0.177734375, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 497013648, "step": 5445 }, { "epoch": 22.691666666666666, "grad_norm": 2.1422200155879714, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 497104764, "step": 5446 }, { "epoch": 22.691666666666666, "loss": 0.04196276515722275, "loss_ce": 3.923796975868754e-05, "loss_iou": 0.2109375, "loss_num": 0.00836181640625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 497104764, "step": 5446 }, { "epoch": 22.695833333333333, "grad_norm": 3.225344975091235, "learning_rate": 5e-05, "loss": 0.0694, "num_input_tokens_seen": 497196636, "step": 5447 }, { "epoch": 22.695833333333333, "loss": 0.06967813521623611, "loss_ce": 4.46484045824036e-05, "loss_iou": 0.306640625, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 497196636, "step": 5447 }, { "epoch": 22.7, "grad_norm": 4.874570569091514, "learning_rate": 5e-05, "loss": 0.0497, "num_input_tokens_seen": 497288316, "step": 5448 }, { "epoch": 22.7, "loss": 0.05220876634120941, "loss_ce": 0.00010000343172578141, "loss_iou": 0.234375, "loss_num": 0.01043701171875, "loss_xval": 0.052001953125, "num_input_tokens_seen": 497288316, "step": 5448 }, { "epoch": 22.704166666666666, "grad_norm": 3.17848314051083, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 497379588, "step": 5449 }, { "epoch": 22.704166666666666, "loss": 0.024372564628720284, "loss_ce": 1.9538067135727033e-05, "loss_iou": 0.2158203125, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 497379588, "step": 5449 }, { "epoch": 22.708333333333332, "grad_norm": 2.602008060990133, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 497471692, "step": 5450 }, { "epoch": 22.708333333333332, "loss": 0.03950629383325577, "loss_ce": 0.003465034067630768, "loss_iou": 0.294921875, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 497471692, "step": 5450 }, { "epoch": 22.7125, "grad_norm": 1.9733341846682342, "learning_rate": 5e-05, "loss": 0.0672, "num_input_tokens_seen": 497562564, "step": 5451 }, { "epoch": 22.7125, "loss": 0.08485489338636398, "loss_ce": 0.00010757323616417125, "loss_iou": 0.298828125, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 497562564, "step": 5451 }, { "epoch": 22.716666666666665, "grad_norm": 1.796458844908592, "learning_rate": 5e-05, "loss": 0.0774, "num_input_tokens_seen": 497653716, "step": 5452 }, { "epoch": 22.716666666666665, "loss": 0.03222234547138214, "loss_ce": 4.155963324592449e-05, "loss_iou": 0.0625, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 497653716, "step": 5452 }, { "epoch": 22.720833333333335, "grad_norm": 0.7352762056029919, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 497744864, "step": 5453 }, { "epoch": 22.720833333333335, "loss": 0.03118591569364071, "loss_ce": 1.2207925465190783e-05, "loss_iou": 0.24609375, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 497744864, "step": 5453 }, { "epoch": 22.725, "grad_norm": 2.196564472678189, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 497836280, "step": 5454 }, { "epoch": 22.725, "loss": 0.061884164810180664, "loss_ce": 1.740168227115646e-05, "loss_iou": 0.119140625, "loss_num": 0.01239013671875, "loss_xval": 0.061767578125, "num_input_tokens_seen": 497836280, "step": 5454 }, { "epoch": 22.729166666666668, "grad_norm": 1.600302678177142, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 497927552, "step": 5455 }, { "epoch": 22.729166666666668, "loss": 0.037744879722595215, "loss_ce": 0.0005744679365307093, "loss_iou": 0.1650390625, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 497927552, "step": 5455 }, { "epoch": 22.733333333333334, "grad_norm": 1.6767414487852184, "learning_rate": 5e-05, "loss": 0.0352, "num_input_tokens_seen": 498018864, "step": 5456 }, { "epoch": 22.733333333333334, "loss": 0.0258328877389431, "loss_ce": 0.00012182634236523882, "loss_iou": 0.265625, "loss_num": 0.005126953125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 498018864, "step": 5456 }, { "epoch": 22.7375, "grad_norm": 2.4109369563480625, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 498110780, "step": 5457 }, { "epoch": 22.7375, "loss": 0.0353752076625824, "loss_ce": 8.162675658240914e-05, "loss_iou": 0.2470703125, "loss_num": 0.007049560546875, "loss_xval": 0.035400390625, "num_input_tokens_seen": 498110780, "step": 5457 }, { "epoch": 22.741666666666667, "grad_norm": 3.127469878487144, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 498202140, "step": 5458 }, { "epoch": 22.741666666666667, "loss": 0.03448343276977539, "loss_ce": 2.9086375434417278e-05, "loss_iou": 0.1669921875, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 498202140, "step": 5458 }, { "epoch": 22.745833333333334, "grad_norm": 3.0979169922431016, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 498293228, "step": 5459 }, { "epoch": 22.745833333333334, "loss": 0.054097265005111694, "loss_ce": 0.0001269291969947517, "loss_iou": 0.2314453125, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 498293228, "step": 5459 }, { "epoch": 22.75, "grad_norm": 2.7281658817110155, "learning_rate": 5e-05, "loss": 0.0295, "num_input_tokens_seen": 498384192, "step": 5460 }, { "epoch": 22.75, "loss": 0.024137474596500397, "loss_ce": 5.91044663451612e-05, "loss_iou": 0.298828125, "loss_num": 0.00482177734375, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 498384192, "step": 5460 }, { "epoch": 22.754166666666666, "grad_norm": 2.6444292311403528, "learning_rate": 5e-05, "loss": 0.0264, "num_input_tokens_seen": 498475908, "step": 5461 }, { "epoch": 22.754166666666666, "loss": 0.029545985162258148, "loss_ce": 5.8375371736474335e-05, "loss_iou": 0.1982421875, "loss_num": 0.005889892578125, "loss_xval": 0.029541015625, "num_input_tokens_seen": 498475908, "step": 5461 }, { "epoch": 22.758333333333333, "grad_norm": 2.908463530849759, "learning_rate": 5e-05, "loss": 0.0317, "num_input_tokens_seen": 498565328, "step": 5462 }, { "epoch": 22.758333333333333, "loss": 0.03385067731142044, "loss_ce": 0.00017453177133575082, "loss_iou": 0.1767578125, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 498565328, "step": 5462 }, { "epoch": 22.7625, "grad_norm": 3.6003888672228377, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 498657020, "step": 5463 }, { "epoch": 22.7625, "loss": 0.01993653178215027, "loss_ce": 1.9998524294351228e-05, "loss_iou": 0.1796875, "loss_num": 0.00396728515625, "loss_xval": 0.0198974609375, "num_input_tokens_seen": 498657020, "step": 5463 }, { "epoch": 22.766666666666666, "grad_norm": 2.367609533052053, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 498748380, "step": 5464 }, { "epoch": 22.766666666666666, "loss": 0.025592446327209473, "loss_ce": 1.8714170437306166e-05, "loss_iou": 0.1611328125, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 498748380, "step": 5464 }, { "epoch": 22.770833333333332, "grad_norm": 3.0676588689448865, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 498839220, "step": 5465 }, { "epoch": 22.770833333333332, "loss": 0.028899289667606354, "loss_ce": 4.4918717321706936e-05, "loss_iou": 0.21484375, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 498839220, "step": 5465 }, { "epoch": 22.775, "grad_norm": 2.8282670386731663, "learning_rate": 5e-05, "loss": 0.0846, "num_input_tokens_seen": 498930532, "step": 5466 }, { "epoch": 22.775, "loss": 0.13063108921051025, "loss_ce": 1.2047556992911268e-05, "loss_iou": 0.2119140625, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 498930532, "step": 5466 }, { "epoch": 22.779166666666665, "grad_norm": 2.358372792627603, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 499021668, "step": 5467 }, { "epoch": 22.779166666666665, "loss": 0.037430137395858765, "loss_ce": 3.08443013636861e-05, "loss_iou": 0.1982421875, "loss_num": 0.007476806640625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 499021668, "step": 5467 }, { "epoch": 22.783333333333335, "grad_norm": 6.374577377660233, "learning_rate": 5e-05, "loss": 0.0778, "num_input_tokens_seen": 499112924, "step": 5468 }, { "epoch": 22.783333333333335, "loss": 0.03872350603342056, "loss_ce": 1.19601600090391e-05, "loss_iou": 0.298828125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 499112924, "step": 5468 }, { "epoch": 22.7875, "grad_norm": 4.431361490860381, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 499204276, "step": 5469 }, { "epoch": 22.7875, "loss": 0.04221169278025627, "loss_ce": 0.00010506440594326705, "loss_iou": 0.412109375, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 499204276, "step": 5469 }, { "epoch": 22.791666666666668, "grad_norm": 2.181301579207962, "learning_rate": 5e-05, "loss": 0.0443, "num_input_tokens_seen": 499295268, "step": 5470 }, { "epoch": 22.791666666666668, "loss": 0.02486196532845497, "loss_ce": 2.06578042707406e-05, "loss_iou": 0.203125, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 499295268, "step": 5470 }, { "epoch": 22.795833333333334, "grad_norm": 2.5528680243042245, "learning_rate": 5e-05, "loss": 0.0579, "num_input_tokens_seen": 499386908, "step": 5471 }, { "epoch": 22.795833333333334, "loss": 0.029936883598566055, "loss_ce": 3.728695082827471e-05, "loss_iou": 0.26953125, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 499386908, "step": 5471 }, { "epoch": 22.8, "grad_norm": 4.452297157296129, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 499478288, "step": 5472 }, { "epoch": 22.8, "loss": 0.026759404689073563, "loss_ce": 5.652549589285627e-05, "loss_iou": 0.255859375, "loss_num": 0.005340576171875, "loss_xval": 0.0267333984375, "num_input_tokens_seen": 499478288, "step": 5472 }, { "epoch": 22.804166666666667, "grad_norm": 2.9466131421578656, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 499569332, "step": 5473 }, { "epoch": 22.804166666666667, "loss": 0.027132410556077957, "loss_ce": 1.7542834029882215e-05, "loss_iou": 0.2109375, "loss_num": 0.00543212890625, "loss_xval": 0.027099609375, "num_input_tokens_seen": 499569332, "step": 5473 }, { "epoch": 22.808333333333334, "grad_norm": 7.186883142939526, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 499661064, "step": 5474 }, { "epoch": 22.808333333333334, "loss": 0.061519306153059006, "loss_ce": 0.0020329162944108248, "loss_iou": 0.16015625, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 499661064, "step": 5474 }, { "epoch": 22.8125, "grad_norm": 3.554601374181729, "learning_rate": 5e-05, "loss": 0.0612, "num_input_tokens_seen": 499752312, "step": 5475 }, { "epoch": 22.8125, "loss": 0.057638224214315414, "loss_ce": 1.3407001461018808e-05, "loss_iou": 0.1953125, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 499752312, "step": 5475 }, { "epoch": 22.816666666666666, "grad_norm": 2.7402453833752336, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 499842672, "step": 5476 }, { "epoch": 22.816666666666666, "loss": 0.03763018548488617, "loss_ce": 1.7269090676563792e-05, "loss_iou": 0.181640625, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 499842672, "step": 5476 }, { "epoch": 22.820833333333333, "grad_norm": 2.1660452232601135, "learning_rate": 5e-05, "loss": 0.0274, "num_input_tokens_seen": 499933896, "step": 5477 }, { "epoch": 22.820833333333333, "loss": 0.028365662321448326, "loss_ce": 0.00019030747353099287, "loss_iou": 0.322265625, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 499933896, "step": 5477 }, { "epoch": 22.825, "grad_norm": 5.217329977893872, "learning_rate": 5e-05, "loss": 0.029, "num_input_tokens_seen": 500025440, "step": 5478 }, { "epoch": 22.825, "loss": 0.0354175791144371, "loss_ce": 0.0007496136240661144, "loss_iou": 0.2490234375, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 500025440, "step": 5478 }, { "epoch": 22.829166666666666, "grad_norm": 2.940064067788056, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 500116292, "step": 5479 }, { "epoch": 22.829166666666666, "loss": 0.08425501734018326, "loss_ce": 5.702173075405881e-05, "loss_iou": 0.326171875, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 500116292, "step": 5479 }, { "epoch": 22.833333333333332, "grad_norm": 4.7275692494247, "learning_rate": 5e-05, "loss": 0.0417, "num_input_tokens_seen": 500208452, "step": 5480 }, { "epoch": 22.833333333333332, "loss": 0.028766117990016937, "loss_ce": 4.907785842078738e-05, "loss_iou": 0.236328125, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 500208452, "step": 5480 }, { "epoch": 22.8375, "grad_norm": 2.5102813220794524, "learning_rate": 5e-05, "loss": 0.034, "num_input_tokens_seen": 500300040, "step": 5481 }, { "epoch": 22.8375, "loss": 0.0304593276232481, "loss_ce": 1.0413465133751743e-05, "loss_iou": 0.279296875, "loss_num": 0.006103515625, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 500300040, "step": 5481 }, { "epoch": 22.841666666666665, "grad_norm": 3.260438483373206, "learning_rate": 5e-05, "loss": 0.0604, "num_input_tokens_seen": 500390616, "step": 5482 }, { "epoch": 22.841666666666665, "loss": 0.04531024396419525, "loss_ce": 6.793491775169969e-05, "loss_iou": 0.28515625, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 500390616, "step": 5482 }, { "epoch": 22.845833333333335, "grad_norm": 3.018756694811929, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 500482756, "step": 5483 }, { "epoch": 22.845833333333335, "loss": 0.040407925844192505, "loss_ce": 4.080204962519929e-05, "loss_iou": 0.240234375, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 500482756, "step": 5483 }, { "epoch": 22.85, "grad_norm": 2.6317817181670455, "learning_rate": 5e-05, "loss": 0.026, "num_input_tokens_seen": 500573928, "step": 5484 }, { "epoch": 22.85, "loss": 0.03063778206706047, "loss_ce": 1.3393741028266959e-05, "loss_iou": 0.1953125, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 500573928, "step": 5484 }, { "epoch": 22.854166666666668, "grad_norm": 1.9321251904243877, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 500665412, "step": 5485 }, { "epoch": 22.854166666666668, "loss": 0.03834788501262665, "loss_ce": 1.3989140825287905e-05, "loss_iou": 0.162109375, "loss_num": 0.007659912109375, "loss_xval": 0.038330078125, "num_input_tokens_seen": 500665412, "step": 5485 }, { "epoch": 22.858333333333334, "grad_norm": 1.399606320446629, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 500756168, "step": 5486 }, { "epoch": 22.858333333333334, "loss": 0.06183755397796631, "loss_ce": 8.943742614064831e-06, "loss_iou": 0.298828125, "loss_num": 0.01239013671875, "loss_xval": 0.061767578125, "num_input_tokens_seen": 500756168, "step": 5486 }, { "epoch": 22.8625, "grad_norm": 0.778141119459895, "learning_rate": 5e-05, "loss": 0.0352, "num_input_tokens_seen": 500847932, "step": 5487 }, { "epoch": 22.8625, "loss": 0.05563715845346451, "loss_ce": 0.00026873595197685063, "loss_iou": 0.248046875, "loss_num": 0.01104736328125, "loss_xval": 0.055419921875, "num_input_tokens_seen": 500847932, "step": 5487 }, { "epoch": 22.866666666666667, "grad_norm": 0.732235143616492, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 500939360, "step": 5488 }, { "epoch": 22.866666666666667, "loss": 0.02937071956694126, "loss_ce": 3.569724867702462e-05, "loss_iou": 0.30078125, "loss_num": 0.005859375, "loss_xval": 0.029296875, "num_input_tokens_seen": 500939360, "step": 5488 }, { "epoch": 22.870833333333334, "grad_norm": 1.4242854197731527, "learning_rate": 5e-05, "loss": 0.0457, "num_input_tokens_seen": 501031012, "step": 5489 }, { "epoch": 22.870833333333334, "loss": 0.0672885924577713, "loss_ce": 1.2588001482072286e-05, "loss_iou": 0.197265625, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 501031012, "step": 5489 }, { "epoch": 22.875, "grad_norm": 1.4123170340242057, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 501122108, "step": 5490 }, { "epoch": 22.875, "loss": 0.02706168405711651, "loss_ce": 1.548104228277225e-05, "loss_iou": 0.2119140625, "loss_num": 0.00543212890625, "loss_xval": 0.027099609375, "num_input_tokens_seen": 501122108, "step": 5490 }, { "epoch": 22.879166666666666, "grad_norm": 1.9084277753171575, "learning_rate": 5e-05, "loss": 0.0648, "num_input_tokens_seen": 501213440, "step": 5491 }, { "epoch": 22.879166666666666, "loss": 0.06372037529945374, "loss_ce": 8.359698404092342e-05, "loss_iou": 0.28125, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 501213440, "step": 5491 }, { "epoch": 22.883333333333333, "grad_norm": 3.015035456018019, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 501304532, "step": 5492 }, { "epoch": 22.883333333333333, "loss": 0.07055270671844482, "loss_ce": 4.1843977669486776e-05, "loss_iou": 0.21875, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 501304532, "step": 5492 }, { "epoch": 22.8875, "grad_norm": 2.5319543671648184, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 501395636, "step": 5493 }, { "epoch": 22.8875, "loss": 0.04397744685411453, "loss_ce": 7.79082765802741e-05, "loss_iou": 0.2431640625, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 501395636, "step": 5493 }, { "epoch": 22.891666666666666, "grad_norm": 2.1391885521606966, "learning_rate": 5e-05, "loss": 0.0705, "num_input_tokens_seen": 501486500, "step": 5494 }, { "epoch": 22.891666666666666, "loss": 0.07880009710788727, "loss_ce": 1.1342142897774465e-05, "loss_iou": 0.16015625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 501486500, "step": 5494 }, { "epoch": 22.895833333333332, "grad_norm": 2.5589569030363566, "learning_rate": 5e-05, "loss": 0.0266, "num_input_tokens_seen": 501577884, "step": 5495 }, { "epoch": 22.895833333333332, "loss": 0.027315836399793625, "loss_ce": 0.0009867959888651967, "loss_iou": 0.1640625, "loss_num": 0.0052490234375, "loss_xval": 0.0263671875, "num_input_tokens_seen": 501577884, "step": 5495 }, { "epoch": 22.9, "grad_norm": 2.884642880141269, "learning_rate": 5e-05, "loss": 0.0253, "num_input_tokens_seen": 501669688, "step": 5496 }, { "epoch": 22.9, "loss": 0.029087038710713387, "loss_ce": 0.0001868915860541165, "loss_iou": 0.2734375, "loss_num": 0.005767822265625, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 501669688, "step": 5496 }, { "epoch": 22.904166666666665, "grad_norm": 21.98736131357622, "learning_rate": 5e-05, "loss": 0.0646, "num_input_tokens_seen": 501760892, "step": 5497 }, { "epoch": 22.904166666666665, "loss": 0.05251148343086243, "loss_ce": 0.0001890959101729095, "loss_iou": 0.326171875, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 501760892, "step": 5497 }, { "epoch": 22.908333333333335, "grad_norm": 2.8407728203382576, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 501852496, "step": 5498 }, { "epoch": 22.908333333333335, "loss": 0.03330732882022858, "loss_ce": 2.028298877121415e-05, "loss_iou": 0.1787109375, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 501852496, "step": 5498 }, { "epoch": 22.9125, "grad_norm": 22.062520543176024, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 501944384, "step": 5499 }, { "epoch": 22.9125, "loss": 0.10073038935661316, "loss_ce": 5.289072942105122e-05, "loss_iou": 0.298828125, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 501944384, "step": 5499 }, { "epoch": 22.916666666666668, "grad_norm": 3.530217917213057, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 502035740, "step": 5500 }, { "epoch": 22.916666666666668, "eval_seeclick_CIoU": 0.2251809574663639, "eval_seeclick_GIoU": 0.20743417367339134, "eval_seeclick_IoU": 0.3241002485156059, "eval_seeclick_MAE_all": 0.10077449306845665, "eval_seeclick_MAE_h": 0.07250755280256271, "eval_seeclick_MAE_w": 0.22622816264629364, "eval_seeclick_MAE_x_boxes": 0.21967153996229172, "eval_seeclick_MAE_y_boxes": 0.07597211748361588, "eval_seeclick_NUM_probability": 0.999996155500412, "eval_seeclick_inside_bbox": 0.5411931872367859, "eval_seeclick_loss": 0.5401211977005005, "eval_seeclick_loss_ce": 0.10009105876088142, "eval_seeclick_loss_iou": 0.4600830078125, "eval_seeclick_loss_num": 0.0853424072265625, "eval_seeclick_loss_xval": 0.4266357421875, "eval_seeclick_runtime": 76.5178, "eval_seeclick_samples_per_second": 0.562, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 502035740, "step": 5500 }, { "epoch": 22.916666666666668, "eval_icons_CIoU": 0.27284765988588333, "eval_icons_GIoU": 0.23190713673830032, "eval_icons_IoU": 0.37249836325645447, "eval_icons_MAE_all": 0.08035346493124962, "eval_icons_MAE_h": 0.1744953989982605, "eval_icons_MAE_w": 0.11795252934098244, "eval_icons_MAE_x_boxes": 0.12169399112462997, "eval_icons_MAE_y_boxes": 0.17085515707731247, "eval_icons_NUM_probability": 0.9999938905239105, "eval_icons_inside_bbox": 0.5347222238779068, "eval_icons_loss": 0.3813272714614868, "eval_icons_loss_ce": 0.0018568845698609948, "eval_icons_loss_iou": 0.19012451171875, "eval_icons_loss_num": 0.07770538330078125, "eval_icons_loss_xval": 0.38848876953125, "eval_icons_runtime": 85.1043, "eval_icons_samples_per_second": 0.588, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 502035740, "step": 5500 }, { "epoch": 22.916666666666668, "eval_screenspot_CIoU": 0.3852354089419047, "eval_screenspot_GIoU": 0.36637531717618305, "eval_screenspot_IoU": 0.4568243424097697, "eval_screenspot_MAE_all": 0.09730574985345204, "eval_screenspot_MAE_h": 0.09697473794221878, "eval_screenspot_MAE_w": 0.2014616330464681, "eval_screenspot_MAE_x_boxes": 0.17775118350982666, "eval_screenspot_MAE_y_boxes": 0.09227648874123891, "eval_screenspot_NUM_probability": 0.9999014139175415, "eval_screenspot_inside_bbox": 0.7458333373069763, "eval_screenspot_loss": 0.49223896861076355, "eval_screenspot_loss_ce": 0.0051902799362627166, "eval_screenspot_loss_iou": 0.346435546875, "eval_screenspot_loss_num": 0.1008148193359375, "eval_screenspot_loss_xval": 0.5041097005208334, "eval_screenspot_runtime": 150.1583, "eval_screenspot_samples_per_second": 0.593, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 502035740, "step": 5500 }, { "epoch": 22.916666666666668, "eval_compot_CIoU": 0.42840753495693207, "eval_compot_GIoU": 0.4277106374502182, "eval_compot_IoU": 0.5115479528903961, "eval_compot_MAE_all": 0.05933877266943455, "eval_compot_MAE_h": 0.06422919780015945, "eval_compot_MAE_w": 0.15599841624498367, "eval_compot_MAE_x_boxes": 0.15585462749004364, "eval_compot_MAE_y_boxes": 0.06365378201007843, "eval_compot_NUM_probability": 0.9999901056289673, "eval_compot_inside_bbox": 0.6458333432674408, "eval_compot_loss": 0.36150631308555603, "eval_compot_loss_ce": 0.06297808699309826, "eval_compot_loss_iou": 0.25494384765625, "eval_compot_loss_num": 0.05878448486328125, "eval_compot_loss_xval": 0.293853759765625, "eval_compot_runtime": 85.918, "eval_compot_samples_per_second": 0.582, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 502035740, "step": 5500 }, { "epoch": 22.916666666666668, "loss": 0.3481367528438568, "loss_ce": 0.06633742153644562, "loss_iou": 0.28515625, "loss_num": 0.056396484375, "loss_xval": 0.28125, "num_input_tokens_seen": 502035740, "step": 5500 }, { "epoch": 22.920833333333334, "grad_norm": 3.842643013858874, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 502126788, "step": 5501 }, { "epoch": 22.920833333333334, "loss": 0.0501682311296463, "loss_ce": 0.0003940639435313642, "loss_iou": 0.22265625, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 502126788, "step": 5501 }, { "epoch": 22.925, "grad_norm": 3.3158857799497126, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 502218088, "step": 5502 }, { "epoch": 22.925, "loss": 0.04445202276110649, "loss_ce": 3.368945908732712e-05, "loss_iou": 0.3046875, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 502218088, "step": 5502 }, { "epoch": 22.929166666666667, "grad_norm": 4.140354488668016, "learning_rate": 5e-05, "loss": 0.066, "num_input_tokens_seen": 502308548, "step": 5503 }, { "epoch": 22.929166666666667, "loss": 0.057744890451431274, "loss_ce": 0.00011244323832215741, "loss_iou": 0.228515625, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 502308548, "step": 5503 }, { "epoch": 22.933333333333334, "grad_norm": 8.781019526057698, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 502399836, "step": 5504 }, { "epoch": 22.933333333333334, "loss": 0.043314363807439804, "loss_ce": 5.569650966208428e-05, "loss_iou": 0.1943359375, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 502399836, "step": 5504 }, { "epoch": 22.9375, "grad_norm": 3.7258668949793123, "learning_rate": 5e-05, "loss": 0.0237, "num_input_tokens_seen": 502491764, "step": 5505 }, { "epoch": 22.9375, "loss": 0.02385835163295269, "loss_ce": 0.001237196265719831, "loss_iou": 0.2109375, "loss_num": 0.0045166015625, "loss_xval": 0.0225830078125, "num_input_tokens_seen": 502491764, "step": 5505 }, { "epoch": 22.941666666666666, "grad_norm": 2.332813839233267, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 502582812, "step": 5506 }, { "epoch": 22.941666666666666, "loss": 0.04756104573607445, "loss_ce": 1.465791319787968e-05, "loss_iou": 0.2578125, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 502582812, "step": 5506 }, { "epoch": 22.945833333333333, "grad_norm": 3.417653919295563, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 502673976, "step": 5507 }, { "epoch": 22.945833333333333, "loss": 0.06305991858243942, "loss_ce": 2.5857691070996225e-05, "loss_iou": 0.26953125, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 502673976, "step": 5507 }, { "epoch": 22.95, "grad_norm": 3.655723485092921, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 502765292, "step": 5508 }, { "epoch": 22.95, "loss": 0.0506102591753006, "loss_ce": 1.2114001947338693e-05, "loss_iou": 0.3515625, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 502765292, "step": 5508 }, { "epoch": 22.954166666666666, "grad_norm": 2.4607481843272843, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 502856384, "step": 5509 }, { "epoch": 22.954166666666666, "loss": 0.03219888359308243, "loss_ce": 1.0468009350006469e-05, "loss_iou": 0.271484375, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 502856384, "step": 5509 }, { "epoch": 22.958333333333332, "grad_norm": 2.0587149015457027, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 502947704, "step": 5510 }, { "epoch": 22.958333333333332, "loss": 0.025407759472727776, "loss_ce": 1.713353776722215e-05, "loss_iou": 0.15625, "loss_num": 0.00506591796875, "loss_xval": 0.025390625, "num_input_tokens_seen": 502947704, "step": 5510 }, { "epoch": 22.9625, "grad_norm": 2.916617177381313, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 503039284, "step": 5511 }, { "epoch": 22.9625, "loss": 0.031609997153282166, "loss_ce": 5.482199776452035e-05, "loss_iou": 0.2890625, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 503039284, "step": 5511 }, { "epoch": 22.966666666666665, "grad_norm": 2.9932210038096225, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 503130680, "step": 5512 }, { "epoch": 22.966666666666665, "loss": 0.04617247357964516, "loss_ce": 0.0009225325775332749, "loss_iou": 0.16796875, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 503130680, "step": 5512 }, { "epoch": 22.970833333333335, "grad_norm": 1.7561536928197354, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 503222204, "step": 5513 }, { "epoch": 22.970833333333335, "loss": 0.020861037075519562, "loss_ce": 0.00020826596301048994, "loss_iou": 0.158203125, "loss_num": 0.004119873046875, "loss_xval": 0.0206298828125, "num_input_tokens_seen": 503222204, "step": 5513 }, { "epoch": 22.975, "grad_norm": 7.348923333995983, "learning_rate": 5e-05, "loss": 0.0783, "num_input_tokens_seen": 503313968, "step": 5514 }, { "epoch": 22.975, "loss": 0.04428169131278992, "loss_ce": 0.00024482791195623577, "loss_iou": 0.279296875, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 503313968, "step": 5514 }, { "epoch": 22.979166666666668, "grad_norm": 1.9808279719757151, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 503405344, "step": 5515 }, { "epoch": 22.979166666666668, "loss": 0.025117503479123116, "loss_ce": 1.679652268649079e-05, "loss_iou": 0.181640625, "loss_num": 0.0050048828125, "loss_xval": 0.025146484375, "num_input_tokens_seen": 503405344, "step": 5515 }, { "epoch": 22.983333333333334, "grad_norm": 2.335548030348546, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 503496644, "step": 5516 }, { "epoch": 22.983333333333334, "loss": 0.09109736979007721, "loss_ce": 7.869096589274704e-05, "loss_iou": 0.28515625, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 503496644, "step": 5516 }, { "epoch": 22.9875, "grad_norm": 2.6540184672026235, "learning_rate": 5e-05, "loss": 0.026, "num_input_tokens_seen": 503587868, "step": 5517 }, { "epoch": 22.9875, "loss": 0.027322892099618912, "loss_ce": 7.069473940646276e-05, "loss_iou": 0.24609375, "loss_num": 0.005462646484375, "loss_xval": 0.0272216796875, "num_input_tokens_seen": 503587868, "step": 5517 }, { "epoch": 22.991666666666667, "grad_norm": 2.2279579443252535, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 503679220, "step": 5518 }, { "epoch": 22.991666666666667, "loss": 0.03166805952787399, "loss_ce": 6.072716132621281e-06, "loss_iou": 0.310546875, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 503679220, "step": 5518 }, { "epoch": 22.995833333333334, "grad_norm": 3.0643537195442683, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 503770452, "step": 5519 }, { "epoch": 22.995833333333334, "loss": 0.043202586472034454, "loss_ce": 3.547139203874394e-05, "loss_iou": 0.2333984375, "loss_num": 0.00860595703125, "loss_xval": 0.043212890625, "num_input_tokens_seen": 503770452, "step": 5519 }, { "epoch": 23.0, "grad_norm": 2.721422461831951, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 503861852, "step": 5520 }, { "epoch": 23.0, "loss": 0.04136139899492264, "loss_ce": 1.770826020219829e-05, "loss_iou": 0.1650390625, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 503861852, "step": 5520 }, { "epoch": 23.004166666666666, "grad_norm": 2.016052229840184, "learning_rate": 5e-05, "loss": 0.0547, "num_input_tokens_seen": 503951212, "step": 5521 }, { "epoch": 23.004166666666666, "loss": 0.0762738585472107, "loss_ce": 1.8063037714455277e-05, "loss_iou": 0.1025390625, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 503951212, "step": 5521 }, { "epoch": 23.008333333333333, "grad_norm": 1.5897666750004529, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 504042516, "step": 5522 }, { "epoch": 23.008333333333333, "loss": 0.038626015186309814, "loss_ce": 4.417174932314083e-05, "loss_iou": 0.19921875, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 504042516, "step": 5522 }, { "epoch": 23.0125, "grad_norm": 2.138708352238235, "learning_rate": 5e-05, "loss": 0.0331, "num_input_tokens_seen": 504134440, "step": 5523 }, { "epoch": 23.0125, "loss": 0.040923915803432465, "loss_ce": 1.5102302313607652e-05, "loss_iou": 0.2431640625, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 504134440, "step": 5523 }, { "epoch": 23.016666666666666, "grad_norm": 4.708663084029856, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 504226180, "step": 5524 }, { "epoch": 23.016666666666666, "loss": 0.06659691035747528, "loss_ce": 5.3330928494688123e-05, "loss_iou": 0.20703125, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 504226180, "step": 5524 }, { "epoch": 23.020833333333332, "grad_norm": 2.53702349066275, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 504317652, "step": 5525 }, { "epoch": 23.020833333333332, "loss": 0.03276386857032776, "loss_ce": 1.0879201909119729e-05, "loss_iou": 0.212890625, "loss_num": 0.006561279296875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 504317652, "step": 5525 }, { "epoch": 23.025, "grad_norm": 2.078387757852989, "learning_rate": 5e-05, "loss": 0.0465, "num_input_tokens_seen": 504408592, "step": 5526 }, { "epoch": 23.025, "loss": 0.05985802784562111, "loss_ce": 2.0683584807557054e-05, "loss_iou": 0.251953125, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 504408592, "step": 5526 }, { "epoch": 23.029166666666665, "grad_norm": 2.063475413626266, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 504498564, "step": 5527 }, { "epoch": 23.029166666666665, "loss": 0.021073997020721436, "loss_ce": 1.6867019439814612e-05, "loss_iou": 0.150390625, "loss_num": 0.00421142578125, "loss_xval": 0.02099609375, "num_input_tokens_seen": 504498564, "step": 5527 }, { "epoch": 23.033333333333335, "grad_norm": 1.5947610008134536, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 504590120, "step": 5528 }, { "epoch": 23.033333333333335, "loss": 0.03138340264558792, "loss_ce": 0.0002173245302401483, "loss_iou": 0.1865234375, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 504590120, "step": 5528 }, { "epoch": 23.0375, "grad_norm": 2.6926222323791267, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 504681684, "step": 5529 }, { "epoch": 23.0375, "loss": 0.051821961998939514, "loss_ce": 1.0743910024757497e-05, "loss_iou": 0.2255859375, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 504681684, "step": 5529 }, { "epoch": 23.041666666666668, "grad_norm": 3.0147621341280004, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 504772928, "step": 5530 }, { "epoch": 23.041666666666668, "loss": 0.05108293890953064, "loss_ce": 1.1768741387641057e-05, "loss_iou": 0.3125, "loss_num": 0.01025390625, "loss_xval": 0.051025390625, "num_input_tokens_seen": 504772928, "step": 5530 }, { "epoch": 23.045833333333334, "grad_norm": 3.529754894607426, "learning_rate": 5e-05, "loss": 0.04, "num_input_tokens_seen": 504864352, "step": 5531 }, { "epoch": 23.045833333333334, "loss": 0.04382365196943283, "loss_ce": 0.00035135942744091153, "loss_iou": 0.234375, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 504864352, "step": 5531 }, { "epoch": 23.05, "grad_norm": 2.2367983125380175, "learning_rate": 5e-05, "loss": 0.088, "num_input_tokens_seen": 504955744, "step": 5532 }, { "epoch": 23.05, "loss": 0.037780724465847015, "loss_ce": 3.81076788471546e-05, "loss_iou": 0.1904296875, "loss_num": 0.007568359375, "loss_xval": 0.037841796875, "num_input_tokens_seen": 504955744, "step": 5532 }, { "epoch": 23.054166666666667, "grad_norm": 2.6973369667724714, "learning_rate": 5e-05, "loss": 0.048, "num_input_tokens_seen": 505047124, "step": 5533 }, { "epoch": 23.054166666666667, "loss": 0.0411471463739872, "loss_ce": 9.451230653212406e-06, "loss_iou": 0.2275390625, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 505047124, "step": 5533 }, { "epoch": 23.058333333333334, "grad_norm": 2.6149604800336013, "learning_rate": 5e-05, "loss": 0.0247, "num_input_tokens_seen": 505138476, "step": 5534 }, { "epoch": 23.058333333333334, "loss": 0.02741740271449089, "loss_ce": 1.2616639651241712e-05, "loss_iou": 0.234375, "loss_num": 0.0054931640625, "loss_xval": 0.02734375, "num_input_tokens_seen": 505138476, "step": 5534 }, { "epoch": 23.0625, "grad_norm": 3.4400342266393626, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 505229832, "step": 5535 }, { "epoch": 23.0625, "loss": 0.041741691529750824, "loss_ce": 8.90308729140088e-06, "loss_iou": 0.265625, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 505229832, "step": 5535 }, { "epoch": 23.066666666666666, "grad_norm": 5.02985472813745, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 505321416, "step": 5536 }, { "epoch": 23.066666666666666, "loss": 0.04101718217134476, "loss_ce": 6.259123620111495e-05, "loss_iou": 0.330078125, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 505321416, "step": 5536 }, { "epoch": 23.070833333333333, "grad_norm": 3.003234993754144, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 505412784, "step": 5537 }, { "epoch": 23.070833333333333, "loss": 0.03090827539563179, "loss_ce": 9.229412171407603e-06, "loss_iou": 0.291015625, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 505412784, "step": 5537 }, { "epoch": 23.075, "grad_norm": 2.281625317511175, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 505504404, "step": 5538 }, { "epoch": 23.075, "loss": 0.05516308546066284, "loss_ce": 3.307756196591072e-05, "loss_iou": 0.404296875, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 505504404, "step": 5538 }, { "epoch": 23.079166666666666, "grad_norm": 2.681882718810235, "learning_rate": 5e-05, "loss": 0.0282, "num_input_tokens_seen": 505595452, "step": 5539 }, { "epoch": 23.079166666666666, "loss": 0.020011726766824722, "loss_ce": 2.2712603822583333e-05, "loss_iou": 0.0791015625, "loss_num": 0.003997802734375, "loss_xval": 0.02001953125, "num_input_tokens_seen": 505595452, "step": 5539 }, { "epoch": 23.083333333333332, "grad_norm": 1.9297403026196684, "learning_rate": 5e-05, "loss": 0.0309, "num_input_tokens_seen": 505687264, "step": 5540 }, { "epoch": 23.083333333333332, "loss": 0.02788577787578106, "loss_ce": 7.970892511366401e-06, "loss_iou": 0.248046875, "loss_num": 0.005584716796875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 505687264, "step": 5540 }, { "epoch": 23.0875, "grad_norm": 2.1713723754796974, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 505778960, "step": 5541 }, { "epoch": 23.0875, "loss": 0.029800841584801674, "loss_ce": 3.094381099799648e-05, "loss_iou": 0.2080078125, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 505778960, "step": 5541 }, { "epoch": 23.091666666666665, "grad_norm": 2.7247538835885536, "learning_rate": 5e-05, "loss": 0.0237, "num_input_tokens_seen": 505870764, "step": 5542 }, { "epoch": 23.091666666666665, "loss": 0.0216769240796566, "loss_ce": 6.28488342044875e-05, "loss_iou": 0.2578125, "loss_num": 0.00433349609375, "loss_xval": 0.0216064453125, "num_input_tokens_seen": 505870764, "step": 5542 }, { "epoch": 23.095833333333335, "grad_norm": 2.7685141402952986, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 505962108, "step": 5543 }, { "epoch": 23.095833333333335, "loss": 0.06512368470430374, "loss_ce": 2.2063091819291003e-05, "loss_iou": 0.359375, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 505962108, "step": 5543 }, { "epoch": 23.1, "grad_norm": 2.771916361936285, "learning_rate": 5e-05, "loss": 0.0685, "num_input_tokens_seen": 506053248, "step": 5544 }, { "epoch": 23.1, "loss": 0.045903388410806656, "loss_ce": 8.124551095534116e-05, "loss_iou": 0.201171875, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 506053248, "step": 5544 }, { "epoch": 23.104166666666668, "grad_norm": 2.2927554760290954, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 506143796, "step": 5545 }, { "epoch": 23.104166666666668, "loss": 0.04807007312774658, "loss_ce": 4.886148417426739e-06, "loss_iou": 0.2119140625, "loss_num": 0.00958251953125, "loss_xval": 0.048095703125, "num_input_tokens_seen": 506143796, "step": 5545 }, { "epoch": 23.108333333333334, "grad_norm": 3.9575731812955586, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 506235168, "step": 5546 }, { "epoch": 23.108333333333334, "loss": 0.02666931226849556, "loss_ce": 1.2208596672280692e-05, "loss_iou": 0.1494140625, "loss_num": 0.005340576171875, "loss_xval": 0.026611328125, "num_input_tokens_seen": 506235168, "step": 5546 }, { "epoch": 23.1125, "grad_norm": 2.430795858501176, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 506325352, "step": 5547 }, { "epoch": 23.1125, "loss": 0.02848159149289131, "loss_ce": 8.690438335179351e-06, "loss_iou": 0.375, "loss_num": 0.005706787109375, "loss_xval": 0.0284423828125, "num_input_tokens_seen": 506325352, "step": 5547 }, { "epoch": 23.116666666666667, "grad_norm": 1.8939699679452089, "learning_rate": 5e-05, "loss": 0.0298, "num_input_tokens_seen": 506416588, "step": 5548 }, { "epoch": 23.116666666666667, "loss": 0.020750660449266434, "loss_ce": 5.211283132666722e-05, "loss_iou": 0.314453125, "loss_num": 0.004150390625, "loss_xval": 0.020751953125, "num_input_tokens_seen": 506416588, "step": 5548 }, { "epoch": 23.120833333333334, "grad_norm": 2.1950882893637242, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 506507772, "step": 5549 }, { "epoch": 23.120833333333334, "loss": 0.08341377973556519, "loss_ce": 2.4499406208633445e-05, "loss_iou": 0.1767578125, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 506507772, "step": 5549 }, { "epoch": 23.125, "grad_norm": 2.8894699194279636, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 506598800, "step": 5550 }, { "epoch": 23.125, "loss": 0.030126187950372696, "loss_ce": 1.2968208466190845e-05, "loss_iou": 0.328125, "loss_num": 0.006011962890625, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 506598800, "step": 5550 }, { "epoch": 23.129166666666666, "grad_norm": 2.4512212798468482, "learning_rate": 5e-05, "loss": 0.0615, "num_input_tokens_seen": 506690068, "step": 5551 }, { "epoch": 23.129166666666666, "loss": 0.03477202355861664, "loss_ce": 3.539209137670696e-05, "loss_iou": 0.040771484375, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 506690068, "step": 5551 }, { "epoch": 23.133333333333333, "grad_norm": 2.06191445933578, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 506781076, "step": 5552 }, { "epoch": 23.133333333333333, "loss": 0.05612967908382416, "loss_ce": 6.88864674884826e-05, "loss_iou": 0.267578125, "loss_num": 0.01123046875, "loss_xval": 0.05615234375, "num_input_tokens_seen": 506781076, "step": 5552 }, { "epoch": 23.1375, "grad_norm": 2.12436030769707, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 506871476, "step": 5553 }, { "epoch": 23.1375, "loss": 0.03658786416053772, "loss_ce": 1.2547091500891838e-05, "loss_iou": 0.2197265625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 506871476, "step": 5553 }, { "epoch": 23.141666666666666, "grad_norm": 1.7897164036218582, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 506962352, "step": 5554 }, { "epoch": 23.141666666666666, "loss": 0.04012250900268555, "loss_ce": 7.153533715609228e-06, "loss_iou": 0.2451171875, "loss_num": 0.008056640625, "loss_xval": 0.0400390625, "num_input_tokens_seen": 506962352, "step": 5554 }, { "epoch": 23.145833333333332, "grad_norm": 1.7781822095634223, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 507053444, "step": 5555 }, { "epoch": 23.145833333333332, "loss": 0.026572369039058685, "loss_ce": 6.817023859184701e-06, "loss_iou": 0.232421875, "loss_num": 0.00531005859375, "loss_xval": 0.026611328125, "num_input_tokens_seen": 507053444, "step": 5555 }, { "epoch": 23.15, "grad_norm": 2.145307682262882, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 507145192, "step": 5556 }, { "epoch": 23.15, "loss": 0.033625900745391846, "loss_ce": 1.0787004612211604e-05, "loss_iou": 0.158203125, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 507145192, "step": 5556 }, { "epoch": 23.154166666666665, "grad_norm": 4.031522357261041, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 507236560, "step": 5557 }, { "epoch": 23.154166666666665, "loss": 0.02342827618122101, "loss_ce": 1.3663468052982353e-05, "loss_iou": 0.1884765625, "loss_num": 0.00469970703125, "loss_xval": 0.0234375, "num_input_tokens_seen": 507236560, "step": 5557 }, { "epoch": 23.158333333333335, "grad_norm": 2.1104238745025725, "learning_rate": 5e-05, "loss": 0.0404, "num_input_tokens_seen": 507327812, "step": 5558 }, { "epoch": 23.158333333333335, "loss": 0.05977874621748924, "loss_ce": 1.0070112693938427e-05, "loss_iou": 0.171875, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 507327812, "step": 5558 }, { "epoch": 23.1625, "grad_norm": 2.5630999970900707, "learning_rate": 5e-05, "loss": 0.0622, "num_input_tokens_seen": 507418900, "step": 5559 }, { "epoch": 23.1625, "loss": 0.04528948292136192, "loss_ce": 6.243255484150723e-05, "loss_iou": 0.2255859375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 507418900, "step": 5559 }, { "epoch": 23.166666666666668, "grad_norm": 2.3850673187207754, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 507509864, "step": 5560 }, { "epoch": 23.166666666666668, "loss": 0.026554886251688004, "loss_ce": 3.511292743496597e-05, "loss_iou": 0.291015625, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 507509864, "step": 5560 }, { "epoch": 23.170833333333334, "grad_norm": 3.807294907387483, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 507601516, "step": 5561 }, { "epoch": 23.170833333333334, "loss": 0.025786317884922028, "loss_ce": 2.9481008823495358e-05, "loss_iou": 0.23046875, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 507601516, "step": 5561 }, { "epoch": 23.175, "grad_norm": 3.2862311410004694, "learning_rate": 5e-05, "loss": 0.0743, "num_input_tokens_seen": 507692300, "step": 5562 }, { "epoch": 23.175, "loss": 0.0893373042345047, "loss_ce": 1.9988350686617196e-05, "loss_iou": 0.25, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 507692300, "step": 5562 }, { "epoch": 23.179166666666667, "grad_norm": 3.985297934381173, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 507784504, "step": 5563 }, { "epoch": 23.179166666666667, "loss": 0.03454427421092987, "loss_ce": 6.704148836433887e-05, "loss_iou": 0.287109375, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 507784504, "step": 5563 }, { "epoch": 23.183333333333334, "grad_norm": 2.9936148131713076, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 507875576, "step": 5564 }, { "epoch": 23.183333333333334, "loss": 0.06957919895648956, "loss_ce": 2.9639351851074025e-05, "loss_iou": 0.205078125, "loss_num": 0.013916015625, "loss_xval": 0.0693359375, "num_input_tokens_seen": 507875576, "step": 5564 }, { "epoch": 23.1875, "grad_norm": 2.751201836326489, "learning_rate": 5e-05, "loss": 0.038, "num_input_tokens_seen": 507967012, "step": 5565 }, { "epoch": 23.1875, "loss": 0.056741319596767426, "loss_ce": 0.00013121204392518848, "loss_iou": 0.314453125, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 507967012, "step": 5565 }, { "epoch": 23.191666666666666, "grad_norm": 3.655389361669214, "learning_rate": 5e-05, "loss": 0.029, "num_input_tokens_seen": 508058164, "step": 5566 }, { "epoch": 23.191666666666666, "loss": 0.01976543664932251, "loss_ce": 2.0563513317028992e-05, "loss_iou": 0.2216796875, "loss_num": 0.003936767578125, "loss_xval": 0.019775390625, "num_input_tokens_seen": 508058164, "step": 5566 }, { "epoch": 23.195833333333333, "grad_norm": 3.2946355046531512, "learning_rate": 5e-05, "loss": 0.0429, "num_input_tokens_seen": 508148328, "step": 5567 }, { "epoch": 23.195833333333333, "loss": 0.056723110377788544, "loss_ce": 9.774295176612213e-05, "loss_iou": 0.212890625, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 508148328, "step": 5567 }, { "epoch": 23.2, "grad_norm": 2.3083514977333017, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 508238268, "step": 5568 }, { "epoch": 23.2, "loss": 0.07298716902732849, "loss_ce": 1.964076545846183e-05, "loss_iou": 0.2001953125, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 508238268, "step": 5568 }, { "epoch": 23.204166666666666, "grad_norm": 2.787342467258707, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 508329436, "step": 5569 }, { "epoch": 23.204166666666666, "loss": 0.02801087126135826, "loss_ce": 0.0026660216972231865, "loss_iou": 0.2060546875, "loss_num": 0.00506591796875, "loss_xval": 0.025390625, "num_input_tokens_seen": 508329436, "step": 5569 }, { "epoch": 23.208333333333332, "grad_norm": 1.803000257307526, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 508420948, "step": 5570 }, { "epoch": 23.208333333333332, "loss": 0.03525412082672119, "loss_ce": 0.0007082189549691975, "loss_iou": 0.0546875, "loss_num": 0.00689697265625, "loss_xval": 0.03466796875, "num_input_tokens_seen": 508420948, "step": 5570 }, { "epoch": 23.2125, "grad_norm": 1.768192239444829, "learning_rate": 5e-05, "loss": 0.0501, "num_input_tokens_seen": 508512224, "step": 5571 }, { "epoch": 23.2125, "loss": 0.0395321287214756, "loss_ce": 2.7123618565383367e-05, "loss_iou": 0.25390625, "loss_num": 0.00787353515625, "loss_xval": 0.03955078125, "num_input_tokens_seen": 508512224, "step": 5571 }, { "epoch": 23.216666666666665, "grad_norm": 1.3839672931764273, "learning_rate": 5e-05, "loss": 0.0367, "num_input_tokens_seen": 508603140, "step": 5572 }, { "epoch": 23.216666666666665, "loss": 0.018957097083330154, "loss_ce": 5.68003724765731e-06, "loss_iou": 0.220703125, "loss_num": 0.0037841796875, "loss_xval": 0.0189208984375, "num_input_tokens_seen": 508603140, "step": 5572 }, { "epoch": 23.220833333333335, "grad_norm": 1.5503597653476882, "learning_rate": 5e-05, "loss": 0.037, "num_input_tokens_seen": 508694560, "step": 5573 }, { "epoch": 23.220833333333335, "loss": 0.04941952973604202, "loss_ce": 0.00024426612071692944, "loss_iou": 0.193359375, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 508694560, "step": 5573 }, { "epoch": 23.225, "grad_norm": 1.8940524628460167, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 508785764, "step": 5574 }, { "epoch": 23.225, "loss": 0.03350139036774635, "loss_ce": 9.990063699660823e-05, "loss_iou": 0.1943359375, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 508785764, "step": 5574 }, { "epoch": 23.229166666666668, "grad_norm": 2.607973144041499, "learning_rate": 5e-05, "loss": 0.0651, "num_input_tokens_seen": 508876752, "step": 5575 }, { "epoch": 23.229166666666668, "loss": 0.06860657036304474, "loss_ce": 0.0004608180024661124, "loss_iou": 0.3203125, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 508876752, "step": 5575 }, { "epoch": 23.233333333333334, "grad_norm": 3.5325802197812854, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 508968252, "step": 5576 }, { "epoch": 23.233333333333334, "loss": 0.046381525695323944, "loss_ce": 1.00669694802491e-05, "loss_iou": 0.271484375, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 508968252, "step": 5576 }, { "epoch": 23.2375, "grad_norm": 3.024591578915835, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 509058952, "step": 5577 }, { "epoch": 23.2375, "loss": 0.028720693662762642, "loss_ce": 1.8911972802015953e-05, "loss_iou": 0.3046875, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 509058952, "step": 5577 }, { "epoch": 23.241666666666667, "grad_norm": 2.56935630758592, "learning_rate": 5e-05, "loss": 0.0278, "num_input_tokens_seen": 509149648, "step": 5578 }, { "epoch": 23.241666666666667, "loss": 0.020547185093164444, "loss_ce": 8.854508450895082e-06, "loss_iou": 0.1640625, "loss_num": 0.004119873046875, "loss_xval": 0.0205078125, "num_input_tokens_seen": 509149648, "step": 5578 }, { "epoch": 23.245833333333334, "grad_norm": 2.1576622370877194, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 509241252, "step": 5579 }, { "epoch": 23.245833333333334, "loss": 0.0377604141831398, "loss_ce": 1.0170961104449816e-05, "loss_iou": 0.263671875, "loss_num": 0.007537841796875, "loss_xval": 0.037841796875, "num_input_tokens_seen": 509241252, "step": 5579 }, { "epoch": 23.25, "grad_norm": 4.0794898404105835, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 509332528, "step": 5580 }, { "epoch": 23.25, "loss": 0.059780120849609375, "loss_ce": 4.958963836543262e-05, "loss_iou": 0.2412109375, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 509332528, "step": 5580 }, { "epoch": 23.254166666666666, "grad_norm": 3.0912967392107884, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 509423812, "step": 5581 }, { "epoch": 23.254166666666666, "loss": 0.04734455794095993, "loss_ce": 7.28306476958096e-05, "loss_iou": 0.291015625, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 509423812, "step": 5581 }, { "epoch": 23.258333333333333, "grad_norm": 3.2029406976315844, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 509515444, "step": 5582 }, { "epoch": 23.258333333333333, "loss": 0.0323030985891819, "loss_ce": 5.3646705055143684e-05, "loss_iou": 0.228515625, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 509515444, "step": 5582 }, { "epoch": 23.2625, "grad_norm": 2.5528821665535215, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 509606412, "step": 5583 }, { "epoch": 23.2625, "loss": 0.03638289123773575, "loss_ce": 4.4086533307563514e-05, "loss_iou": 0.205078125, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 509606412, "step": 5583 }, { "epoch": 23.266666666666666, "grad_norm": 2.021321254403423, "learning_rate": 5e-05, "loss": 0.0443, "num_input_tokens_seen": 509697308, "step": 5584 }, { "epoch": 23.266666666666666, "loss": 0.04420618340373039, "loss_ce": 1.6727790352888405e-05, "loss_iou": 0.1689453125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 509697308, "step": 5584 }, { "epoch": 23.270833333333332, "grad_norm": 1.019907924151526, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 509788872, "step": 5585 }, { "epoch": 23.270833333333332, "loss": 0.03787646442651749, "loss_ce": 0.0007403845083899796, "loss_iou": 0.23828125, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 509788872, "step": 5585 }, { "epoch": 23.275, "grad_norm": 1.211511372144261, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 509880496, "step": 5586 }, { "epoch": 23.275, "loss": 0.043580561876297, "loss_ce": 1.672182224865537e-05, "loss_iou": 0.142578125, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 509880496, "step": 5586 }, { "epoch": 23.279166666666665, "grad_norm": 2.38833077590804, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 509972204, "step": 5587 }, { "epoch": 23.279166666666665, "loss": 0.03036055527627468, "loss_ce": 3.371208003954962e-05, "loss_iou": 0.26953125, "loss_num": 0.006072998046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 509972204, "step": 5587 }, { "epoch": 23.283333333333335, "grad_norm": 2.455506356018568, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 510063756, "step": 5588 }, { "epoch": 23.283333333333335, "loss": 0.027071993798017502, "loss_ce": 1.8160757463192567e-05, "loss_iou": 0.2265625, "loss_num": 0.005401611328125, "loss_xval": 0.027099609375, "num_input_tokens_seen": 510063756, "step": 5588 }, { "epoch": 23.2875, "grad_norm": 4.171101864763519, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 510155320, "step": 5589 }, { "epoch": 23.2875, "loss": 0.028365176171064377, "loss_ce": 4.486342368181795e-05, "loss_iou": 0.3125, "loss_num": 0.00567626953125, "loss_xval": 0.0283203125, "num_input_tokens_seen": 510155320, "step": 5589 }, { "epoch": 23.291666666666668, "grad_norm": 3.2719513676794, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 510246112, "step": 5590 }, { "epoch": 23.291666666666668, "loss": 0.0256138164550066, "loss_ce": 9.567937013343908e-06, "loss_iou": 0.19140625, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 510246112, "step": 5590 }, { "epoch": 23.295833333333334, "grad_norm": 2.1603423840105767, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 510337288, "step": 5591 }, { "epoch": 23.295833333333334, "loss": 0.02922121249139309, "loss_ce": 2.3519523892900907e-05, "loss_iou": 0.216796875, "loss_num": 0.005828857421875, "loss_xval": 0.0291748046875, "num_input_tokens_seen": 510337288, "step": 5591 }, { "epoch": 23.3, "grad_norm": 1.7808574393579057, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 510428620, "step": 5592 }, { "epoch": 23.3, "loss": 0.05729286000132561, "loss_ce": 1.8992475816048682e-05, "loss_iou": 0.013671875, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 510428620, "step": 5592 }, { "epoch": 23.304166666666667, "grad_norm": 12.535122906527729, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 510518272, "step": 5593 }, { "epoch": 23.304166666666667, "loss": 0.030911097303032875, "loss_ce": 2.7308211429044604e-05, "loss_iou": 0.1826171875, "loss_num": 0.00616455078125, "loss_xval": 0.0308837890625, "num_input_tokens_seen": 510518272, "step": 5593 }, { "epoch": 23.308333333333334, "grad_norm": 2.121029182691071, "learning_rate": 5e-05, "loss": 0.0237, "num_input_tokens_seen": 510609588, "step": 5594 }, { "epoch": 23.308333333333334, "loss": 0.02242228388786316, "loss_ce": 7.123675459297374e-06, "loss_iou": 0.2041015625, "loss_num": 0.004486083984375, "loss_xval": 0.0224609375, "num_input_tokens_seen": 510609588, "step": 5594 }, { "epoch": 23.3125, "grad_norm": 2.2235792181374423, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 510701224, "step": 5595 }, { "epoch": 23.3125, "loss": 0.06232095882296562, "loss_ce": 5.747355316998437e-05, "loss_iou": 0.138671875, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 510701224, "step": 5595 }, { "epoch": 23.316666666666666, "grad_norm": 2.1027845062454613, "learning_rate": 5e-05, "loss": 0.0282, "num_input_tokens_seen": 510792288, "step": 5596 }, { "epoch": 23.316666666666666, "loss": 0.01713455468416214, "loss_ce": 6.564921932294965e-06, "loss_iou": 0.13671875, "loss_num": 0.00341796875, "loss_xval": 0.01708984375, "num_input_tokens_seen": 510792288, "step": 5596 }, { "epoch": 23.320833333333333, "grad_norm": 2.039990023904755, "learning_rate": 5e-05, "loss": 0.0279, "num_input_tokens_seen": 510883200, "step": 5597 }, { "epoch": 23.320833333333333, "loss": 0.025514788925647736, "loss_ce": 9.722947652335279e-06, "loss_iou": 0.1611328125, "loss_num": 0.005096435546875, "loss_xval": 0.0255126953125, "num_input_tokens_seen": 510883200, "step": 5597 }, { "epoch": 23.325, "grad_norm": 3.283042243440875, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 510974092, "step": 5598 }, { "epoch": 23.325, "loss": 0.0327766016125679, "loss_ce": 2.3610751668456942e-05, "loss_iou": 0.224609375, "loss_num": 0.006561279296875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 510974092, "step": 5598 }, { "epoch": 23.329166666666666, "grad_norm": 3.2783258428697626, "learning_rate": 5e-05, "loss": 0.0355, "num_input_tokens_seen": 511065076, "step": 5599 }, { "epoch": 23.329166666666666, "loss": 0.044812239706516266, "loss_ce": 6.583758658962324e-05, "loss_iou": 0.031982421875, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 511065076, "step": 5599 }, { "epoch": 23.333333333333332, "grad_norm": 3.227421569002465, "learning_rate": 5e-05, "loss": 0.0626, "num_input_tokens_seen": 511156564, "step": 5600 }, { "epoch": 23.333333333333332, "loss": 0.09824306517839432, "loss_ce": 6.981414117035456e-06, "loss_iou": 0.19921875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 511156564, "step": 5600 }, { "epoch": 23.3375, "grad_norm": 4.576749131718821, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 511247844, "step": 5601 }, { "epoch": 23.3375, "loss": 0.03233812749385834, "loss_ce": 4.754355359182227e-06, "loss_iou": 0.32421875, "loss_num": 0.0064697265625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 511247844, "step": 5601 }, { "epoch": 23.341666666666665, "grad_norm": 1.756281817515379, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 511338828, "step": 5602 }, { "epoch": 23.341666666666665, "loss": 0.04080682992935181, "loss_ce": 2.0088969904463738e-05, "loss_iou": 0.2734375, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 511338828, "step": 5602 }, { "epoch": 23.345833333333335, "grad_norm": 2.3779147808503587, "learning_rate": 5e-05, "loss": 0.0343, "num_input_tokens_seen": 511430068, "step": 5603 }, { "epoch": 23.345833333333335, "loss": 0.046825163066387177, "loss_ce": 1.1200661901966669e-05, "loss_iou": 0.2265625, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 511430068, "step": 5603 }, { "epoch": 23.35, "grad_norm": 2.3823996882656218, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 511521604, "step": 5604 }, { "epoch": 23.35, "loss": 0.018639925867319107, "loss_ce": 2.4203442080761306e-05, "loss_iou": 0.1572265625, "loss_num": 0.00372314453125, "loss_xval": 0.0185546875, "num_input_tokens_seen": 511521604, "step": 5604 }, { "epoch": 23.354166666666668, "grad_norm": 3.1417895799293074, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 511613616, "step": 5605 }, { "epoch": 23.354166666666668, "loss": 0.03632240742444992, "loss_ce": 2.1747357095591724e-05, "loss_iou": 0.197265625, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 511613616, "step": 5605 }, { "epoch": 23.358333333333334, "grad_norm": 2.2423365837651823, "learning_rate": 5e-05, "loss": 0.0664, "num_input_tokens_seen": 511704868, "step": 5606 }, { "epoch": 23.358333333333334, "loss": 0.07503880560398102, "loss_ce": 0.002468003425747156, "loss_iou": 0.1650390625, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 511704868, "step": 5606 }, { "epoch": 23.3625, "grad_norm": 1.4279371299504686, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 511796228, "step": 5607 }, { "epoch": 23.3625, "loss": 0.021348990499973297, "loss_ce": 4.390611138660461e-05, "loss_iou": 0.1982421875, "loss_num": 0.0042724609375, "loss_xval": 0.0213623046875, "num_input_tokens_seen": 511796228, "step": 5607 }, { "epoch": 23.366666666666667, "grad_norm": 2.260574375748545, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 511887356, "step": 5608 }, { "epoch": 23.366666666666667, "loss": 0.024202514439821243, "loss_ce": 1.733544559101574e-05, "loss_iou": 0.16015625, "loss_num": 0.00482177734375, "loss_xval": 0.024169921875, "num_input_tokens_seen": 511887356, "step": 5608 }, { "epoch": 23.370833333333334, "grad_norm": 2.2463134527685646, "learning_rate": 5e-05, "loss": 0.0794, "num_input_tokens_seen": 511978756, "step": 5609 }, { "epoch": 23.370833333333334, "loss": 0.08751125633716583, "loss_ce": 3.2622563594486564e-05, "loss_iou": 0.271484375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 511978756, "step": 5609 }, { "epoch": 23.375, "grad_norm": 1.5376900540037068, "learning_rate": 5e-05, "loss": 0.0331, "num_input_tokens_seen": 512069884, "step": 5610 }, { "epoch": 23.375, "loss": 0.028105616569519043, "loss_ce": 6.5541730691620614e-06, "loss_iou": 0.2021484375, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 512069884, "step": 5610 }, { "epoch": 23.379166666666666, "grad_norm": 2.708976095662761, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 512160852, "step": 5611 }, { "epoch": 23.379166666666666, "loss": 0.07938659191131592, "loss_ce": 0.00013243715511634946, "loss_iou": 0.25390625, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 512160852, "step": 5611 }, { "epoch": 23.383333333333333, "grad_norm": 2.4624848588135295, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 512252172, "step": 5612 }, { "epoch": 23.383333333333333, "loss": 0.028108566999435425, "loss_ce": 7.817186997272074e-05, "loss_iou": 0.13671875, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 512252172, "step": 5612 }, { "epoch": 23.3875, "grad_norm": 2.234790836550349, "learning_rate": 5e-05, "loss": 0.0236, "num_input_tokens_seen": 512343476, "step": 5613 }, { "epoch": 23.3875, "loss": 0.024507207795977592, "loss_ce": 0.00011603281018324196, "loss_iou": 0.3046875, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 512343476, "step": 5613 }, { "epoch": 23.391666666666666, "grad_norm": 2.6623352490414596, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 512434636, "step": 5614 }, { "epoch": 23.391666666666666, "loss": 0.0387955941259861, "loss_ce": 7.751992598059587e-06, "loss_iou": 0.267578125, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 512434636, "step": 5614 }, { "epoch": 23.395833333333332, "grad_norm": 3.3554717038231856, "learning_rate": 5e-05, "loss": 0.0293, "num_input_tokens_seen": 512525512, "step": 5615 }, { "epoch": 23.395833333333332, "loss": 0.024253401905298233, "loss_ce": 7.185792583186412e-06, "loss_iou": 0.306640625, "loss_num": 0.004852294921875, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 512525512, "step": 5615 }, { "epoch": 23.4, "grad_norm": 2.4752797068979233, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 512617024, "step": 5616 }, { "epoch": 23.4, "loss": 0.020623400807380676, "loss_ce": 8.507090387865901e-05, "loss_iou": 0.169921875, "loss_num": 0.004119873046875, "loss_xval": 0.0205078125, "num_input_tokens_seen": 512617024, "step": 5616 }, { "epoch": 23.404166666666665, "grad_norm": 1.6684475913014143, "learning_rate": 5e-05, "loss": 0.0228, "num_input_tokens_seen": 512708132, "step": 5617 }, { "epoch": 23.404166666666665, "loss": 0.02751937136054039, "loss_ce": 7.775272024446167e-06, "loss_iou": 0.216796875, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 512708132, "step": 5617 }, { "epoch": 23.408333333333335, "grad_norm": 1.8713570994552078, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 512798976, "step": 5618 }, { "epoch": 23.408333333333335, "loss": 0.03592763468623161, "loss_ce": 8.446499123238027e-06, "loss_iou": 0.2392578125, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 512798976, "step": 5618 }, { "epoch": 23.4125, "grad_norm": 3.3172557575079473, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 512890368, "step": 5619 }, { "epoch": 23.4125, "loss": 0.03127144277095795, "loss_ce": 5.196149504627101e-05, "loss_iou": 0.287109375, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 512890368, "step": 5619 }, { "epoch": 23.416666666666668, "grad_norm": 2.2956983752384836, "learning_rate": 5e-05, "loss": 0.0261, "num_input_tokens_seen": 512982264, "step": 5620 }, { "epoch": 23.416666666666668, "loss": 0.02757253870368004, "loss_ce": 3.0425067961914465e-05, "loss_iou": 0.259765625, "loss_num": 0.0054931640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 512982264, "step": 5620 }, { "epoch": 23.420833333333334, "grad_norm": 1.5291227424452554, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 513073336, "step": 5621 }, { "epoch": 23.420833333333334, "loss": 0.024489475414156914, "loss_ce": 1.437848732166458e-05, "loss_iou": 0.1357421875, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 513073336, "step": 5621 }, { "epoch": 23.425, "grad_norm": 1.4357362026895593, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 513165060, "step": 5622 }, { "epoch": 23.425, "loss": 0.08363399654626846, "loss_ce": 8.204024197766557e-06, "loss_iou": 0.1962890625, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 513165060, "step": 5622 }, { "epoch": 23.429166666666667, "grad_norm": 2.465944564759923, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 513256796, "step": 5623 }, { "epoch": 23.429166666666667, "loss": 0.057980284094810486, "loss_ce": 1.2144162610638887e-05, "loss_iou": 0.359375, "loss_num": 0.0115966796875, "loss_xval": 0.057861328125, "num_input_tokens_seen": 513256796, "step": 5623 }, { "epoch": 23.433333333333334, "grad_norm": 1.8353432433000278, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 513348044, "step": 5624 }, { "epoch": 23.433333333333334, "loss": 0.06825631856918335, "loss_ce": 1.1382675438653678e-05, "loss_iou": 0.2138671875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 513348044, "step": 5624 }, { "epoch": 23.4375, "grad_norm": 2.3975083646021935, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 513439524, "step": 5625 }, { "epoch": 23.4375, "loss": 0.018934469670057297, "loss_ce": 5.941654762864346e-06, "loss_iou": 0.251953125, "loss_num": 0.0037841796875, "loss_xval": 0.0189208984375, "num_input_tokens_seen": 513439524, "step": 5625 }, { "epoch": 23.441666666666666, "grad_norm": 2.7420797301869957, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 513531316, "step": 5626 }, { "epoch": 23.441666666666666, "loss": 0.036931782960891724, "loss_ce": 5.513968972081784e-06, "loss_iou": 0.234375, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 513531316, "step": 5626 }, { "epoch": 23.445833333333333, "grad_norm": 3.887594423043908, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 513622396, "step": 5627 }, { "epoch": 23.445833333333333, "loss": 0.11473788321018219, "loss_ce": 3.7564546801149845e-05, "loss_iou": 0.1962890625, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 513622396, "step": 5627 }, { "epoch": 23.45, "grad_norm": 3.32244812447736, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 513711996, "step": 5628 }, { "epoch": 23.45, "loss": 0.03876044601202011, "loss_ce": 1.0750181900220923e-05, "loss_iou": 0.310546875, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 513711996, "step": 5628 }, { "epoch": 23.454166666666666, "grad_norm": 2.196725364122393, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 513803728, "step": 5629 }, { "epoch": 23.454166666666666, "loss": 0.044431425631046295, "loss_ce": 1.3089969797874801e-05, "loss_iou": 0.21484375, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 513803728, "step": 5629 }, { "epoch": 23.458333333333332, "grad_norm": 2.0038379188545035, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 513895252, "step": 5630 }, { "epoch": 23.458333333333332, "loss": 0.01975741982460022, "loss_ce": 1.254726703336928e-05, "loss_iou": 0.1455078125, "loss_num": 0.003936767578125, "loss_xval": 0.019775390625, "num_input_tokens_seen": 513895252, "step": 5630 }, { "epoch": 23.4625, "grad_norm": 2.1180023798296244, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 513985996, "step": 5631 }, { "epoch": 23.4625, "loss": 0.05978050082921982, "loss_ce": 4.191587322566193e-06, "loss_iou": 0.236328125, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 513985996, "step": 5631 }, { "epoch": 23.466666666666665, "grad_norm": 2.1144073204801748, "learning_rate": 5e-05, "loss": 0.027, "num_input_tokens_seen": 514077428, "step": 5632 }, { "epoch": 23.466666666666665, "loss": 0.022730417549610138, "loss_ce": 1.0080520951305516e-05, "loss_iou": 0.15625, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 514077428, "step": 5632 }, { "epoch": 23.470833333333335, "grad_norm": 2.089796949258426, "learning_rate": 5e-05, "loss": 0.0248, "num_input_tokens_seen": 514168880, "step": 5633 }, { "epoch": 23.470833333333335, "loss": 0.024836096912622452, "loss_ce": 1.004880505206529e-05, "loss_iou": 0.2109375, "loss_num": 0.004974365234375, "loss_xval": 0.0247802734375, "num_input_tokens_seen": 514168880, "step": 5633 }, { "epoch": 23.475, "grad_norm": 1.985972227164406, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 514261112, "step": 5634 }, { "epoch": 23.475, "loss": 0.050126075744628906, "loss_ce": 8.583403541706502e-06, "loss_iou": 0.2578125, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 514261112, "step": 5634 }, { "epoch": 23.479166666666668, "grad_norm": 2.72394390309996, "learning_rate": 5e-05, "loss": 0.0298, "num_input_tokens_seen": 514351744, "step": 5635 }, { "epoch": 23.479166666666668, "loss": 0.0400192067027092, "loss_ce": 1.066156619344838e-05, "loss_iou": 0.193359375, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 514351744, "step": 5635 }, { "epoch": 23.483333333333334, "grad_norm": 2.702846446322467, "learning_rate": 5e-05, "loss": 0.0338, "num_input_tokens_seen": 514442600, "step": 5636 }, { "epoch": 23.483333333333334, "loss": 0.02324534021317959, "loss_ce": 9.775830403668806e-05, "loss_iou": 0.248046875, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 514442600, "step": 5636 }, { "epoch": 23.4875, "grad_norm": 2.4199996769429224, "learning_rate": 5e-05, "loss": 0.0572, "num_input_tokens_seen": 514534080, "step": 5637 }, { "epoch": 23.4875, "loss": 0.02815338969230652, "loss_ce": 1.236794105352601e-05, "loss_iou": 0.1640625, "loss_num": 0.005615234375, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 514534080, "step": 5637 }, { "epoch": 23.491666666666667, "grad_norm": 2.728410632734877, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 514624976, "step": 5638 }, { "epoch": 23.491666666666667, "loss": 0.03780459612607956, "loss_ce": 1.6205263818847016e-05, "loss_iou": 0.2451171875, "loss_num": 0.007568359375, "loss_xval": 0.037841796875, "num_input_tokens_seen": 514624976, "step": 5638 }, { "epoch": 23.495833333333334, "grad_norm": 2.685654801734654, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 514716316, "step": 5639 }, { "epoch": 23.495833333333334, "loss": 0.0410284698009491, "loss_ce": 4.3360669224057347e-05, "loss_iou": 0.28515625, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 514716316, "step": 5639 }, { "epoch": 23.5, "grad_norm": 2.8698868644425732, "learning_rate": 5e-05, "loss": 0.0442, "num_input_tokens_seen": 514807012, "step": 5640 }, { "epoch": 23.5, "loss": 0.04485444352030754, "loss_ce": 8.859709851094522e-06, "loss_iou": 0.32421875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 514807012, "step": 5640 }, { "epoch": 23.504166666666666, "grad_norm": 2.716969476117626, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 514896080, "step": 5641 }, { "epoch": 23.504166666666666, "loss": 0.048580192029476166, "loss_ce": 3.835902589344187e-06, "loss_iou": 0.2392578125, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 514896080, "step": 5641 }, { "epoch": 23.508333333333333, "grad_norm": 2.975009957374591, "learning_rate": 5e-05, "loss": 0.0884, "num_input_tokens_seen": 514988056, "step": 5642 }, { "epoch": 23.508333333333333, "loss": 0.09305469691753387, "loss_ce": 6.598234904231504e-06, "loss_iou": 0.22265625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 514988056, "step": 5642 }, { "epoch": 23.5125, "grad_norm": 1.8440447054389957, "learning_rate": 5e-05, "loss": 0.1001, "num_input_tokens_seen": 515079164, "step": 5643 }, { "epoch": 23.5125, "loss": 0.053382910788059235, "loss_ce": 1.5296969650080428e-05, "loss_iou": 0.2392578125, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 515079164, "step": 5643 }, { "epoch": 23.516666666666666, "grad_norm": 1.9757833576710389, "learning_rate": 5e-05, "loss": 0.0374, "num_input_tokens_seen": 515170564, "step": 5644 }, { "epoch": 23.516666666666666, "loss": 0.04524645209312439, "loss_ce": 4.142591023992281e-06, "loss_iou": 0.1513671875, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 515170564, "step": 5644 }, { "epoch": 23.520833333333332, "grad_norm": 2.7063172851091832, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 515262060, "step": 5645 }, { "epoch": 23.520833333333332, "loss": 0.05944689363241196, "loss_ce": 1.3914520422986243e-05, "loss_iou": 0.380859375, "loss_num": 0.01190185546875, "loss_xval": 0.059326171875, "num_input_tokens_seen": 515262060, "step": 5645 }, { "epoch": 23.525, "grad_norm": 2.179403906582386, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 515352696, "step": 5646 }, { "epoch": 23.525, "loss": 0.03545938432216644, "loss_ce": 5.586158749792958e-06, "loss_iou": 0.1748046875, "loss_num": 0.007110595703125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 515352696, "step": 5646 }, { "epoch": 23.529166666666665, "grad_norm": 3.323901434148456, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 515443752, "step": 5647 }, { "epoch": 23.529166666666665, "loss": 0.035969581454992294, "loss_ce": 1.9874481949955225e-05, "loss_iou": 0.353515625, "loss_num": 0.0072021484375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 515443752, "step": 5647 }, { "epoch": 23.533333333333335, "grad_norm": 3.3645703196290655, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 515535332, "step": 5648 }, { "epoch": 23.533333333333335, "loss": 0.04916052892804146, "loss_ce": 7.30073224985972e-05, "loss_iou": 0.267578125, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 515535332, "step": 5648 }, { "epoch": 23.5375, "grad_norm": 2.3016853169182405, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 515626824, "step": 5649 }, { "epoch": 23.5375, "loss": 0.1020599901676178, "loss_ce": 9.21357968763914e-06, "loss_iou": 0.23828125, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 515626824, "step": 5649 }, { "epoch": 23.541666666666668, "grad_norm": 2.678787146150945, "learning_rate": 5e-05, "loss": 0.0282, "num_input_tokens_seen": 515717788, "step": 5650 }, { "epoch": 23.541666666666668, "loss": 0.0280543714761734, "loss_ce": 8.716306183487177e-06, "loss_iou": 0.2890625, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 515717788, "step": 5650 }, { "epoch": 23.545833333333334, "grad_norm": 3.1924204887193137, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 515809500, "step": 5651 }, { "epoch": 23.545833333333334, "loss": 0.022662218660116196, "loss_ce": 9.447037882637233e-05, "loss_iou": 0.3046875, "loss_num": 0.0045166015625, "loss_xval": 0.0225830078125, "num_input_tokens_seen": 515809500, "step": 5651 }, { "epoch": 23.55, "grad_norm": 2.1580431546123497, "learning_rate": 5e-05, "loss": 0.044, "num_input_tokens_seen": 515900556, "step": 5652 }, { "epoch": 23.55, "loss": 0.04361415654420853, "loss_ce": 4.532886578090256e-06, "loss_iou": 0.1455078125, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 515900556, "step": 5652 }, { "epoch": 23.554166666666667, "grad_norm": 2.0805932446315856, "learning_rate": 5e-05, "loss": 0.0769, "num_input_tokens_seen": 515991728, "step": 5653 }, { "epoch": 23.554166666666667, "loss": 0.09516514092683792, "loss_ce": 3.702312824316323e-06, "loss_iou": 0.296875, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 515991728, "step": 5653 }, { "epoch": 23.558333333333334, "grad_norm": 2.322630467922017, "learning_rate": 5e-05, "loss": 0.0296, "num_input_tokens_seen": 516082928, "step": 5654 }, { "epoch": 23.558333333333334, "loss": 0.03366834297776222, "loss_ce": 7.451967121596681e-06, "loss_iou": 0.2099609375, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 516082928, "step": 5654 }, { "epoch": 23.5625, "grad_norm": 2.53132198944104, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 516174480, "step": 5655 }, { "epoch": 23.5625, "loss": 0.023414544761180878, "loss_ce": 3.808091423707083e-05, "loss_iou": 0.16796875, "loss_num": 0.004669189453125, "loss_xval": 0.0234375, "num_input_tokens_seen": 516174480, "step": 5655 }, { "epoch": 23.566666666666666, "grad_norm": 3.3568984132217246, "learning_rate": 5e-05, "loss": 0.0743, "num_input_tokens_seen": 516264312, "step": 5656 }, { "epoch": 23.566666666666666, "loss": 0.07518689334392548, "loss_ce": 6.83938969814335e-06, "loss_iou": 0.2236328125, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 516264312, "step": 5656 }, { "epoch": 23.570833333333333, "grad_norm": 3.0716487542142206, "learning_rate": 5e-05, "loss": 0.0637, "num_input_tokens_seen": 516355408, "step": 5657 }, { "epoch": 23.570833333333333, "loss": 0.058454468846321106, "loss_ce": 7.43414057069458e-05, "loss_iou": 0.275390625, "loss_num": 0.01171875, "loss_xval": 0.058349609375, "num_input_tokens_seen": 516355408, "step": 5657 }, { "epoch": 23.575, "grad_norm": 3.63304833056305, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 516447012, "step": 5658 }, { "epoch": 23.575, "loss": 0.08642973005771637, "loss_ce": 3.446522532613017e-05, "loss_iou": 0.306640625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 516447012, "step": 5658 }, { "epoch": 23.579166666666666, "grad_norm": 3.6600622195241517, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 516538268, "step": 5659 }, { "epoch": 23.579166666666666, "loss": 0.031593114137649536, "loss_ce": 1.5052954040584154e-05, "loss_iou": 0.232421875, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 516538268, "step": 5659 }, { "epoch": 23.583333333333332, "grad_norm": 3.4145638310871465, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 516629376, "step": 5660 }, { "epoch": 23.583333333333332, "loss": 0.03159729763865471, "loss_ce": 1.1604059181991033e-05, "loss_iou": 0.205078125, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 516629376, "step": 5660 }, { "epoch": 23.5875, "grad_norm": 2.025115193252702, "learning_rate": 5e-05, "loss": 0.0373, "num_input_tokens_seen": 516720792, "step": 5661 }, { "epoch": 23.5875, "loss": 0.037495002150535583, "loss_ce": 4.157373041380197e-06, "loss_iou": 0.1787109375, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 516720792, "step": 5661 }, { "epoch": 23.591666666666665, "grad_norm": 2.747381866770108, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 516811784, "step": 5662 }, { "epoch": 23.591666666666665, "loss": 0.028072349727153778, "loss_ce": 3.805643473242526e-06, "loss_iou": 0.236328125, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 516811784, "step": 5662 }, { "epoch": 23.595833333333335, "grad_norm": 5.975171417229366, "learning_rate": 5e-05, "loss": 0.0705, "num_input_tokens_seen": 516902588, "step": 5663 }, { "epoch": 23.595833333333335, "loss": 0.029614463448524475, "loss_ce": 4.786224963027053e-06, "loss_iou": 0.30078125, "loss_num": 0.00592041015625, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 516902588, "step": 5663 }, { "epoch": 23.6, "grad_norm": 4.029427007700395, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 516994656, "step": 5664 }, { "epoch": 23.6, "loss": 0.03429003804922104, "loss_ce": 0.0009038057178258896, "loss_iou": 0.35546875, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 516994656, "step": 5664 }, { "epoch": 23.604166666666668, "grad_norm": 6.616968713190724, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 517086096, "step": 5665 }, { "epoch": 23.604166666666668, "loss": 0.030396755784749985, "loss_ce": 1.65061246661935e-05, "loss_iou": 0.306640625, "loss_num": 0.006072998046875, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 517086096, "step": 5665 }, { "epoch": 23.608333333333334, "grad_norm": 2.383206818989159, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 517177220, "step": 5666 }, { "epoch": 23.608333333333334, "loss": 0.06495459377765656, "loss_ce": 4.3700241803890094e-05, "loss_iou": 0.171875, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 517177220, "step": 5666 }, { "epoch": 23.6125, "grad_norm": 4.632698059280031, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 517268364, "step": 5667 }, { "epoch": 23.6125, "loss": 0.05848491191864014, "loss_ce": 0.00015056514530442655, "loss_iou": 0.1142578125, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 517268364, "step": 5667 }, { "epoch": 23.616666666666667, "grad_norm": 2.6065365247113124, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 517359364, "step": 5668 }, { "epoch": 23.616666666666667, "loss": 0.04949769377708435, "loss_ce": 1.3440771908790339e-05, "loss_iou": 0.275390625, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 517359364, "step": 5668 }, { "epoch": 23.620833333333334, "grad_norm": 1.5957956640113704, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 517451020, "step": 5669 }, { "epoch": 23.620833333333334, "loss": 0.09316151589155197, "loss_ce": 0.0001363093324471265, "loss_iou": 0.279296875, "loss_num": 0.0185546875, "loss_xval": 0.09326171875, "num_input_tokens_seen": 517451020, "step": 5669 }, { "epoch": 23.625, "grad_norm": 1.7661471096168884, "learning_rate": 5e-05, "loss": 0.0292, "num_input_tokens_seen": 517542264, "step": 5670 }, { "epoch": 23.625, "loss": 0.031134188175201416, "loss_ce": 6.257385848584818e-06, "loss_iou": 0.1669921875, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 517542264, "step": 5670 }, { "epoch": 23.629166666666666, "grad_norm": 1.9049760315075954, "learning_rate": 5e-05, "loss": 0.0836, "num_input_tokens_seen": 517633508, "step": 5671 }, { "epoch": 23.629166666666666, "loss": 0.15114159882068634, "loss_ce": 3.300470325484639e-06, "loss_iou": 0.1337890625, "loss_num": 0.0301513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 517633508, "step": 5671 }, { "epoch": 23.633333333333333, "grad_norm": 3.357283780868695, "learning_rate": 5e-05, "loss": 0.0601, "num_input_tokens_seen": 517725464, "step": 5672 }, { "epoch": 23.633333333333333, "loss": 0.06346787512302399, "loss_ce": 6.760370160918683e-05, "loss_iou": 0.1572265625, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 517725464, "step": 5672 }, { "epoch": 23.6375, "grad_norm": 1.5407731254040886, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 517816540, "step": 5673 }, { "epoch": 23.6375, "loss": 0.048604048788547516, "loss_ce": 4.8071092351165134e-06, "loss_iou": 0.302734375, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 517816540, "step": 5673 }, { "epoch": 23.641666666666666, "grad_norm": 2.323368477040437, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 517907760, "step": 5674 }, { "epoch": 23.641666666666666, "loss": 0.06671786308288574, "loss_ce": 6.43744670014712e-06, "loss_iou": 0.1982421875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 517907760, "step": 5674 }, { "epoch": 23.645833333333332, "grad_norm": 3.0599498239958325, "learning_rate": 5e-05, "loss": 0.0501, "num_input_tokens_seen": 517999192, "step": 5675 }, { "epoch": 23.645833333333332, "loss": 0.04143279045820236, "loss_ce": 5.178941592021147e-06, "loss_iou": 0.2314453125, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 517999192, "step": 5675 }, { "epoch": 23.65, "grad_norm": 2.769269428960717, "learning_rate": 5e-05, "loss": 0.0544, "num_input_tokens_seen": 518090364, "step": 5676 }, { "epoch": 23.65, "loss": 0.03592964634299278, "loss_ce": 1.0455694791744463e-05, "loss_iou": 0.333984375, "loss_num": 0.0072021484375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 518090364, "step": 5676 }, { "epoch": 23.654166666666665, "grad_norm": 3.6940671800591534, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 518182272, "step": 5677 }, { "epoch": 23.654166666666665, "loss": 0.061431143432855606, "loss_ce": 6.890437816764461e-06, "loss_iou": 0.1845703125, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 518182272, "step": 5677 }, { "epoch": 23.658333333333335, "grad_norm": 2.0216151866354664, "learning_rate": 5e-05, "loss": 0.0426, "num_input_tokens_seen": 518273624, "step": 5678 }, { "epoch": 23.658333333333335, "loss": 0.03560943901538849, "loss_ce": 1.0682435458875261e-05, "loss_iou": 0.23046875, "loss_num": 0.007110595703125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 518273624, "step": 5678 }, { "epoch": 23.6625, "grad_norm": 2.523235154108421, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 518365116, "step": 5679 }, { "epoch": 23.6625, "loss": 0.025457948446273804, "loss_ce": 6.287944870564388e-06, "loss_iou": 0.212890625, "loss_num": 0.005096435546875, "loss_xval": 0.025390625, "num_input_tokens_seen": 518365116, "step": 5679 }, { "epoch": 23.666666666666668, "grad_norm": 3.5324845201005473, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 518456160, "step": 5680 }, { "epoch": 23.666666666666668, "loss": 0.053803130984306335, "loss_ce": 4.641583655029535e-05, "loss_iou": 0.28125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 518456160, "step": 5680 }, { "epoch": 23.670833333333334, "grad_norm": 1.614914817798423, "learning_rate": 5e-05, "loss": 0.0455, "num_input_tokens_seen": 518547808, "step": 5681 }, { "epoch": 23.670833333333334, "loss": 0.04403459653258324, "loss_ce": 5.876697468920611e-05, "loss_iou": 0.203125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 518547808, "step": 5681 }, { "epoch": 23.675, "grad_norm": 1.7345038472368803, "learning_rate": 5e-05, "loss": 0.0223, "num_input_tokens_seen": 518639376, "step": 5682 }, { "epoch": 23.675, "loss": 0.018705224618315697, "loss_ce": 5.8984762290492654e-05, "loss_iou": 0.1005859375, "loss_num": 0.00372314453125, "loss_xval": 0.0186767578125, "num_input_tokens_seen": 518639376, "step": 5682 }, { "epoch": 23.679166666666667, "grad_norm": 2.8706427380183652, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 518731060, "step": 5683 }, { "epoch": 23.679166666666667, "loss": 0.06468746066093445, "loss_ce": 5.123291339259595e-05, "loss_iou": 0.375, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 518731060, "step": 5683 }, { "epoch": 23.683333333333334, "grad_norm": 2.297743551190372, "learning_rate": 5e-05, "loss": 0.0242, "num_input_tokens_seen": 518822916, "step": 5684 }, { "epoch": 23.683333333333334, "loss": 0.023034725338220596, "loss_ce": 3.973131606471725e-05, "loss_iou": 0.1728515625, "loss_num": 0.004608154296875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 518822916, "step": 5684 }, { "epoch": 23.6875, "grad_norm": 2.9373343094831625, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 518914300, "step": 5685 }, { "epoch": 23.6875, "loss": 0.024574488401412964, "loss_ce": 7.83845371188363e-06, "loss_iou": 0.283203125, "loss_num": 0.004913330078125, "loss_xval": 0.0245361328125, "num_input_tokens_seen": 518914300, "step": 5685 }, { "epoch": 23.691666666666666, "grad_norm": 2.7813206419182723, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 519006072, "step": 5686 }, { "epoch": 23.691666666666666, "loss": 0.051136475056409836, "loss_ce": 4.271988018444972e-06, "loss_iou": 0.25390625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 519006072, "step": 5686 }, { "epoch": 23.695833333333333, "grad_norm": 3.26538931228411, "learning_rate": 5e-05, "loss": 0.0244, "num_input_tokens_seen": 519097556, "step": 5687 }, { "epoch": 23.695833333333333, "loss": 0.02645106054842472, "loss_ce": 7.578772965644021e-06, "loss_iou": 0.2041015625, "loss_num": 0.005279541015625, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 519097556, "step": 5687 }, { "epoch": 23.7, "grad_norm": 2.546712078646406, "learning_rate": 5e-05, "loss": 0.0645, "num_input_tokens_seen": 519188724, "step": 5688 }, { "epoch": 23.7, "loss": 0.028148168697953224, "loss_ce": 1.0961780390061904e-05, "loss_iou": 0.22265625, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 519188724, "step": 5688 }, { "epoch": 23.704166666666666, "grad_norm": 2.802237920982695, "learning_rate": 5e-05, "loss": 0.0302, "num_input_tokens_seen": 519280104, "step": 5689 }, { "epoch": 23.704166666666666, "loss": 0.02710503339767456, "loss_ce": 5.424570190371014e-06, "loss_iou": 0.2578125, "loss_num": 0.00543212890625, "loss_xval": 0.027099609375, "num_input_tokens_seen": 519280104, "step": 5689 }, { "epoch": 23.708333333333332, "grad_norm": 2.5827248584449936, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 519370780, "step": 5690 }, { "epoch": 23.708333333333332, "loss": 0.029688237234950066, "loss_ce": 9.89344607660314e-06, "loss_iou": 0.3203125, "loss_num": 0.00592041015625, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 519370780, "step": 5690 }, { "epoch": 23.7125, "grad_norm": 1.889586030914547, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 519462148, "step": 5691 }, { "epoch": 23.7125, "loss": 0.026575732976198196, "loss_ce": 2.551576017140178e-06, "loss_iou": 0.12451171875, "loss_num": 0.00531005859375, "loss_xval": 0.026611328125, "num_input_tokens_seen": 519462148, "step": 5691 }, { "epoch": 23.716666666666665, "grad_norm": 2.020125572866113, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 519553376, "step": 5692 }, { "epoch": 23.716666666666665, "loss": 0.02690189890563488, "loss_ce": 1.5912401067907922e-05, "loss_iou": 0.21484375, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 519553376, "step": 5692 }, { "epoch": 23.720833333333335, "grad_norm": 2.432818855651626, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 519644864, "step": 5693 }, { "epoch": 23.720833333333335, "loss": 0.02428411692380905, "loss_ce": 2.26416923396755e-05, "loss_iou": 0.2099609375, "loss_num": 0.004852294921875, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 519644864, "step": 5693 }, { "epoch": 23.725, "grad_norm": 3.385207640452561, "learning_rate": 5e-05, "loss": 0.0294, "num_input_tokens_seen": 519736944, "step": 5694 }, { "epoch": 23.725, "loss": 0.03325570002198219, "loss_ce": 9.835181845119223e-05, "loss_iou": 0.1787109375, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 519736944, "step": 5694 }, { "epoch": 23.729166666666668, "grad_norm": 3.044204278231839, "learning_rate": 5e-05, "loss": 0.0224, "num_input_tokens_seen": 519828240, "step": 5695 }, { "epoch": 23.729166666666668, "loss": 0.02027270570397377, "loss_ce": 0.0020460819359868765, "loss_iou": 0.234375, "loss_num": 0.0036468505859375, "loss_xval": 0.0181884765625, "num_input_tokens_seen": 519828240, "step": 5695 }, { "epoch": 23.733333333333334, "grad_norm": 2.4047813414462795, "learning_rate": 5e-05, "loss": 0.0336, "num_input_tokens_seen": 519917812, "step": 5696 }, { "epoch": 23.733333333333334, "loss": 0.024971704930067062, "loss_ce": 8.326714123541024e-06, "loss_iou": 0.251953125, "loss_num": 0.0050048828125, "loss_xval": 0.02490234375, "num_input_tokens_seen": 519917812, "step": 5696 }, { "epoch": 23.7375, "grad_norm": 2.0692491005082876, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 520008928, "step": 5697 }, { "epoch": 23.7375, "loss": 0.048609986901283264, "loss_ce": 2.6005120162153617e-05, "loss_iou": 0.27734375, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 520008928, "step": 5697 }, { "epoch": 23.741666666666667, "grad_norm": 5.549537271069052, "learning_rate": 5e-05, "loss": 0.0607, "num_input_tokens_seen": 520100712, "step": 5698 }, { "epoch": 23.741666666666667, "loss": 0.07673782110214233, "loss_ce": 9.292815957451239e-05, "loss_iou": 0.220703125, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 520100712, "step": 5698 }, { "epoch": 23.745833333333334, "grad_norm": 0.8884850629976035, "learning_rate": 5e-05, "loss": 0.0429, "num_input_tokens_seen": 520192780, "step": 5699 }, { "epoch": 23.745833333333334, "loss": 0.06898298114538193, "loss_ce": 4.377113509690389e-05, "loss_iou": 0.212890625, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 520192780, "step": 5699 }, { "epoch": 23.75, "grad_norm": 5.180084394794358, "learning_rate": 5e-05, "loss": 0.0338, "num_input_tokens_seen": 520284164, "step": 5700 }, { "epoch": 23.75, "loss": 0.02840365283191204, "loss_ce": 7.046019163681194e-06, "loss_iou": 0.240234375, "loss_num": 0.00567626953125, "loss_xval": 0.0284423828125, "num_input_tokens_seen": 520284164, "step": 5700 }, { "epoch": 23.754166666666666, "grad_norm": 3.16269972531882, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 520375736, "step": 5701 }, { "epoch": 23.754166666666666, "loss": 0.043525584042072296, "loss_ce": 7.5167668001085985e-06, "loss_iou": 0.306640625, "loss_num": 0.00872802734375, "loss_xval": 0.04345703125, "num_input_tokens_seen": 520375736, "step": 5701 }, { "epoch": 23.758333333333333, "grad_norm": 4.3941677964058705, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 520466708, "step": 5702 }, { "epoch": 23.758333333333333, "loss": 0.040162280201911926, "loss_ce": 8.777634320722427e-06, "loss_iou": 0.259765625, "loss_num": 0.008056640625, "loss_xval": 0.0400390625, "num_input_tokens_seen": 520466708, "step": 5702 }, { "epoch": 23.7625, "grad_norm": 6.172707932635992, "learning_rate": 5e-05, "loss": 0.0557, "num_input_tokens_seen": 520558396, "step": 5703 }, { "epoch": 23.7625, "loss": 0.08005905896425247, "loss_ce": 7.24865822121501e-05, "loss_iou": 0.333984375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 520558396, "step": 5703 }, { "epoch": 23.766666666666666, "grad_norm": 4.081751568320687, "learning_rate": 5e-05, "loss": 0.0469, "num_input_tokens_seen": 520649760, "step": 5704 }, { "epoch": 23.766666666666666, "loss": 0.036110296845436096, "loss_ce": 7.999641638889443e-06, "loss_iou": 0.275390625, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 520649760, "step": 5704 }, { "epoch": 23.770833333333332, "grad_norm": 2.1443196714070716, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 520739288, "step": 5705 }, { "epoch": 23.770833333333332, "loss": 0.0386023223400116, "loss_ce": 1.2845989658671897e-05, "loss_iou": 0.30078125, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 520739288, "step": 5705 }, { "epoch": 23.775, "grad_norm": 6.136813251555154, "learning_rate": 5e-05, "loss": 0.0257, "num_input_tokens_seen": 520830760, "step": 5706 }, { "epoch": 23.775, "loss": 0.025622554123401642, "loss_ce": 0.00014037435175850987, "loss_iou": 0.25, "loss_num": 0.005096435546875, "loss_xval": 0.0255126953125, "num_input_tokens_seen": 520830760, "step": 5706 }, { "epoch": 23.779166666666665, "grad_norm": 1.9800169580594917, "learning_rate": 5e-05, "loss": 0.0519, "num_input_tokens_seen": 520920600, "step": 5707 }, { "epoch": 23.779166666666665, "loss": 0.047232139855623245, "loss_ce": 6.186518021422671e-06, "loss_iou": 0.1708984375, "loss_num": 0.00946044921875, "loss_xval": 0.047119140625, "num_input_tokens_seen": 520920600, "step": 5707 }, { "epoch": 23.783333333333335, "grad_norm": 2.7088956283659313, "learning_rate": 5e-05, "loss": 0.0355, "num_input_tokens_seen": 521011732, "step": 5708 }, { "epoch": 23.783333333333335, "loss": 0.03868023678660393, "loss_ce": 2.2094476662459783e-05, "loss_iou": 0.359375, "loss_num": 0.00775146484375, "loss_xval": 0.03857421875, "num_input_tokens_seen": 521011732, "step": 5708 }, { "epoch": 23.7875, "grad_norm": 4.752971767412011, "learning_rate": 5e-05, "loss": 0.0837, "num_input_tokens_seen": 521103056, "step": 5709 }, { "epoch": 23.7875, "loss": 0.0954560711979866, "loss_ce": 1.2343047274043784e-05, "loss_iou": 0.337890625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 521103056, "step": 5709 }, { "epoch": 23.791666666666668, "grad_norm": 3.074283646205475, "learning_rate": 5e-05, "loss": 0.0413, "num_input_tokens_seen": 521194392, "step": 5710 }, { "epoch": 23.791666666666668, "loss": 0.046271517872810364, "loss_ce": 0.0009071393287740648, "loss_iou": 0.3515625, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 521194392, "step": 5710 }, { "epoch": 23.795833333333334, "grad_norm": 2.3827833336182005, "learning_rate": 5e-05, "loss": 0.0271, "num_input_tokens_seen": 521285936, "step": 5711 }, { "epoch": 23.795833333333334, "loss": 0.03246616944670677, "loss_ce": 3.0945029720896855e-06, "loss_iou": 0.2236328125, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 521285936, "step": 5711 }, { "epoch": 23.8, "grad_norm": 3.117069638103086, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 521376912, "step": 5712 }, { "epoch": 23.8, "loss": 0.04672073572874069, "loss_ce": 5.953358595434111e-06, "loss_iou": 0.24609375, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 521376912, "step": 5712 }, { "epoch": 23.804166666666667, "grad_norm": 3.8832364392280327, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 521467616, "step": 5713 }, { "epoch": 23.804166666666667, "loss": 0.042544711381196976, "loss_ce": 3.2088705665955786e-06, "loss_iou": 0.240234375, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 521467616, "step": 5713 }, { "epoch": 23.808333333333334, "grad_norm": 2.2566435105155254, "learning_rate": 5e-05, "loss": 0.0611, "num_input_tokens_seen": 521559408, "step": 5714 }, { "epoch": 23.808333333333334, "loss": 0.0935177356004715, "loss_ce": 4.247904143994674e-06, "loss_iou": 0.1552734375, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 521559408, "step": 5714 }, { "epoch": 23.8125, "grad_norm": 2.4054971165737107, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 521650176, "step": 5715 }, { "epoch": 23.8125, "loss": 0.040401313453912735, "loss_ce": 3.6688702493847813e-06, "loss_iou": 0.2158203125, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 521650176, "step": 5715 }, { "epoch": 23.816666666666666, "grad_norm": 2.791598862794589, "learning_rate": 5e-05, "loss": 0.0304, "num_input_tokens_seen": 521741528, "step": 5716 }, { "epoch": 23.816666666666666, "loss": 0.030244871973991394, "loss_ce": 9.350531036034226e-05, "loss_iou": 0.193359375, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 521741528, "step": 5716 }, { "epoch": 23.820833333333333, "grad_norm": 2.8172338788712628, "learning_rate": 5e-05, "loss": 0.0228, "num_input_tokens_seen": 521832880, "step": 5717 }, { "epoch": 23.820833333333333, "loss": 0.026685720309615135, "loss_ce": 5.727418283640873e-06, "loss_iou": 0.263671875, "loss_num": 0.005340576171875, "loss_xval": 0.0267333984375, "num_input_tokens_seen": 521832880, "step": 5717 }, { "epoch": 23.825, "grad_norm": 3.34368630324826, "learning_rate": 5e-05, "loss": 0.0569, "num_input_tokens_seen": 521924456, "step": 5718 }, { "epoch": 23.825, "loss": 0.05164847895503044, "loss_ce": 1.2735023119603284e-05, "loss_iou": 0.1982421875, "loss_num": 0.01031494140625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 521924456, "step": 5718 }, { "epoch": 23.829166666666666, "grad_norm": 2.6815516294356097, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 522015648, "step": 5719 }, { "epoch": 23.829166666666666, "loss": 0.03111160174012184, "loss_ce": 6.561008376593236e-06, "loss_iou": 0.25, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 522015648, "step": 5719 }, { "epoch": 23.833333333333332, "grad_norm": 2.6739108353856627, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 522107236, "step": 5720 }, { "epoch": 23.833333333333332, "loss": 0.06111540645360947, "loss_ce": 3.956250111514237e-06, "loss_iou": 0.38671875, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 522107236, "step": 5720 }, { "epoch": 23.8375, "grad_norm": 2.837654807232517, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 522197216, "step": 5721 }, { "epoch": 23.8375, "loss": 0.044276271015405655, "loss_ce": 1.0525964171392843e-05, "loss_iou": 0.232421875, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 522197216, "step": 5721 }, { "epoch": 23.841666666666665, "grad_norm": 3.3273477416902866, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 522288964, "step": 5722 }, { "epoch": 23.841666666666665, "loss": 0.02608104608952999, "loss_ce": 6.481039599748328e-05, "loss_iou": 0.19921875, "loss_num": 0.00518798828125, "loss_xval": 0.0260009765625, "num_input_tokens_seen": 522288964, "step": 5722 }, { "epoch": 23.845833333333335, "grad_norm": 2.5924484025116574, "learning_rate": 5e-05, "loss": 0.0429, "num_input_tokens_seen": 522379756, "step": 5723 }, { "epoch": 23.845833333333335, "loss": 0.0439254529774189, "loss_ce": 1.0658584869815968e-05, "loss_iou": 0.2041015625, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 522379756, "step": 5723 }, { "epoch": 23.85, "grad_norm": 2.9276761082141527, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 522470980, "step": 5724 }, { "epoch": 23.85, "loss": 0.04932574927806854, "loss_ce": 9.343220881419256e-06, "loss_iou": 0.25390625, "loss_num": 0.00982666015625, "loss_xval": 0.04931640625, "num_input_tokens_seen": 522470980, "step": 5724 }, { "epoch": 23.854166666666668, "grad_norm": 2.089152950539155, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 522561248, "step": 5725 }, { "epoch": 23.854166666666668, "loss": 0.02687397226691246, "loss_ce": 3.2470645692228572e-06, "loss_iou": 0.314453125, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 522561248, "step": 5725 }, { "epoch": 23.858333333333334, "grad_norm": 2.4594153447981038, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 522652544, "step": 5726 }, { "epoch": 23.858333333333334, "loss": 0.0471075065433979, "loss_ce": 3.6227847886038944e-06, "loss_iou": 0.318359375, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 522652544, "step": 5726 }, { "epoch": 23.8625, "grad_norm": 1.9635727454926588, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 522744140, "step": 5727 }, { "epoch": 23.8625, "loss": 0.03888658434152603, "loss_ce": 0.00014451687457039952, "loss_iou": 0.259765625, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 522744140, "step": 5727 }, { "epoch": 23.866666666666667, "grad_norm": 0.9211156787835062, "learning_rate": 5e-05, "loss": 0.0933, "num_input_tokens_seen": 522835084, "step": 5728 }, { "epoch": 23.866666666666667, "loss": 0.15552428364753723, "loss_ce": 2.894853651014273e-06, "loss_iou": 0.10888671875, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 522835084, "step": 5728 }, { "epoch": 23.870833333333334, "grad_norm": 0.9425039639689817, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 522926308, "step": 5729 }, { "epoch": 23.870833333333334, "loss": 0.05509873479604721, "loss_ce": 1.4508611457131337e-05, "loss_iou": 0.2099609375, "loss_num": 0.010986328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 522926308, "step": 5729 }, { "epoch": 23.875, "grad_norm": 1.2982466535460244, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 523017560, "step": 5730 }, { "epoch": 23.875, "loss": 0.038000769913196564, "loss_ce": 6.385762389982119e-06, "loss_iou": 0.283203125, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 523017560, "step": 5730 }, { "epoch": 23.879166666666666, "grad_norm": 1.4479330651413629, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 523108756, "step": 5731 }, { "epoch": 23.879166666666666, "loss": 0.02427910827100277, "loss_ce": 0.00010918613406829536, "loss_iou": 0.0927734375, "loss_num": 0.00482177734375, "loss_xval": 0.024169921875, "num_input_tokens_seen": 523108756, "step": 5731 }, { "epoch": 23.883333333333333, "grad_norm": 2.694617725258283, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 523200448, "step": 5732 }, { "epoch": 23.883333333333333, "loss": 0.030782945454120636, "loss_ce": 1.359751149720978e-05, "loss_iou": 0.2080078125, "loss_num": 0.00616455078125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 523200448, "step": 5732 }, { "epoch": 23.8875, "grad_norm": 2.8220834724397847, "learning_rate": 5e-05, "loss": 0.0747, "num_input_tokens_seen": 523292276, "step": 5733 }, { "epoch": 23.8875, "loss": 0.0910167396068573, "loss_ce": 5.691027581633534e-06, "loss_iou": 0.19921875, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 523292276, "step": 5733 }, { "epoch": 23.891666666666666, "grad_norm": 2.1295513985099093, "learning_rate": 5e-05, "loss": 0.0654, "num_input_tokens_seen": 523384228, "step": 5734 }, { "epoch": 23.891666666666666, "loss": 0.10112430155277252, "loss_ce": 4.301551598473452e-06, "loss_iou": 0.173828125, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 523384228, "step": 5734 }, { "epoch": 23.895833333333332, "grad_norm": 3.017383371266045, "learning_rate": 5e-05, "loss": 0.0294, "num_input_tokens_seen": 523475492, "step": 5735 }, { "epoch": 23.895833333333332, "loss": 0.025943610817193985, "loss_ce": 3.6696526422019815e-06, "loss_iou": 0.2021484375, "loss_num": 0.00518798828125, "loss_xval": 0.02587890625, "num_input_tokens_seen": 523475492, "step": 5735 }, { "epoch": 23.9, "grad_norm": 1.2758619859096862, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 523566848, "step": 5736 }, { "epoch": 23.9, "loss": 0.04210450500249863, "loss_ce": 5.505870831257198e-06, "loss_iou": 0.138671875, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 523566848, "step": 5736 }, { "epoch": 23.904166666666665, "grad_norm": 1.2387003727139727, "learning_rate": 5e-05, "loss": 0.0278, "num_input_tokens_seen": 523658692, "step": 5737 }, { "epoch": 23.904166666666665, "loss": 0.022983882576227188, "loss_ce": 1.9404327758820727e-05, "loss_iou": 0.296875, "loss_num": 0.00457763671875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 523658692, "step": 5737 }, { "epoch": 23.908333333333335, "grad_norm": 1.9687035345363064, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 523750232, "step": 5738 }, { "epoch": 23.908333333333335, "loss": 0.07121194899082184, "loss_ce": 0.00019754076492972672, "loss_iou": 0.2294921875, "loss_num": 0.01422119140625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 523750232, "step": 5738 }, { "epoch": 23.9125, "grad_norm": 1.7952091530297702, "learning_rate": 5e-05, "loss": 0.0715, "num_input_tokens_seen": 523841588, "step": 5739 }, { "epoch": 23.9125, "loss": 0.06907568871974945, "loss_ce": 1.4407991329790093e-05, "loss_iou": 0.21875, "loss_num": 0.01385498046875, "loss_xval": 0.06884765625, "num_input_tokens_seen": 523841588, "step": 5739 }, { "epoch": 23.916666666666668, "grad_norm": 1.9967058223364322, "learning_rate": 5e-05, "loss": 0.0332, "num_input_tokens_seen": 523932836, "step": 5740 }, { "epoch": 23.916666666666668, "loss": 0.036780357360839844, "loss_ce": 6.675976237602299e-06, "loss_iou": 0.2255859375, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 523932836, "step": 5740 }, { "epoch": 23.920833333333334, "grad_norm": 1.4979108449408447, "learning_rate": 5e-05, "loss": 0.0686, "num_input_tokens_seen": 524023568, "step": 5741 }, { "epoch": 23.920833333333334, "loss": 0.08334586024284363, "loss_ce": 2.3527120447397465e-06, "loss_iou": 0.26171875, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 524023568, "step": 5741 }, { "epoch": 23.925, "grad_norm": 0.9084482747463926, "learning_rate": 5e-05, "loss": 0.0274, "num_input_tokens_seen": 524115120, "step": 5742 }, { "epoch": 23.925, "loss": 0.02169908955693245, "loss_ce": 3.160901178489439e-05, "loss_iou": 0.1796875, "loss_num": 0.00433349609375, "loss_xval": 0.021728515625, "num_input_tokens_seen": 524115120, "step": 5742 }, { "epoch": 23.929166666666667, "grad_norm": 1.268154810820986, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 524206932, "step": 5743 }, { "epoch": 23.929166666666667, "loss": 0.0273551344871521, "loss_ce": 1.1384383469703607e-05, "loss_iou": 0.2138671875, "loss_num": 0.005462646484375, "loss_xval": 0.02734375, "num_input_tokens_seen": 524206932, "step": 5743 }, { "epoch": 23.933333333333334, "grad_norm": 1.048607072370536, "learning_rate": 5e-05, "loss": 0.0304, "num_input_tokens_seen": 524298416, "step": 5744 }, { "epoch": 23.933333333333334, "loss": 0.03643643110990524, "loss_ce": 6.072639735066332e-06, "loss_iou": 0.09619140625, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 524298416, "step": 5744 }, { "epoch": 23.9375, "grad_norm": 0.8956328009301813, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 524390416, "step": 5745 }, { "epoch": 23.9375, "loss": 0.047046225517988205, "loss_ce": 6.441253935918212e-05, "loss_iou": 0.1591796875, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 524390416, "step": 5745 }, { "epoch": 23.941666666666666, "grad_norm": 1.490795535747788, "learning_rate": 5e-05, "loss": 0.062, "num_input_tokens_seen": 524481288, "step": 5746 }, { "epoch": 23.941666666666666, "loss": 0.047673288732767105, "loss_ce": 8.644723493489437e-06, "loss_iou": 0.177734375, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 524481288, "step": 5746 }, { "epoch": 23.945833333333333, "grad_norm": 2.632357504177815, "learning_rate": 5e-05, "loss": 0.0261, "num_input_tokens_seen": 524573912, "step": 5747 }, { "epoch": 23.945833333333333, "loss": 0.030250361189246178, "loss_ce": 6.847688200650737e-05, "loss_iou": 0.2275390625, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 524573912, "step": 5747 }, { "epoch": 23.95, "grad_norm": 3.2126670774182244, "learning_rate": 5e-05, "loss": 0.0586, "num_input_tokens_seen": 524665208, "step": 5748 }, { "epoch": 23.95, "loss": 0.09400118887424469, "loss_ce": 7.044025551294908e-06, "loss_iou": 0.1953125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 524665208, "step": 5748 }, { "epoch": 23.954166666666666, "grad_norm": 2.3353116804874805, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 524756128, "step": 5749 }, { "epoch": 23.954166666666666, "loss": 0.04571268707513809, "loss_ce": 2.787526318570599e-05, "loss_iou": 0.376953125, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 524756128, "step": 5749 }, { "epoch": 23.958333333333332, "grad_norm": 2.554961548389431, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 524847664, "step": 5750 }, { "epoch": 23.958333333333332, "eval_seeclick_CIoU": 0.20855355635285378, "eval_seeclick_GIoU": 0.1821647845208645, "eval_seeclick_IoU": 0.3190717250108719, "eval_seeclick_MAE_all": 0.12102610990405083, "eval_seeclick_MAE_h": 0.11113836616277695, "eval_seeclick_MAE_w": 0.2582087442278862, "eval_seeclick_MAE_x_boxes": 0.2431691437959671, "eval_seeclick_MAE_y_boxes": 0.11242787539958954, "eval_seeclick_NUM_probability": 0.999996691942215, "eval_seeclick_inside_bbox": 0.6008522808551788, "eval_seeclick_loss": 0.6176849007606506, "eval_seeclick_loss_ce": 0.09981685131788254, "eval_seeclick_loss_iou": 0.4412841796875, "eval_seeclick_loss_num": 0.0973968505859375, "eval_seeclick_loss_xval": 0.48699951171875, "eval_seeclick_runtime": 78.2106, "eval_seeclick_samples_per_second": 0.55, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 524847664, "step": 5750 }, { "epoch": 23.958333333333332, "eval_icons_CIoU": 0.24831150472164154, "eval_icons_GIoU": 0.1872703991830349, "eval_icons_IoU": 0.3625485599040985, "eval_icons_MAE_all": 0.08768786117434502, "eval_icons_MAE_h": 0.1570756696164608, "eval_icons_MAE_w": 0.1565881222486496, "eval_icons_MAE_x_boxes": 0.16092297434806824, "eval_icons_MAE_y_boxes": 0.1555836908519268, "eval_icons_NUM_probability": 0.9999974370002747, "eval_icons_inside_bbox": 0.5381944477558136, "eval_icons_loss": 0.41147759556770325, "eval_icons_loss_ce": 0.0004623572021955624, "eval_icons_loss_iou": 0.2532958984375, "eval_icons_loss_num": 0.083740234375, "eval_icons_loss_xval": 0.418701171875, "eval_icons_runtime": 85.4332, "eval_icons_samples_per_second": 0.585, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 524847664, "step": 5750 }, { "epoch": 23.958333333333332, "eval_screenspot_CIoU": 0.38652878999710083, "eval_screenspot_GIoU": 0.3825633426507314, "eval_screenspot_IoU": 0.4596469004948934, "eval_screenspot_MAE_all": 0.09237878521283467, "eval_screenspot_MAE_h": 0.08135384569565456, "eval_screenspot_MAE_w": 0.20547441641489664, "eval_screenspot_MAE_x_boxes": 0.18527878324190775, "eval_screenspot_MAE_y_boxes": 0.07767945279677708, "eval_screenspot_NUM_probability": 0.9999972383181254, "eval_screenspot_inside_bbox": 0.7145833373069763, "eval_screenspot_loss": 0.46794506907463074, "eval_screenspot_loss_ce": 0.0023575042529652515, "eval_screenspot_loss_iou": 0.3312581380208333, "eval_screenspot_loss_num": 0.09613037109375, "eval_screenspot_loss_xval": 0.48065185546875, "eval_screenspot_runtime": 150.8434, "eval_screenspot_samples_per_second": 0.59, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 524847664, "step": 5750 }, { "epoch": 23.958333333333332, "eval_compot_CIoU": 0.4482163190841675, "eval_compot_GIoU": 0.43212345242500305, "eval_compot_IoU": 0.5305320024490356, "eval_compot_MAE_all": 0.06178927421569824, "eval_compot_MAE_h": 0.07599110901355743, "eval_compot_MAE_w": 0.15570373833179474, "eval_compot_MAE_x_boxes": 0.15898607671260834, "eval_compot_MAE_y_boxes": 0.07342762127518654, "eval_compot_NUM_probability": 0.9999977648258209, "eval_compot_inside_bbox": 0.7048611044883728, "eval_compot_loss": 0.36854714155197144, "eval_compot_loss_ce": 0.060504671186208725, "eval_compot_loss_iou": 0.2802734375, "eval_compot_loss_num": 0.06056976318359375, "eval_compot_loss_xval": 0.302886962890625, "eval_compot_runtime": 91.2487, "eval_compot_samples_per_second": 0.548, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 524847664, "step": 5750 }, { "epoch": 23.958333333333332, "loss": 0.3686365485191345, "loss_ce": 0.06394904106855392, "loss_iou": 0.279296875, "loss_num": 0.06103515625, "loss_xval": 0.3046875, "num_input_tokens_seen": 524847664, "step": 5750 }, { "epoch": 23.9625, "grad_norm": 5.963476561237152, "learning_rate": 5e-05, "loss": 0.0481, "num_input_tokens_seen": 524938412, "step": 5751 }, { "epoch": 23.9625, "loss": 0.04771061986684799, "loss_ce": 4.9790302000474185e-05, "loss_iou": 0.1953125, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 524938412, "step": 5751 }, { "epoch": 23.966666666666665, "grad_norm": 2.50413266026656, "learning_rate": 5e-05, "loss": 0.0628, "num_input_tokens_seen": 525029396, "step": 5752 }, { "epoch": 23.966666666666665, "loss": 0.020784705877304077, "loss_ce": 9.865495485428255e-06, "loss_iou": 0.1962890625, "loss_num": 0.004150390625, "loss_xval": 0.020751953125, "num_input_tokens_seen": 525029396, "step": 5752 }, { "epoch": 23.970833333333335, "grad_norm": 3.325364795867375, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 525121112, "step": 5753 }, { "epoch": 23.970833333333335, "loss": 0.049129169434309006, "loss_ce": 1.1128087862743996e-05, "loss_iou": 0.2353515625, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 525121112, "step": 5753 }, { "epoch": 23.975, "grad_norm": 3.1010127423143454, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 525212764, "step": 5754 }, { "epoch": 23.975, "loss": 0.0367819145321846, "loss_ce": 1.586503640282899e-05, "loss_iou": 0.1259765625, "loss_num": 0.00732421875, "loss_xval": 0.036865234375, "num_input_tokens_seen": 525212764, "step": 5754 }, { "epoch": 23.979166666666668, "grad_norm": 2.509055326477751, "learning_rate": 5e-05, "loss": 0.0633, "num_input_tokens_seen": 525304080, "step": 5755 }, { "epoch": 23.979166666666668, "loss": 0.065666064620018, "loss_ce": 3.801234925049357e-05, "loss_iou": 0.2578125, "loss_num": 0.01312255859375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 525304080, "step": 5755 }, { "epoch": 23.983333333333334, "grad_norm": 4.192342223928191, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 525395496, "step": 5756 }, { "epoch": 23.983333333333334, "loss": 0.030342694371938705, "loss_ce": 3.111306796199642e-05, "loss_iou": 0.21875, "loss_num": 0.006072998046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 525395496, "step": 5756 }, { "epoch": 23.9875, "grad_norm": 2.277673951286204, "learning_rate": 5e-05, "loss": 0.0787, "num_input_tokens_seen": 525486396, "step": 5757 }, { "epoch": 23.9875, "loss": 0.12976506352424622, "loss_ce": 0.005314384587109089, "loss_iou": 0.25, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 525486396, "step": 5757 }, { "epoch": 23.991666666666667, "grad_norm": 3.663415045011244, "learning_rate": 5e-05, "loss": 0.0423, "num_input_tokens_seen": 525577360, "step": 5758 }, { "epoch": 23.991666666666667, "loss": 0.02190682850778103, "loss_ce": 2.836089606716996e-06, "loss_iou": 0.15625, "loss_num": 0.004364013671875, "loss_xval": 0.0218505859375, "num_input_tokens_seen": 525577360, "step": 5758 }, { "epoch": 23.995833333333334, "grad_norm": 2.4707032068519803, "learning_rate": 5e-05, "loss": 0.0376, "num_input_tokens_seen": 525668344, "step": 5759 }, { "epoch": 23.995833333333334, "loss": 0.044314391911029816, "loss_ce": 2.866746399377007e-06, "loss_iou": 0.248046875, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 525668344, "step": 5759 }, { "epoch": 24.0, "grad_norm": 1.9659605951454453, "learning_rate": 5e-05, "loss": 0.0426, "num_input_tokens_seen": 525758440, "step": 5760 }, { "epoch": 24.0, "loss": 0.04882945120334625, "loss_ce": 8.954636541602667e-06, "loss_iou": 0.24609375, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 525758440, "step": 5760 }, { "epoch": 24.004166666666666, "grad_norm": 2.824927214021873, "learning_rate": 5e-05, "loss": 0.0271, "num_input_tokens_seen": 525849888, "step": 5761 }, { "epoch": 24.004166666666666, "loss": 0.02209128439426422, "loss_ce": 4.188737420918187e-06, "loss_iou": 0.203125, "loss_num": 0.004425048828125, "loss_xval": 0.0220947265625, "num_input_tokens_seen": 525849888, "step": 5761 }, { "epoch": 24.008333333333333, "grad_norm": 3.656224607655451, "learning_rate": 5e-05, "loss": 0.0442, "num_input_tokens_seen": 525940484, "step": 5762 }, { "epoch": 24.008333333333333, "loss": 0.04184925556182861, "loss_ce": 9.655170288169757e-06, "loss_iou": 0.28515625, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 525940484, "step": 5762 }, { "epoch": 24.0125, "grad_norm": 2.9338961938291477, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 526031980, "step": 5763 }, { "epoch": 24.0125, "loss": 0.04121887683868408, "loss_ce": 0.00023377228353638202, "loss_iou": 0.26953125, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 526031980, "step": 5763 }, { "epoch": 24.016666666666666, "grad_norm": 3.04974742807937, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 526123056, "step": 5764 }, { "epoch": 24.016666666666666, "loss": 0.030127380043268204, "loss_ce": 2.941884122265037e-05, "loss_iou": 0.27734375, "loss_num": 0.006011962890625, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 526123056, "step": 5764 }, { "epoch": 24.020833333333332, "grad_norm": 2.788804168959336, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 526214632, "step": 5765 }, { "epoch": 24.020833333333332, "loss": 0.04753812402486801, "loss_ce": 6.996840966166928e-06, "loss_iou": 0.2333984375, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 526214632, "step": 5765 }, { "epoch": 24.025, "grad_norm": 2.4890111163498854, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 526305964, "step": 5766 }, { "epoch": 24.025, "loss": 0.0316397026181221, "loss_ce": 8.229689228755888e-06, "loss_iou": 0.2353515625, "loss_num": 0.006317138671875, "loss_xval": 0.03173828125, "num_input_tokens_seen": 526305964, "step": 5766 }, { "epoch": 24.029166666666665, "grad_norm": 2.6348467698823947, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 526397548, "step": 5767 }, { "epoch": 24.029166666666665, "loss": 0.033110395073890686, "loss_ce": 6.449178272305289e-06, "loss_iou": 0.189453125, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 526397548, "step": 5767 }, { "epoch": 24.033333333333335, "grad_norm": 3.1410931077727784, "learning_rate": 5e-05, "loss": 0.0572, "num_input_tokens_seen": 526488664, "step": 5768 }, { "epoch": 24.033333333333335, "loss": 0.07993629574775696, "loss_ce": 1.075953696272336e-05, "loss_iou": 0.330078125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 526488664, "step": 5768 }, { "epoch": 24.0375, "grad_norm": 3.1439928139816, "learning_rate": 5e-05, "loss": 0.0634, "num_input_tokens_seen": 526580092, "step": 5769 }, { "epoch": 24.0375, "loss": 0.0736217349767685, "loss_ce": 4.385370630188845e-05, "loss_iou": 0.302734375, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 526580092, "step": 5769 }, { "epoch": 24.041666666666668, "grad_norm": 2.068542934078751, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 526672048, "step": 5770 }, { "epoch": 24.041666666666668, "loss": 0.06701646745204926, "loss_ce": 4.564270784612745e-05, "loss_iou": 0.2177734375, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 526672048, "step": 5770 }, { "epoch": 24.045833333333334, "grad_norm": 3.422356384221351, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 526762864, "step": 5771 }, { "epoch": 24.045833333333334, "loss": 0.05668189376592636, "loss_ce": 3.1243744160747156e-06, "loss_iou": 0.275390625, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 526762864, "step": 5771 }, { "epoch": 24.05, "grad_norm": 2.8548143452335255, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 526854648, "step": 5772 }, { "epoch": 24.05, "loss": 0.029136527329683304, "loss_ce": 3.038828981516417e-05, "loss_iou": 0.29296875, "loss_num": 0.005828857421875, "loss_xval": 0.029052734375, "num_input_tokens_seen": 526854648, "step": 5772 }, { "epoch": 24.054166666666667, "grad_norm": 3.744100095025013, "learning_rate": 5e-05, "loss": 0.0245, "num_input_tokens_seen": 526945920, "step": 5773 }, { "epoch": 24.054166666666667, "loss": 0.025915004312992096, "loss_ce": 5.581115146924276e-06, "loss_iou": 0.22265625, "loss_num": 0.00518798828125, "loss_xval": 0.02587890625, "num_input_tokens_seen": 526945920, "step": 5773 }, { "epoch": 24.058333333333334, "grad_norm": 2.647107076103916, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 527036784, "step": 5774 }, { "epoch": 24.058333333333334, "loss": 0.04015457630157471, "loss_ce": 8.70062558533391e-06, "loss_iou": 0.349609375, "loss_num": 0.008056640625, "loss_xval": 0.0400390625, "num_input_tokens_seen": 527036784, "step": 5774 }, { "epoch": 24.0625, "grad_norm": 2.331636608977699, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 527128884, "step": 5775 }, { "epoch": 24.0625, "loss": 0.02479863166809082, "loss_ce": 0.00012517115101218224, "loss_iou": 0.1181640625, "loss_num": 0.00494384765625, "loss_xval": 0.024658203125, "num_input_tokens_seen": 527128884, "step": 5775 }, { "epoch": 24.066666666666666, "grad_norm": 1.5536981632931506, "learning_rate": 5e-05, "loss": 0.0271, "num_input_tokens_seen": 527220576, "step": 5776 }, { "epoch": 24.066666666666666, "loss": 0.028964003548026085, "loss_ce": 1.808075830922462e-05, "loss_iou": 0.205078125, "loss_num": 0.00579833984375, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 527220576, "step": 5776 }, { "epoch": 24.070833333333333, "grad_norm": 0.988929743493313, "learning_rate": 5e-05, "loss": 0.0172, "num_input_tokens_seen": 527312016, "step": 5777 }, { "epoch": 24.070833333333333, "loss": 0.017999500036239624, "loss_ce": 2.4647210011607967e-05, "loss_iou": 0.1826171875, "loss_num": 0.00360107421875, "loss_xval": 0.0179443359375, "num_input_tokens_seen": 527312016, "step": 5777 }, { "epoch": 24.075, "grad_norm": 1.4301392756288007, "learning_rate": 5e-05, "loss": 0.0266, "num_input_tokens_seen": 527403400, "step": 5778 }, { "epoch": 24.075, "loss": 0.03159000352025032, "loss_ce": 4.308335064706625e-06, "loss_iou": 0.3359375, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 527403400, "step": 5778 }, { "epoch": 24.079166666666666, "grad_norm": 2.4242658306767693, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 527495060, "step": 5779 }, { "epoch": 24.079166666666666, "loss": 0.03482062369585037, "loss_ce": 7.694960913795512e-06, "loss_iou": 0.251953125, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 527495060, "step": 5779 }, { "epoch": 24.083333333333332, "grad_norm": 3.061292902066298, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 527586612, "step": 5780 }, { "epoch": 24.083333333333332, "loss": 0.05456852912902832, "loss_ce": 3.361558265169151e-05, "loss_iou": 0.24609375, "loss_num": 0.01092529296875, "loss_xval": 0.054443359375, "num_input_tokens_seen": 527586612, "step": 5780 }, { "epoch": 24.0875, "grad_norm": 3.0053826041693625, "learning_rate": 5e-05, "loss": 0.0708, "num_input_tokens_seen": 527678016, "step": 5781 }, { "epoch": 24.0875, "loss": 0.06668329238891602, "loss_ce": 2.387518179602921e-06, "loss_iou": 0.25390625, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 527678016, "step": 5781 }, { "epoch": 24.091666666666665, "grad_norm": 3.228063081715965, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 527769724, "step": 5782 }, { "epoch": 24.091666666666665, "loss": 0.025066962465643883, "loss_ce": 8.215374691644683e-06, "loss_iou": 0.201171875, "loss_num": 0.0050048828125, "loss_xval": 0.0250244140625, "num_input_tokens_seen": 527769724, "step": 5782 }, { "epoch": 24.095833333333335, "grad_norm": 3.9331987558361563, "learning_rate": 5e-05, "loss": 0.0596, "num_input_tokens_seen": 527859344, "step": 5783 }, { "epoch": 24.095833333333335, "loss": 0.08312556147575378, "loss_ce": 3.3073583836085163e-06, "loss_iou": 0.1533203125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 527859344, "step": 5783 }, { "epoch": 24.1, "grad_norm": 2.1210316223620156, "learning_rate": 5e-05, "loss": 0.0517, "num_input_tokens_seen": 527950948, "step": 5784 }, { "epoch": 24.1, "loss": 0.08202598989009857, "loss_ce": 0.0043129813857376575, "loss_iou": 0.2080078125, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 527950948, "step": 5784 }, { "epoch": 24.104166666666668, "grad_norm": 1.1943825243234527, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 528041896, "step": 5785 }, { "epoch": 24.104166666666668, "loss": 0.025781847536563873, "loss_ce": 2.124416369042592e-06, "loss_iou": 0.265625, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 528041896, "step": 5785 }, { "epoch": 24.108333333333334, "grad_norm": 1.8329688133690072, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 528133600, "step": 5786 }, { "epoch": 24.108333333333334, "loss": 0.03760939836502075, "loss_ce": 2.7001751732314005e-05, "loss_iou": 0.16796875, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 528133600, "step": 5786 }, { "epoch": 24.1125, "grad_norm": 3.261515832216803, "learning_rate": 5e-05, "loss": 0.0251, "num_input_tokens_seen": 528224624, "step": 5787 }, { "epoch": 24.1125, "loss": 0.021124158054590225, "loss_ce": 1.362401235383004e-05, "loss_iou": 0.232421875, "loss_num": 0.00421142578125, "loss_xval": 0.0211181640625, "num_input_tokens_seen": 528224624, "step": 5787 }, { "epoch": 24.116666666666667, "grad_norm": 3.310631450485702, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 528315756, "step": 5788 }, { "epoch": 24.116666666666667, "loss": 0.028005395084619522, "loss_ce": 0.00029543385608121753, "loss_iou": 0.38671875, "loss_num": 0.00555419921875, "loss_xval": 0.0277099609375, "num_input_tokens_seen": 528315756, "step": 5788 }, { "epoch": 24.120833333333334, "grad_norm": 2.3412593794878704, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 528407176, "step": 5789 }, { "epoch": 24.120833333333334, "loss": 0.024646885693073273, "loss_ce": 8.786471880739555e-05, "loss_iou": 0.23046875, "loss_num": 0.004913330078125, "loss_xval": 0.0245361328125, "num_input_tokens_seen": 528407176, "step": 5789 }, { "epoch": 24.125, "grad_norm": 2.243844554830831, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 528498852, "step": 5790 }, { "epoch": 24.125, "loss": 0.03345056623220444, "loss_ce": 0.00045343622332438827, "loss_iou": 0.19921875, "loss_num": 0.006622314453125, "loss_xval": 0.032958984375, "num_input_tokens_seen": 528498852, "step": 5790 }, { "epoch": 24.129166666666666, "grad_norm": 2.1521874782066295, "learning_rate": 5e-05, "loss": 0.0519, "num_input_tokens_seen": 528590380, "step": 5791 }, { "epoch": 24.129166666666666, "loss": 0.06372282654047012, "loss_ce": 1.7384611055604182e-05, "loss_iou": 0.193359375, "loss_num": 0.01275634765625, "loss_xval": 0.0634765625, "num_input_tokens_seen": 528590380, "step": 5791 }, { "epoch": 24.133333333333333, "grad_norm": 2.3804266455050396, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 528681884, "step": 5792 }, { "epoch": 24.133333333333333, "loss": 0.0350668840110302, "loss_ce": 2.1849505174031947e-06, "loss_iou": 0.306640625, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 528681884, "step": 5792 }, { "epoch": 24.1375, "grad_norm": 2.7841512549241845, "learning_rate": 5e-05, "loss": 0.0517, "num_input_tokens_seen": 528772944, "step": 5793 }, { "epoch": 24.1375, "loss": 0.07232136279344559, "loss_ce": 2.3284333110495936e-06, "loss_iou": 0.1943359375, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 528772944, "step": 5793 }, { "epoch": 24.141666666666666, "grad_norm": 2.399875513009259, "learning_rate": 5e-05, "loss": 0.0609, "num_input_tokens_seen": 528864260, "step": 5794 }, { "epoch": 24.141666666666666, "loss": 0.09643752872943878, "loss_ce": 1.9853255253110547e-06, "loss_iou": 0.1953125, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 528864260, "step": 5794 }, { "epoch": 24.145833333333332, "grad_norm": 2.2389868112988216, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 528955488, "step": 5795 }, { "epoch": 24.145833333333332, "loss": 0.03514765202999115, "loss_ce": 2.1921698134974577e-05, "loss_iou": 0.0810546875, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 528955488, "step": 5795 }, { "epoch": 24.15, "grad_norm": 1.9068440934972977, "learning_rate": 5e-05, "loss": 0.0251, "num_input_tokens_seen": 529046428, "step": 5796 }, { "epoch": 24.15, "loss": 0.02915019728243351, "loss_ce": 5.908582352276426e-06, "loss_iou": 0.1640625, "loss_num": 0.005828857421875, "loss_xval": 0.0291748046875, "num_input_tokens_seen": 529046428, "step": 5796 }, { "epoch": 24.154166666666665, "grad_norm": 1.9521876939459881, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 529137792, "step": 5797 }, { "epoch": 24.154166666666665, "loss": 0.06888218224048615, "loss_ce": 1.926880213432014e-05, "loss_iou": 0.201171875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 529137792, "step": 5797 }, { "epoch": 24.158333333333335, "grad_norm": 2.960062059778355, "learning_rate": 5e-05, "loss": 0.0235, "num_input_tokens_seen": 529228716, "step": 5798 }, { "epoch": 24.158333333333335, "loss": 0.0200907401740551, "loss_ce": 1.0174837370868772e-05, "loss_iou": 0.2177734375, "loss_num": 0.0040283203125, "loss_xval": 0.02001953125, "num_input_tokens_seen": 529228716, "step": 5798 }, { "epoch": 24.1625, "grad_norm": 2.1244946710045514, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 529320692, "step": 5799 }, { "epoch": 24.1625, "loss": 0.04741659015417099, "loss_ce": 4.186751175438985e-05, "loss_iou": 0.314453125, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 529320692, "step": 5799 }, { "epoch": 24.166666666666668, "grad_norm": 0.9728819809690822, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 529411972, "step": 5800 }, { "epoch": 24.166666666666668, "loss": 0.0351259745657444, "loss_ce": 7.87230601417832e-06, "loss_iou": 0.16015625, "loss_num": 0.00701904296875, "loss_xval": 0.03515625, "num_input_tokens_seen": 529411972, "step": 5800 }, { "epoch": 24.170833333333334, "grad_norm": 1.0448045563911708, "learning_rate": 5e-05, "loss": 0.0766, "num_input_tokens_seen": 529503304, "step": 5801 }, { "epoch": 24.170833333333334, "loss": 0.10207903385162354, "loss_ce": 5.366885943658417e-06, "loss_iou": 0.2734375, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 529503304, "step": 5801 }, { "epoch": 24.175, "grad_norm": 1.406917201813283, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 529594264, "step": 5802 }, { "epoch": 24.175, "loss": 0.033842310309410095, "loss_ce": 0.00047133976477198303, "loss_iou": 0.2353515625, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 529594264, "step": 5802 }, { "epoch": 24.179166666666667, "grad_norm": 2.4702882382279254, "learning_rate": 5e-05, "loss": 0.0264, "num_input_tokens_seen": 529685644, "step": 5803 }, { "epoch": 24.179166666666667, "loss": 0.02854456752538681, "loss_ce": 1.4447728972299956e-05, "loss_iou": 0.208984375, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 529685644, "step": 5803 }, { "epoch": 24.183333333333334, "grad_norm": 3.463313588988296, "learning_rate": 5e-05, "loss": 0.039, "num_input_tokens_seen": 529776884, "step": 5804 }, { "epoch": 24.183333333333334, "loss": 0.04076559096574783, "loss_ce": 9.366483027406503e-06, "loss_iou": 0.3671875, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 529776884, "step": 5804 }, { "epoch": 24.1875, "grad_norm": 3.6153047387307544, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 529868612, "step": 5805 }, { "epoch": 24.1875, "loss": 0.04769245535135269, "loss_ce": 2.3997785319807008e-05, "loss_iou": 0.38671875, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 529868612, "step": 5805 }, { "epoch": 24.191666666666666, "grad_norm": 2.5920213435423065, "learning_rate": 5e-05, "loss": 0.0246, "num_input_tokens_seen": 529959920, "step": 5806 }, { "epoch": 24.191666666666666, "loss": 0.028584707528352737, "loss_ce": 1.2625709132407792e-05, "loss_iou": 0.29296875, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 529959920, "step": 5806 }, { "epoch": 24.195833333333333, "grad_norm": 2.306361847712388, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 530050764, "step": 5807 }, { "epoch": 24.195833333333333, "loss": 0.02445165440440178, "loss_ce": 1.4706112779094838e-05, "loss_iou": 0.275390625, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 530050764, "step": 5807 }, { "epoch": 24.2, "grad_norm": 5.89266302181465, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 530142624, "step": 5808 }, { "epoch": 24.2, "loss": 0.0434047132730484, "loss_ce": 8.719249308342114e-06, "loss_iou": 0.212890625, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 530142624, "step": 5808 }, { "epoch": 24.204166666666666, "grad_norm": 2.340488034661521, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 530234124, "step": 5809 }, { "epoch": 24.204166666666666, "loss": 0.04728977009654045, "loss_ce": 2.782668843792635e-06, "loss_iou": 0.2216796875, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 530234124, "step": 5809 }, { "epoch": 24.208333333333332, "grad_norm": 2.644660101456627, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 530324832, "step": 5810 }, { "epoch": 24.208333333333332, "loss": 0.07045643031597137, "loss_ce": 5.238256562734023e-05, "loss_iou": 0.208984375, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 530324832, "step": 5810 }, { "epoch": 24.2125, "grad_norm": 1.476402483879766, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 530416428, "step": 5811 }, { "epoch": 24.2125, "loss": 0.05975855141878128, "loss_ce": 5.134122147865128e-06, "loss_iou": 0.26171875, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 530416428, "step": 5811 }, { "epoch": 24.216666666666665, "grad_norm": 2.6559357381664386, "learning_rate": 5e-05, "loss": 0.0322, "num_input_tokens_seen": 530507672, "step": 5812 }, { "epoch": 24.216666666666665, "loss": 0.03925604373216629, "loss_ce": 0.0005216074059717357, "loss_iou": 0.2158203125, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 530507672, "step": 5812 }, { "epoch": 24.220833333333335, "grad_norm": 2.797958040194132, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 530599024, "step": 5813 }, { "epoch": 24.220833333333335, "loss": 0.05553257465362549, "loss_ce": 5.839197001478169e-06, "loss_iou": 0.212890625, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 530599024, "step": 5813 }, { "epoch": 24.225, "grad_norm": 2.2344297164372944, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 530688792, "step": 5814 }, { "epoch": 24.225, "loss": 0.025300024077296257, "loss_ce": 1.6210691683227196e-05, "loss_iou": 0.337890625, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 530688792, "step": 5814 }, { "epoch": 24.229166666666668, "grad_norm": 2.558663557519644, "learning_rate": 5e-05, "loss": 0.0538, "num_input_tokens_seen": 530780032, "step": 5815 }, { "epoch": 24.229166666666668, "loss": 0.03335661441087723, "loss_ce": 8.532742867828347e-06, "loss_iou": 0.296875, "loss_num": 0.00665283203125, "loss_xval": 0.033447265625, "num_input_tokens_seen": 530780032, "step": 5815 }, { "epoch": 24.233333333333334, "grad_norm": 3.527114855363792, "learning_rate": 5e-05, "loss": 0.029, "num_input_tokens_seen": 530871548, "step": 5816 }, { "epoch": 24.233333333333334, "loss": 0.0321367122232914, "loss_ce": 1.7028531829055282e-06, "loss_iou": 0.29296875, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 530871548, "step": 5816 }, { "epoch": 24.2375, "grad_norm": 3.648796475096683, "learning_rate": 5e-05, "loss": 0.0457, "num_input_tokens_seen": 530962972, "step": 5817 }, { "epoch": 24.2375, "loss": 0.03030816838145256, "loss_ce": 1.947232522070408e-05, "loss_iou": 0.166015625, "loss_num": 0.00604248046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 530962972, "step": 5817 }, { "epoch": 24.241666666666667, "grad_norm": 2.5664420438996842, "learning_rate": 5e-05, "loss": 0.0772, "num_input_tokens_seen": 531054116, "step": 5818 }, { "epoch": 24.241666666666667, "loss": 0.12223930656909943, "loss_ce": 1.6416102880612016e-05, "loss_iou": 0.109375, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 531054116, "step": 5818 }, { "epoch": 24.245833333333334, "grad_norm": 3.0292135260258783, "learning_rate": 5e-05, "loss": 0.0555, "num_input_tokens_seen": 531145604, "step": 5819 }, { "epoch": 24.245833333333334, "loss": 0.08507491648197174, "loss_ce": 3.768545502680354e-05, "loss_iou": 0.37109375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 531145604, "step": 5819 }, { "epoch": 24.25, "grad_norm": 3.3324941616368946, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 531237248, "step": 5820 }, { "epoch": 24.25, "loss": 0.028207680210471153, "loss_ce": 0.000329872767906636, "loss_iou": 0.35546875, "loss_num": 0.00555419921875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 531237248, "step": 5820 }, { "epoch": 24.254166666666666, "grad_norm": 2.4142162953720256, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 531329000, "step": 5821 }, { "epoch": 24.254166666666666, "loss": 0.04822106659412384, "loss_ce": 3.289967935415916e-06, "loss_iou": 0.12158203125, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 531329000, "step": 5821 }, { "epoch": 24.258333333333333, "grad_norm": 1.4771427598436098, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 531420252, "step": 5822 }, { "epoch": 24.258333333333333, "loss": 0.06313102692365646, "loss_ce": 5.416107342171017e-06, "loss_iou": 0.0625, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 531420252, "step": 5822 }, { "epoch": 24.2625, "grad_norm": 2.3319254388492543, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 531511460, "step": 5823 }, { "epoch": 24.2625, "loss": 0.029953958466649055, "loss_ce": 0.0006036765989847481, "loss_iou": 0.232421875, "loss_num": 0.005859375, "loss_xval": 0.029296875, "num_input_tokens_seen": 531511460, "step": 5823 }, { "epoch": 24.266666666666666, "grad_norm": 2.276987456311338, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 531603208, "step": 5824 }, { "epoch": 24.266666666666666, "loss": 0.08067796379327774, "loss_ce": 4.747425009554718e-06, "loss_iou": 0.28125, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 531603208, "step": 5824 }, { "epoch": 24.270833333333332, "grad_norm": 1.7570098726468637, "learning_rate": 5e-05, "loss": 0.0956, "num_input_tokens_seen": 531694384, "step": 5825 }, { "epoch": 24.270833333333332, "loss": 0.15547670423984528, "loss_ce": 4.906281901639886e-06, "loss_iou": 0.1669921875, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 531694384, "step": 5825 }, { "epoch": 24.275, "grad_norm": 1.2715875752875219, "learning_rate": 5e-05, "loss": 0.0187, "num_input_tokens_seen": 531785304, "step": 5826 }, { "epoch": 24.275, "loss": 0.01852235570549965, "loss_ce": 2.0009840682178037e-06, "loss_iou": 0.1787109375, "loss_num": 0.0037078857421875, "loss_xval": 0.0185546875, "num_input_tokens_seen": 531785304, "step": 5826 }, { "epoch": 24.279166666666665, "grad_norm": 4.224597135091308, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 531876528, "step": 5827 }, { "epoch": 24.279166666666665, "loss": 0.0325944647192955, "loss_ce": 9.317414878751151e-06, "loss_iou": 0.1201171875, "loss_num": 0.00653076171875, "loss_xval": 0.032470703125, "num_input_tokens_seen": 531876528, "step": 5827 }, { "epoch": 24.283333333333335, "grad_norm": 1.0235952605649894, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 531967608, "step": 5828 }, { "epoch": 24.283333333333335, "loss": 0.03589292988181114, "loss_ce": 4.256347892805934e-06, "loss_iou": 0.26171875, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 531967608, "step": 5828 }, { "epoch": 24.2875, "grad_norm": 4.7815468426885275, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 532059628, "step": 5829 }, { "epoch": 24.2875, "loss": 0.037222668528556824, "loss_ce": 6.481990567408502e-06, "loss_iou": 0.2294921875, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 532059628, "step": 5829 }, { "epoch": 24.291666666666668, "grad_norm": 2.1804774460721172, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 532150832, "step": 5830 }, { "epoch": 24.291666666666668, "loss": 0.0251055508852005, "loss_ce": 4.842361704504583e-06, "loss_iou": 0.10205078125, "loss_num": 0.0050048828125, "loss_xval": 0.025146484375, "num_input_tokens_seen": 532150832, "step": 5830 }, { "epoch": 24.295833333333334, "grad_norm": 2.6740592119510853, "learning_rate": 5e-05, "loss": 0.0535, "num_input_tokens_seen": 532242692, "step": 5831 }, { "epoch": 24.295833333333334, "loss": 0.05839364975690842, "loss_ce": 0.00021188265236560255, "loss_iou": 0.294921875, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 532242692, "step": 5831 }, { "epoch": 24.3, "grad_norm": 2.9955427450303573, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 532334064, "step": 5832 }, { "epoch": 24.3, "loss": 0.044400908052921295, "loss_ce": 2.0719862732221372e-05, "loss_iou": 0.279296875, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 532334064, "step": 5832 }, { "epoch": 24.304166666666667, "grad_norm": 3.123291369024042, "learning_rate": 5e-05, "loss": 0.0376, "num_input_tokens_seen": 532426100, "step": 5833 }, { "epoch": 24.304166666666667, "loss": 0.038879990577697754, "loss_ce": 8.226255886256695e-06, "loss_iou": 0.10595703125, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 532426100, "step": 5833 }, { "epoch": 24.308333333333334, "grad_norm": 1.2739468334377282, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 532517564, "step": 5834 }, { "epoch": 24.308333333333334, "loss": 0.03598446026444435, "loss_ce": 4.234915650158655e-06, "loss_iou": 0.376953125, "loss_num": 0.0072021484375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 532517564, "step": 5834 }, { "epoch": 24.3125, "grad_norm": 2.1308749770898876, "learning_rate": 5e-05, "loss": 0.0417, "num_input_tokens_seen": 532608880, "step": 5835 }, { "epoch": 24.3125, "loss": 0.04994537681341171, "loss_ce": 3.362632469361415e-06, "loss_iou": 0.267578125, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 532608880, "step": 5835 }, { "epoch": 24.316666666666666, "grad_norm": 2.5524278736227934, "learning_rate": 5e-05, "loss": 0.0352, "num_input_tokens_seen": 532700312, "step": 5836 }, { "epoch": 24.316666666666666, "loss": 0.04723707586526871, "loss_ce": 1.1121027455374133e-05, "loss_iou": 0.283203125, "loss_num": 0.00946044921875, "loss_xval": 0.047119140625, "num_input_tokens_seen": 532700312, "step": 5836 }, { "epoch": 24.320833333333333, "grad_norm": 2.18093667139612, "learning_rate": 5e-05, "loss": 0.0285, "num_input_tokens_seen": 532791900, "step": 5837 }, { "epoch": 24.320833333333333, "loss": 0.025013525038957596, "loss_ce": 4.369113412394654e-06, "loss_iou": 0.208984375, "loss_num": 0.0050048828125, "loss_xval": 0.0250244140625, "num_input_tokens_seen": 532791900, "step": 5837 }, { "epoch": 24.325, "grad_norm": 3.4516992775793227, "learning_rate": 5e-05, "loss": 0.0382, "num_input_tokens_seen": 532883116, "step": 5838 }, { "epoch": 24.325, "loss": 0.05090608820319176, "loss_ce": 2.7665159905154724e-06, "loss_iou": 0.470703125, "loss_num": 0.01019287109375, "loss_xval": 0.05078125, "num_input_tokens_seen": 532883116, "step": 5838 }, { "epoch": 24.329166666666666, "grad_norm": 2.892724046199087, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 532974236, "step": 5839 }, { "epoch": 24.329166666666666, "loss": 0.034184593707323074, "loss_ce": 4.90624779558857e-06, "loss_iou": 0.33984375, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 532974236, "step": 5839 }, { "epoch": 24.333333333333332, "grad_norm": 3.0937124821286814, "learning_rate": 5e-05, "loss": 0.0494, "num_input_tokens_seen": 533065456, "step": 5840 }, { "epoch": 24.333333333333332, "loss": 0.042577601969242096, "loss_ce": 5.579438948188908e-06, "loss_iou": 0.345703125, "loss_num": 0.008544921875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 533065456, "step": 5840 }, { "epoch": 24.3375, "grad_norm": 3.3613541657894075, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 533155756, "step": 5841 }, { "epoch": 24.3375, "loss": 0.050340019166469574, "loss_ce": 8.902155059331562e-06, "loss_iou": 0.043701171875, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 533155756, "step": 5841 }, { "epoch": 24.341666666666665, "grad_norm": 0.7344652474322563, "learning_rate": 5e-05, "loss": 0.0603, "num_input_tokens_seen": 533246872, "step": 5842 }, { "epoch": 24.341666666666665, "loss": 0.0807098001241684, "loss_ce": 6.0692091210512444e-06, "loss_iou": 0.265625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 533246872, "step": 5842 }, { "epoch": 24.345833333333335, "grad_norm": 1.4355854996546975, "learning_rate": 5e-05, "loss": 0.0379, "num_input_tokens_seen": 533338396, "step": 5843 }, { "epoch": 24.345833333333335, "loss": 0.03224565088748932, "loss_ce": 0.00011063837155234069, "loss_iou": 0.1943359375, "loss_num": 0.00640869140625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 533338396, "step": 5843 }, { "epoch": 24.35, "grad_norm": 1.298353541852667, "learning_rate": 5e-05, "loss": 0.0325, "num_input_tokens_seen": 533429764, "step": 5844 }, { "epoch": 24.35, "loss": 0.040697984397411346, "loss_ce": 2.79236428468721e-06, "loss_iou": 0.1591796875, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 533429764, "step": 5844 }, { "epoch": 24.354166666666668, "grad_norm": 1.4414559994848586, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 533520880, "step": 5845 }, { "epoch": 24.354166666666668, "loss": 0.018156087026000023, "loss_ce": 5.757582130172523e-06, "loss_iou": 0.119140625, "loss_num": 0.003631591796875, "loss_xval": 0.0181884765625, "num_input_tokens_seen": 533520880, "step": 5845 }, { "epoch": 24.358333333333334, "grad_norm": 2.666583677527782, "learning_rate": 5e-05, "loss": 0.0403, "num_input_tokens_seen": 533612312, "step": 5846 }, { "epoch": 24.358333333333334, "loss": 0.034728314727544785, "loss_ce": 6.940867024241015e-06, "loss_iou": 0.26953125, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 533612312, "step": 5846 }, { "epoch": 24.3625, "grad_norm": 3.2379036976763613, "learning_rate": 5e-05, "loss": 0.0696, "num_input_tokens_seen": 533703372, "step": 5847 }, { "epoch": 24.3625, "loss": 0.027110453695058823, "loss_ce": 3.214926209693658e-06, "loss_iou": 0.2890625, "loss_num": 0.00543212890625, "loss_xval": 0.027099609375, "num_input_tokens_seen": 533703372, "step": 5847 }, { "epoch": 24.366666666666667, "grad_norm": 3.387507370053438, "learning_rate": 5e-05, "loss": 0.0343, "num_input_tokens_seen": 533794456, "step": 5848 }, { "epoch": 24.366666666666667, "loss": 0.03715943545103073, "loss_ce": 4.285943759896327e-06, "loss_iou": 0.3984375, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 533794456, "step": 5848 }, { "epoch": 24.370833333333334, "grad_norm": 2.5877390535824008, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 533885400, "step": 5849 }, { "epoch": 24.370833333333334, "loss": 0.04111175611615181, "loss_ce": 4.5775800572300795e-06, "loss_iou": 0.19140625, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 533885400, "step": 5849 }, { "epoch": 24.375, "grad_norm": 2.70954407175507, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 533976732, "step": 5850 }, { "epoch": 24.375, "loss": 0.035382576286792755, "loss_ce": 8.136580436257645e-05, "loss_iou": 0.208984375, "loss_num": 0.007049560546875, "loss_xval": 0.035400390625, "num_input_tokens_seen": 533976732, "step": 5850 }, { "epoch": 24.379166666666666, "grad_norm": 3.679600535484754, "learning_rate": 5e-05, "loss": 0.05, "num_input_tokens_seen": 534068004, "step": 5851 }, { "epoch": 24.379166666666666, "loss": 0.07135792076587677, "loss_ce": 0.00014515325892716646, "loss_iou": 0.2197265625, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 534068004, "step": 5851 }, { "epoch": 24.383333333333333, "grad_norm": 2.9961527662194998, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 534158804, "step": 5852 }, { "epoch": 24.383333333333333, "loss": 0.0763065367937088, "loss_ce": 4.957843884767499e-06, "loss_iou": 0.31640625, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 534158804, "step": 5852 }, { "epoch": 24.3875, "grad_norm": 2.6116225656771643, "learning_rate": 5e-05, "loss": 0.0696, "num_input_tokens_seen": 534249748, "step": 5853 }, { "epoch": 24.3875, "loss": 0.04672173783183098, "loss_ce": 6.956057404750027e-06, "loss_iou": 0.21875, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 534249748, "step": 5853 }, { "epoch": 24.391666666666666, "grad_norm": 3.241412571020756, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 534340768, "step": 5854 }, { "epoch": 24.391666666666666, "loss": 0.08435220271348953, "loss_ce": 1.6172627965715947e-06, "loss_iou": 0.251953125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 534340768, "step": 5854 }, { "epoch": 24.395833333333332, "grad_norm": 2.9697163409184744, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 534431588, "step": 5855 }, { "epoch": 24.395833333333332, "loss": 0.02374483086168766, "loss_ce": 3.267165084253065e-05, "loss_iou": 0.2451171875, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 534431588, "step": 5855 }, { "epoch": 24.4, "grad_norm": 2.96063650221661, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 534522928, "step": 5856 }, { "epoch": 24.4, "loss": 0.02467840537428856, "loss_ce": 4.943191015627235e-06, "loss_iou": 0.259765625, "loss_num": 0.00494384765625, "loss_xval": 0.024658203125, "num_input_tokens_seen": 534522928, "step": 5856 }, { "epoch": 24.404166666666665, "grad_norm": 2.0848545508983465, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 534614356, "step": 5857 }, { "epoch": 24.404166666666665, "loss": 0.039054907858371735, "loss_ce": 8.396092744078487e-05, "loss_iou": 0.1982421875, "loss_num": 0.007781982421875, "loss_xval": 0.0390625, "num_input_tokens_seen": 534614356, "step": 5857 }, { "epoch": 24.408333333333335, "grad_norm": 1.946150724260364, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 534704084, "step": 5858 }, { "epoch": 24.408333333333335, "loss": 0.027506262063980103, "loss_ce": 2.2961814920563484e-06, "loss_iou": 0.1669921875, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 534704084, "step": 5858 }, { "epoch": 24.4125, "grad_norm": 2.0678636855620534, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 534795508, "step": 5859 }, { "epoch": 24.4125, "loss": 0.03595554456114769, "loss_ce": 5.838414836034644e-06, "loss_iou": 0.181640625, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 534795508, "step": 5859 }, { "epoch": 24.416666666666668, "grad_norm": 2.1996007767003674, "learning_rate": 5e-05, "loss": 0.0373, "num_input_tokens_seen": 534887200, "step": 5860 }, { "epoch": 24.416666666666668, "loss": 0.03372432291507721, "loss_ce": 5.5806183809181675e-05, "loss_iou": 0.2421875, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 534887200, "step": 5860 }, { "epoch": 24.420833333333334, "grad_norm": 2.484761238516966, "learning_rate": 5e-05, "loss": 0.0426, "num_input_tokens_seen": 534978140, "step": 5861 }, { "epoch": 24.420833333333334, "loss": 0.031754445284605026, "loss_ce": 3.142125206068158e-05, "loss_iou": 0.3125, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 534978140, "step": 5861 }, { "epoch": 24.425, "grad_norm": 2.0217484965386037, "learning_rate": 5e-05, "loss": 0.034, "num_input_tokens_seen": 535069568, "step": 5862 }, { "epoch": 24.425, "loss": 0.043887313455343246, "loss_ce": 3.037053375010146e-06, "loss_iou": 0.185546875, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 535069568, "step": 5862 }, { "epoch": 24.429166666666667, "grad_norm": 2.4210319897447197, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 535160888, "step": 5863 }, { "epoch": 24.429166666666667, "loss": 0.040758512914180756, "loss_ce": 2.287687721036491e-06, "loss_iou": 0.2734375, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 535160888, "step": 5863 }, { "epoch": 24.433333333333334, "grad_norm": 3.0517251807657018, "learning_rate": 5e-05, "loss": 0.0805, "num_input_tokens_seen": 535251832, "step": 5864 }, { "epoch": 24.433333333333334, "loss": 0.04389939457178116, "loss_ce": 3.037423812202178e-05, "loss_iou": 0.17578125, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 535251832, "step": 5864 }, { "epoch": 24.4375, "grad_norm": 2.2832054005693054, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 535343836, "step": 5865 }, { "epoch": 24.4375, "loss": 0.026977423578500748, "loss_ce": 7.515183824580163e-06, "loss_iou": 0.19140625, "loss_num": 0.005401611328125, "loss_xval": 0.0269775390625, "num_input_tokens_seen": 535343836, "step": 5865 }, { "epoch": 24.441666666666666, "grad_norm": 1.8672205491893512, "learning_rate": 5e-05, "loss": 0.0316, "num_input_tokens_seen": 535435272, "step": 5866 }, { "epoch": 24.441666666666666, "loss": 0.042527902871370316, "loss_ce": 1.6572091681155143e-06, "loss_iou": 0.29296875, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 535435272, "step": 5866 }, { "epoch": 24.445833333333333, "grad_norm": 2.5144469528048283, "learning_rate": 5e-05, "loss": 0.0325, "num_input_tokens_seen": 535527408, "step": 5867 }, { "epoch": 24.445833333333333, "loss": 0.025866053998470306, "loss_ce": 2.4057346763584064e-06, "loss_iou": 0.2177734375, "loss_num": 0.005157470703125, "loss_xval": 0.02587890625, "num_input_tokens_seen": 535527408, "step": 5867 }, { "epoch": 24.45, "grad_norm": 2.8159218187853825, "learning_rate": 5e-05, "loss": 0.0335, "num_input_tokens_seen": 535618412, "step": 5868 }, { "epoch": 24.45, "loss": 0.033513039350509644, "loss_ce": 4.736622940981761e-06, "loss_iou": 0.283203125, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 535618412, "step": 5868 }, { "epoch": 24.454166666666666, "grad_norm": 3.4214494091468204, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 535709292, "step": 5869 }, { "epoch": 24.454166666666666, "loss": 0.03987106680870056, "loss_ce": 1.5108943443919998e-05, "loss_iou": 0.2734375, "loss_num": 0.00799560546875, "loss_xval": 0.039794921875, "num_input_tokens_seen": 535709292, "step": 5869 }, { "epoch": 24.458333333333332, "grad_norm": 2.7589786437540322, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 535800576, "step": 5870 }, { "epoch": 24.458333333333332, "loss": 0.029235608875751495, "loss_ce": 7.60642797104083e-05, "loss_iou": 0.279296875, "loss_num": 0.005828857421875, "loss_xval": 0.0291748046875, "num_input_tokens_seen": 535800576, "step": 5870 }, { "epoch": 24.4625, "grad_norm": 2.6709935919613668, "learning_rate": 5e-05, "loss": 0.027, "num_input_tokens_seen": 535892424, "step": 5871 }, { "epoch": 24.4625, "loss": 0.030119696632027626, "loss_ce": 1.4105718946666457e-05, "loss_iou": 0.318359375, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 535892424, "step": 5871 }, { "epoch": 24.466666666666665, "grad_norm": 2.115546282310238, "learning_rate": 5e-05, "loss": 0.0811, "num_input_tokens_seen": 535983528, "step": 5872 }, { "epoch": 24.466666666666665, "loss": 0.03541748225688934, "loss_ce": 1.8342611838306766e-06, "loss_iou": 0.271484375, "loss_num": 0.007080078125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 535983528, "step": 5872 }, { "epoch": 24.470833333333335, "grad_norm": 2.025575395497392, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 536074768, "step": 5873 }, { "epoch": 24.470833333333335, "loss": 0.031343501061201096, "loss_ce": 1.949052148120245e-06, "loss_iou": 0.494140625, "loss_num": 0.00628662109375, "loss_xval": 0.03125, "num_input_tokens_seen": 536074768, "step": 5873 }, { "epoch": 24.475, "grad_norm": 2.321141271223248, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 536165936, "step": 5874 }, { "epoch": 24.475, "loss": 0.03415609523653984, "loss_ce": 6.9254256231943145e-06, "loss_iou": 0.306640625, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 536165936, "step": 5874 }, { "epoch": 24.479166666666668, "grad_norm": 2.6402123602693823, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 536256940, "step": 5875 }, { "epoch": 24.479166666666668, "loss": 0.03880603611469269, "loss_ce": 2.935068550868891e-06, "loss_iou": 0.2109375, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 536256940, "step": 5875 }, { "epoch": 24.483333333333334, "grad_norm": 2.118281042942542, "learning_rate": 5e-05, "loss": 0.0335, "num_input_tokens_seen": 536348488, "step": 5876 }, { "epoch": 24.483333333333334, "loss": 0.04635797068476677, "loss_ce": 1.7691461380309192e-06, "loss_iou": 0.265625, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 536348488, "step": 5876 }, { "epoch": 24.4875, "grad_norm": 2.0982521767984488, "learning_rate": 5e-05, "loss": 0.0245, "num_input_tokens_seen": 536439932, "step": 5877 }, { "epoch": 24.4875, "loss": 0.025177722796797752, "loss_ce": 1.5980136595317163e-05, "loss_iou": 0.255859375, "loss_num": 0.005035400390625, "loss_xval": 0.025146484375, "num_input_tokens_seen": 536439932, "step": 5877 }, { "epoch": 24.491666666666667, "grad_norm": 1.9342299361749438, "learning_rate": 5e-05, "loss": 0.0328, "num_input_tokens_seen": 536531264, "step": 5878 }, { "epoch": 24.491666666666667, "loss": 0.038806505501270294, "loss_ce": 3.403896243980853e-06, "loss_iou": 0.271484375, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 536531264, "step": 5878 }, { "epoch": 24.495833333333334, "grad_norm": 2.7704734606875876, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 536622972, "step": 5879 }, { "epoch": 24.495833333333334, "loss": 0.025232654064893723, "loss_ce": 2.2465160327556077e-06, "loss_iou": 0.25, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 536622972, "step": 5879 }, { "epoch": 24.5, "grad_norm": 3.179644385492909, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 536714900, "step": 5880 }, { "epoch": 24.5, "loss": 0.07569757103919983, "loss_ce": 1.3975191905046813e-05, "loss_iou": 0.2890625, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 536714900, "step": 5880 }, { "epoch": 24.504166666666666, "grad_norm": 2.253804276771664, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 536806200, "step": 5881 }, { "epoch": 24.504166666666666, "loss": 0.0256878063082695, "loss_ce": 7.26442249288084e-06, "loss_iou": 0.224609375, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 536806200, "step": 5881 }, { "epoch": 24.508333333333333, "grad_norm": 2.586446671393119, "learning_rate": 5e-05, "loss": 0.0303, "num_input_tokens_seen": 536897652, "step": 5882 }, { "epoch": 24.508333333333333, "loss": 0.027839424088597298, "loss_ce": 0.0007703331648372114, "loss_iou": 0.26171875, "loss_num": 0.005401611328125, "loss_xval": 0.027099609375, "num_input_tokens_seen": 536897652, "step": 5882 }, { "epoch": 24.5125, "grad_norm": 1.9124413383342893, "learning_rate": 5e-05, "loss": 0.0441, "num_input_tokens_seen": 536989336, "step": 5883 }, { "epoch": 24.5125, "loss": 0.021991252899169922, "loss_ce": 3.337895577715244e-06, "loss_iou": 0.1513671875, "loss_num": 0.00439453125, "loss_xval": 0.02197265625, "num_input_tokens_seen": 536989336, "step": 5883 }, { "epoch": 24.516666666666666, "grad_norm": 1.914465500214039, "learning_rate": 5e-05, "loss": 0.0306, "num_input_tokens_seen": 537080820, "step": 5884 }, { "epoch": 24.516666666666666, "loss": 0.036232881247997284, "loss_ce": 2.377348573645577e-05, "loss_iou": 0.2314453125, "loss_num": 0.00726318359375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 537080820, "step": 5884 }, { "epoch": 24.520833333333332, "grad_norm": 1.8379129218005454, "learning_rate": 5e-05, "loss": 0.053, "num_input_tokens_seen": 537172124, "step": 5885 }, { "epoch": 24.520833333333332, "loss": 0.06826095283031464, "loss_ce": 8.385493856621906e-06, "loss_iou": 0.3046875, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 537172124, "step": 5885 }, { "epoch": 24.525, "grad_norm": 2.205236982199377, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 537262800, "step": 5886 }, { "epoch": 24.525, "loss": 0.0484449602663517, "loss_ce": 5.936674369877437e-06, "loss_iou": 0.2177734375, "loss_num": 0.00970458984375, "loss_xval": 0.04833984375, "num_input_tokens_seen": 537262800, "step": 5886 }, { "epoch": 24.529166666666665, "grad_norm": 2.5479061923370026, "learning_rate": 5e-05, "loss": 0.0419, "num_input_tokens_seen": 537354264, "step": 5887 }, { "epoch": 24.529166666666665, "loss": 0.02890472114086151, "loss_ce": 4.57498435935122e-06, "loss_iou": 0.24609375, "loss_num": 0.005767822265625, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 537354264, "step": 5887 }, { "epoch": 24.533333333333335, "grad_norm": 2.409790801483189, "learning_rate": 5e-05, "loss": 0.0282, "num_input_tokens_seen": 537445900, "step": 5888 }, { "epoch": 24.533333333333335, "loss": 0.030124662443995476, "loss_ce": 3.8115688312245766e-06, "loss_iou": 0.1787109375, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 537445900, "step": 5888 }, { "epoch": 24.5375, "grad_norm": 3.0075649723691718, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 537536752, "step": 5889 }, { "epoch": 24.5375, "loss": 0.06466107070446014, "loss_ce": 1.954044137164601e-06, "loss_iou": 0.2890625, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 537536752, "step": 5889 }, { "epoch": 24.541666666666668, "grad_norm": 2.898725150209427, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 537627952, "step": 5890 }, { "epoch": 24.541666666666668, "loss": 0.04495709389448166, "loss_ce": 4.702219484897796e-06, "loss_iou": 0.271484375, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 537627952, "step": 5890 }, { "epoch": 24.545833333333334, "grad_norm": 2.8535368875900153, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 537719124, "step": 5891 }, { "epoch": 24.545833333333334, "loss": 0.049891311675310135, "loss_ce": 1.0329967153666075e-05, "loss_iou": 0.298828125, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 537719124, "step": 5891 }, { "epoch": 24.55, "grad_norm": 2.337094480779963, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 537810200, "step": 5892 }, { "epoch": 24.55, "loss": 0.04928762465715408, "loss_ce": 1.732335135784524e-06, "loss_iou": 0.1787109375, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 537810200, "step": 5892 }, { "epoch": 24.554166666666667, "grad_norm": 3.505489995181878, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 537901240, "step": 5893 }, { "epoch": 24.554166666666667, "loss": 0.02528042159974575, "loss_ce": 4.236781023791991e-06, "loss_iou": 0.27734375, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 537901240, "step": 5893 }, { "epoch": 24.558333333333334, "grad_norm": 3.1120218461714346, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 537991900, "step": 5894 }, { "epoch": 24.558333333333334, "loss": 0.03692232817411423, "loss_ce": 9.52406189753674e-05, "loss_iou": 0.248046875, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 537991900, "step": 5894 }, { "epoch": 24.5625, "grad_norm": 2.377712094838735, "learning_rate": 5e-05, "loss": 0.0498, "num_input_tokens_seen": 538083508, "step": 5895 }, { "epoch": 24.5625, "loss": 0.028869686648249626, "loss_ce": 3.0576215067412704e-05, "loss_iou": 0.306640625, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 538083508, "step": 5895 }, { "epoch": 24.566666666666666, "grad_norm": 2.4117882798127974, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 538174856, "step": 5896 }, { "epoch": 24.566666666666666, "loss": 0.07982829958200455, "loss_ce": 9.573964234732557e-06, "loss_iou": 0.291015625, "loss_num": 0.0159912109375, "loss_xval": 0.07958984375, "num_input_tokens_seen": 538174856, "step": 5896 }, { "epoch": 24.570833333333333, "grad_norm": 2.7877261288827166, "learning_rate": 5e-05, "loss": 0.0391, "num_input_tokens_seen": 538266120, "step": 5897 }, { "epoch": 24.570833333333333, "loss": 0.03959943354129791, "loss_ce": 2.8757731342921034e-06, "loss_iou": 0.330078125, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 538266120, "step": 5897 }, { "epoch": 24.575, "grad_norm": 2.9636345146482475, "learning_rate": 5e-05, "loss": 0.052, "num_input_tokens_seen": 538357420, "step": 5898 }, { "epoch": 24.575, "loss": 0.05580301955342293, "loss_ce": 1.6886146113392897e-05, "loss_iou": 0.3203125, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 538357420, "step": 5898 }, { "epoch": 24.579166666666666, "grad_norm": 4.463181674979655, "learning_rate": 5e-05, "loss": 0.037, "num_input_tokens_seen": 538447944, "step": 5899 }, { "epoch": 24.579166666666666, "loss": 0.043856751173734665, "loss_ce": 2.9877814995415974e-06, "loss_iou": 0.27734375, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 538447944, "step": 5899 }, { "epoch": 24.583333333333332, "grad_norm": 8.215582023201812, "learning_rate": 5e-05, "loss": 0.0739, "num_input_tokens_seen": 538539312, "step": 5900 }, { "epoch": 24.583333333333332, "loss": 0.07832689583301544, "loss_ce": 1.8785158317768946e-05, "loss_iou": 0.18359375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 538539312, "step": 5900 }, { "epoch": 24.5875, "grad_norm": 3.5866870849822976, "learning_rate": 5e-05, "loss": 0.0926, "num_input_tokens_seen": 538629948, "step": 5901 }, { "epoch": 24.5875, "loss": 0.07081713527441025, "loss_ce": 1.0961807674902957e-06, "loss_iou": 0.228515625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 538629948, "step": 5901 }, { "epoch": 24.591666666666665, "grad_norm": 3.7779213662672033, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 538721432, "step": 5902 }, { "epoch": 24.591666666666665, "loss": 0.04373526945710182, "loss_ce": 3.5786486023425823e-06, "loss_iou": 0.251953125, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 538721432, "step": 5902 }, { "epoch": 24.595833333333335, "grad_norm": 2.168479694601854, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 538812516, "step": 5903 }, { "epoch": 24.595833333333335, "loss": 0.03565511852502823, "loss_ce": 0.00011740053014364094, "loss_iou": 0.171875, "loss_num": 0.007110595703125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 538812516, "step": 5903 }, { "epoch": 24.6, "grad_norm": 3.0841797571544123, "learning_rate": 5e-05, "loss": 0.0291, "num_input_tokens_seen": 538903744, "step": 5904 }, { "epoch": 24.6, "loss": 0.02611241489648819, "loss_ce": 6.566110096173361e-05, "loss_iou": 0.2216796875, "loss_num": 0.005218505859375, "loss_xval": 0.0260009765625, "num_input_tokens_seen": 538903744, "step": 5904 }, { "epoch": 24.604166666666668, "grad_norm": 3.154338098881419, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 538995084, "step": 5905 }, { "epoch": 24.604166666666668, "loss": 0.03224372863769531, "loss_ce": 1.9066790173383197e-06, "loss_iou": 0.2578125, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 538995084, "step": 5905 }, { "epoch": 24.608333333333334, "grad_norm": 2.69142730021738, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 539086212, "step": 5906 }, { "epoch": 24.608333333333334, "loss": 0.03064822033047676, "loss_ce": 9.429805345462228e-07, "loss_iou": 0.189453125, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 539086212, "step": 5906 }, { "epoch": 24.6125, "grad_norm": 2.5427073613900877, "learning_rate": 5e-05, "loss": 0.051, "num_input_tokens_seen": 539177624, "step": 5907 }, { "epoch": 24.6125, "loss": 0.0609009675681591, "loss_ce": 3.139893124171067e-06, "loss_iou": 0.314453125, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 539177624, "step": 5907 }, { "epoch": 24.616666666666667, "grad_norm": 2.7146400523726304, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 539268644, "step": 5908 }, { "epoch": 24.616666666666667, "loss": 0.03491390123963356, "loss_ce": 1.7929927480508923e-06, "loss_iou": 0.1982421875, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 539268644, "step": 5908 }, { "epoch": 24.620833333333334, "grad_norm": 1.9120792026763072, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 539359980, "step": 5909 }, { "epoch": 24.620833333333334, "loss": 0.05520816519856453, "loss_ce": 1.8630300928634824e-06, "loss_iou": 0.1103515625, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 539359980, "step": 5909 }, { "epoch": 24.625, "grad_norm": 2.8697876589951665, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 539450148, "step": 5910 }, { "epoch": 24.625, "loss": 0.044227711856365204, "loss_ce": 1.5374156646430492e-05, "loss_iou": 0.166015625, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 539450148, "step": 5910 }, { "epoch": 24.629166666666666, "grad_norm": 1.1012244154737323, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 539541476, "step": 5911 }, { "epoch": 24.629166666666666, "loss": 0.05504788085818291, "loss_ce": 0.0006121534388512373, "loss_iou": 0.1201171875, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 539541476, "step": 5911 }, { "epoch": 24.633333333333333, "grad_norm": 4.048964876658251, "learning_rate": 5e-05, "loss": 0.0682, "num_input_tokens_seen": 539631032, "step": 5912 }, { "epoch": 24.633333333333333, "loss": 0.06917066872119904, "loss_ce": 2.58000181929674e-06, "loss_iou": 0.2275390625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 539631032, "step": 5912 }, { "epoch": 24.6375, "grad_norm": 1.0274591914629472, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 539720944, "step": 5913 }, { "epoch": 24.6375, "loss": 0.038511671125888824, "loss_ce": 6.119631052570185e-06, "loss_iou": 0.0859375, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 539720944, "step": 5913 }, { "epoch": 24.641666666666666, "grad_norm": 0.7784951474058762, "learning_rate": 5e-05, "loss": 0.0265, "num_input_tokens_seen": 539812508, "step": 5914 }, { "epoch": 24.641666666666666, "loss": 0.02655962109565735, "loss_ce": 4.7475121391471475e-05, "loss_iou": 0.224609375, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 539812508, "step": 5914 }, { "epoch": 24.645833333333332, "grad_norm": 0.8845230205746087, "learning_rate": 5e-05, "loss": 0.0196, "num_input_tokens_seen": 539904344, "step": 5915 }, { "epoch": 24.645833333333332, "loss": 0.02053234726190567, "loss_ce": 5.5050964874681085e-05, "loss_iou": 0.216796875, "loss_num": 0.00408935546875, "loss_xval": 0.0205078125, "num_input_tokens_seen": 539904344, "step": 5915 }, { "epoch": 24.65, "grad_norm": 1.5167444500502159, "learning_rate": 5e-05, "loss": 0.0558, "num_input_tokens_seen": 539995948, "step": 5916 }, { "epoch": 24.65, "loss": 0.030595405027270317, "loss_ce": 1.6793703252915293e-05, "loss_iou": 0.154296875, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 539995948, "step": 5916 }, { "epoch": 24.654166666666665, "grad_norm": 3.4466687869544694, "learning_rate": 5e-05, "loss": 0.0709, "num_input_tokens_seen": 540087116, "step": 5917 }, { "epoch": 24.654166666666665, "loss": 0.08021612465381622, "loss_ce": 0.00019903438806068152, "loss_iou": 0.1328125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 540087116, "step": 5917 }, { "epoch": 24.658333333333335, "grad_norm": 4.366893586605952, "learning_rate": 5e-05, "loss": 0.1025, "num_input_tokens_seen": 540177852, "step": 5918 }, { "epoch": 24.658333333333335, "loss": 0.1458016186952591, "loss_ce": 3.891634150932077e-06, "loss_iou": 0.31640625, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 540177852, "step": 5918 }, { "epoch": 24.6625, "grad_norm": 2.3891871671129277, "learning_rate": 5e-05, "loss": 0.028, "num_input_tokens_seen": 540269780, "step": 5919 }, { "epoch": 24.6625, "loss": 0.02481307089328766, "loss_ce": 3.279614611528814e-05, "loss_iou": 0.17578125, "loss_num": 0.00494384765625, "loss_xval": 0.0247802734375, "num_input_tokens_seen": 540269780, "step": 5919 }, { "epoch": 24.666666666666668, "grad_norm": 4.410840281215651, "learning_rate": 5e-05, "loss": 0.0265, "num_input_tokens_seen": 540361224, "step": 5920 }, { "epoch": 24.666666666666668, "loss": 0.021179374307394028, "loss_ce": 2.3062859327183105e-05, "loss_iou": 0.2138671875, "loss_num": 0.004241943359375, "loss_xval": 0.0211181640625, "num_input_tokens_seen": 540361224, "step": 5920 }, { "epoch": 24.670833333333334, "grad_norm": 1.9648680045006581, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 540452532, "step": 5921 }, { "epoch": 24.670833333333334, "loss": 0.08267770707607269, "loss_ce": 2.0848239728366025e-05, "loss_iou": 0.1630859375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 540452532, "step": 5921 }, { "epoch": 24.675, "grad_norm": 2.7172343307171207, "learning_rate": 5e-05, "loss": 0.0339, "num_input_tokens_seen": 540543684, "step": 5922 }, { "epoch": 24.675, "loss": 0.03063477762043476, "loss_ce": 2.758548589554266e-06, "loss_iou": 0.1923828125, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 540543684, "step": 5922 }, { "epoch": 24.679166666666667, "grad_norm": 3.0738158052431808, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 540635616, "step": 5923 }, { "epoch": 24.679166666666667, "loss": 0.027951788157224655, "loss_ce": 2.057402889477089e-05, "loss_iou": 0.2451171875, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 540635616, "step": 5923 }, { "epoch": 24.683333333333334, "grad_norm": 2.6663540732172617, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 540726568, "step": 5924 }, { "epoch": 24.683333333333334, "loss": 0.026276925578713417, "loss_ce": 1.2912703368783696e-06, "loss_iou": 0.11572265625, "loss_num": 0.0052490234375, "loss_xval": 0.0262451171875, "num_input_tokens_seen": 540726568, "step": 5924 }, { "epoch": 24.6875, "grad_norm": 6.225553984733464, "learning_rate": 5e-05, "loss": 0.0362, "num_input_tokens_seen": 540818112, "step": 5925 }, { "epoch": 24.6875, "loss": 0.032184898853302, "loss_ce": 4.109516794414958e-06, "loss_iou": 0.1982421875, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 540818112, "step": 5925 }, { "epoch": 24.691666666666666, "grad_norm": 3.280122739815333, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 540908712, "step": 5926 }, { "epoch": 24.691666666666666, "loss": 0.05258284509181976, "loss_ce": 1.0590549663902493e-06, "loss_iou": 0.20703125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 540908712, "step": 5926 }, { "epoch": 24.695833333333333, "grad_norm": 5.29702773637457, "learning_rate": 5e-05, "loss": 0.0838, "num_input_tokens_seen": 541000016, "step": 5927 }, { "epoch": 24.695833333333333, "loss": 0.09465332329273224, "loss_ce": 3.0550074825441698e-06, "loss_iou": 0.271484375, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 541000016, "step": 5927 }, { "epoch": 24.7, "grad_norm": 4.623311810622116, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 541091220, "step": 5928 }, { "epoch": 24.7, "loss": 0.04450526461005211, "loss_ce": 3.0040828278288245e-06, "loss_iou": 0.1640625, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 541091220, "step": 5928 }, { "epoch": 24.704166666666666, "grad_norm": 1.3477347149151542, "learning_rate": 5e-05, "loss": 0.0489, "num_input_tokens_seen": 541182772, "step": 5929 }, { "epoch": 24.704166666666666, "loss": 0.04887685179710388, "loss_ce": 2.949379450001288e-06, "loss_iou": 0.1923828125, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 541182772, "step": 5929 }, { "epoch": 24.708333333333332, "grad_norm": 2.3558814680354536, "learning_rate": 5e-05, "loss": 0.0221, "num_input_tokens_seen": 541273984, "step": 5930 }, { "epoch": 24.708333333333332, "loss": 0.018274590373039246, "loss_ce": 2.190319037254085e-06, "loss_iou": 0.1689453125, "loss_num": 0.0036468505859375, "loss_xval": 0.018310546875, "num_input_tokens_seen": 541273984, "step": 5930 }, { "epoch": 24.7125, "grad_norm": 2.565239447900155, "learning_rate": 5e-05, "loss": 0.0437, "num_input_tokens_seen": 541364932, "step": 5931 }, { "epoch": 24.7125, "loss": 0.051742859184741974, "loss_ce": 7.933152119221631e-06, "loss_iou": 0.203125, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 541364932, "step": 5931 }, { "epoch": 24.716666666666665, "grad_norm": 2.724349746006053, "learning_rate": 5e-05, "loss": 0.0564, "num_input_tokens_seen": 541456372, "step": 5932 }, { "epoch": 24.716666666666665, "loss": 0.07829385995864868, "loss_ce": 1.0153214589081472e-06, "loss_iou": 0.279296875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 541456372, "step": 5932 }, { "epoch": 24.720833333333335, "grad_norm": 2.7034234598003817, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 541547664, "step": 5933 }, { "epoch": 24.720833333333335, "loss": 0.028786811977624893, "loss_ce": 8.735810297366697e-06, "loss_iou": 0.2158203125, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 541547664, "step": 5933 }, { "epoch": 24.725, "grad_norm": 2.9238049557551875, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 541639076, "step": 5934 }, { "epoch": 24.725, "loss": 0.03198646754026413, "loss_ce": 4.048427854286274e-06, "loss_iou": 0.1611328125, "loss_num": 0.00640869140625, "loss_xval": 0.031982421875, "num_input_tokens_seen": 541639076, "step": 5934 }, { "epoch": 24.729166666666668, "grad_norm": 3.1921958158987627, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 541730268, "step": 5935 }, { "epoch": 24.729166666666668, "loss": 0.07729228585958481, "loss_ce": 0.000258283456787467, "loss_iou": 0.244140625, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 541730268, "step": 5935 }, { "epoch": 24.733333333333334, "grad_norm": 2.3646558090848777, "learning_rate": 5e-05, "loss": 0.0674, "num_input_tokens_seen": 541821312, "step": 5936 }, { "epoch": 24.733333333333334, "loss": 0.10533357411623001, "loss_ce": 9.777621016837656e-06, "loss_iou": 0.34765625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 541821312, "step": 5936 }, { "epoch": 24.7375, "grad_norm": 4.5028653103550225, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 541912692, "step": 5937 }, { "epoch": 24.7375, "loss": 0.046153221279382706, "loss_ce": 5.642018368234858e-05, "loss_iou": 0.2177734375, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 541912692, "step": 5937 }, { "epoch": 24.741666666666667, "grad_norm": 4.870812530160758, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 542003980, "step": 5938 }, { "epoch": 24.741666666666667, "loss": 0.03171246126294136, "loss_ce": 4.69624001198099e-06, "loss_iou": 0.1943359375, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 542003980, "step": 5938 }, { "epoch": 24.745833333333334, "grad_norm": 2.5231449756579, "learning_rate": 5e-05, "loss": 0.0806, "num_input_tokens_seen": 542095820, "step": 5939 }, { "epoch": 24.745833333333334, "loss": 0.05183350667357445, "loss_ce": 7.0313944888766855e-06, "loss_iou": 0.1884765625, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 542095820, "step": 5939 }, { "epoch": 24.75, "grad_norm": 1.5649742368545314, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 542186908, "step": 5940 }, { "epoch": 24.75, "loss": 0.04194850102066994, "loss_ce": 0.0010778369614854455, "loss_iou": 0.29296875, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 542186908, "step": 5940 }, { "epoch": 24.754166666666666, "grad_norm": 1.9013408198419663, "learning_rate": 5e-05, "loss": 0.029, "num_input_tokens_seen": 542277800, "step": 5941 }, { "epoch": 24.754166666666666, "loss": 0.020965928211808205, "loss_ce": 1.5612324204994366e-05, "loss_iou": 0.20703125, "loss_num": 0.004180908203125, "loss_xval": 0.02099609375, "num_input_tokens_seen": 542277800, "step": 5941 }, { "epoch": 24.758333333333333, "grad_norm": 2.0282852397219835, "learning_rate": 5e-05, "loss": 0.0532, "num_input_tokens_seen": 542367376, "step": 5942 }, { "epoch": 24.758333333333333, "loss": 0.07156237214803696, "loss_ce": 0.0009141807677224278, "loss_iou": 0.1181640625, "loss_num": 0.01409912109375, "loss_xval": 0.07080078125, "num_input_tokens_seen": 542367376, "step": 5942 }, { "epoch": 24.7625, "grad_norm": 4.284082256453397, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 542458328, "step": 5943 }, { "epoch": 24.7625, "loss": 0.03209364414215088, "loss_ce": 6.544977077282965e-05, "loss_iou": 0.279296875, "loss_num": 0.00640869140625, "loss_xval": 0.031982421875, "num_input_tokens_seen": 542458328, "step": 5943 }, { "epoch": 24.766666666666666, "grad_norm": 6.875100205694348, "learning_rate": 5e-05, "loss": 0.037, "num_input_tokens_seen": 542549360, "step": 5944 }, { "epoch": 24.766666666666666, "loss": 0.03342054411768913, "loss_ce": 3.4312968637095764e-05, "loss_iou": 0.28125, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 542549360, "step": 5944 }, { "epoch": 24.770833333333332, "grad_norm": 2.7426263223939413, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 542640716, "step": 5945 }, { "epoch": 24.770833333333332, "loss": 0.03136511147022247, "loss_ce": 0.0019232768099755049, "loss_iou": 0.22265625, "loss_num": 0.005889892578125, "loss_xval": 0.0294189453125, "num_input_tokens_seen": 542640716, "step": 5945 }, { "epoch": 24.775, "grad_norm": 2.210203886249043, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 542731968, "step": 5946 }, { "epoch": 24.775, "loss": 0.041960351169109344, "loss_ce": 6.309468972176546e-06, "loss_iou": 0.2490234375, "loss_num": 0.00836181640625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 542731968, "step": 5946 }, { "epoch": 24.779166666666665, "grad_norm": 3.9227907162197333, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 542823432, "step": 5947 }, { "epoch": 24.779166666666665, "loss": 0.047088660299777985, "loss_ce": 7.663855285500176e-06, "loss_iou": 0.21484375, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 542823432, "step": 5947 }, { "epoch": 24.783333333333335, "grad_norm": 3.1668412273112407, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 542914652, "step": 5948 }, { "epoch": 24.783333333333335, "loss": 0.02823035418987274, "loss_ce": 1.685171082499437e-05, "loss_iou": 0.359375, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 542914652, "step": 5948 }, { "epoch": 24.7875, "grad_norm": 2.861085397257759, "learning_rate": 5e-05, "loss": 0.0812, "num_input_tokens_seen": 543006236, "step": 5949 }, { "epoch": 24.7875, "loss": 0.09519291669130325, "loss_ce": 8.586997864767909e-06, "loss_iou": 0.345703125, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 543006236, "step": 5949 }, { "epoch": 24.791666666666668, "grad_norm": 3.458020700237174, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 543098220, "step": 5950 }, { "epoch": 24.791666666666668, "loss": 0.05714184045791626, "loss_ce": 3.5821729397866875e-05, "loss_iou": 0.17578125, "loss_num": 0.01141357421875, "loss_xval": 0.05712890625, "num_input_tokens_seen": 543098220, "step": 5950 }, { "epoch": 24.795833333333334, "grad_norm": 2.0891758994886893, "learning_rate": 5e-05, "loss": 0.0744, "num_input_tokens_seen": 543189344, "step": 5951 }, { "epoch": 24.795833333333334, "loss": 0.05110526829957962, "loss_ce": 3.5859707168128807e-06, "loss_iou": 0.224609375, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 543189344, "step": 5951 }, { "epoch": 24.8, "grad_norm": 1.4596318900144993, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 543280584, "step": 5952 }, { "epoch": 24.8, "loss": 0.04132525622844696, "loss_ce": 1.2085286471119616e-05, "loss_iou": 0.1689453125, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 543280584, "step": 5952 }, { "epoch": 24.804166666666667, "grad_norm": 2.243959651716921, "learning_rate": 5e-05, "loss": 0.0702, "num_input_tokens_seen": 543371820, "step": 5953 }, { "epoch": 24.804166666666667, "loss": 0.07528236508369446, "loss_ce": 7.560790982097387e-05, "loss_iou": 0.1796875, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 543371820, "step": 5953 }, { "epoch": 24.808333333333334, "grad_norm": 8.27410245570298, "learning_rate": 5e-05, "loss": 0.0898, "num_input_tokens_seen": 543462980, "step": 5954 }, { "epoch": 24.808333333333334, "loss": 0.06564659625291824, "loss_ce": 9.483678149990737e-05, "loss_iou": 0.33984375, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 543462980, "step": 5954 }, { "epoch": 24.8125, "grad_norm": 3.2307275820053536, "learning_rate": 5e-05, "loss": 0.0455, "num_input_tokens_seen": 543553840, "step": 5955 }, { "epoch": 24.8125, "loss": 0.05685967206954956, "loss_ce": 3.5940804082201794e-05, "loss_iou": 0.220703125, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 543553840, "step": 5955 }, { "epoch": 24.816666666666666, "grad_norm": 2.9867327221025106, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 543644836, "step": 5956 }, { "epoch": 24.816666666666666, "loss": 0.024468587711453438, "loss_ce": 3.926640783902258e-05, "loss_iou": 0.294921875, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 543644836, "step": 5956 }, { "epoch": 24.820833333333333, "grad_norm": 3.7096201493254624, "learning_rate": 5e-05, "loss": 0.059, "num_input_tokens_seen": 543735620, "step": 5957 }, { "epoch": 24.820833333333333, "loss": 0.08750680834054947, "loss_ce": 2.053728894679807e-05, "loss_iou": 0.2353515625, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 543735620, "step": 5957 }, { "epoch": 24.825, "grad_norm": 3.158868577300251, "learning_rate": 5e-05, "loss": 0.0486, "num_input_tokens_seen": 543827112, "step": 5958 }, { "epoch": 24.825, "loss": 0.06845375150442123, "loss_ce": 0.00027748377760872245, "loss_iou": 0.2451171875, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 543827112, "step": 5958 }, { "epoch": 24.829166666666666, "grad_norm": 3.0045479568999065, "learning_rate": 5e-05, "loss": 0.0474, "num_input_tokens_seen": 543918588, "step": 5959 }, { "epoch": 24.829166666666666, "loss": 0.040871065109968185, "loss_ce": 8.028022421058267e-06, "loss_iou": 0.28125, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 543918588, "step": 5959 }, { "epoch": 24.833333333333332, "grad_norm": 3.406046898558503, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 544010588, "step": 5960 }, { "epoch": 24.833333333333332, "loss": 0.043734535574913025, "loss_ce": 1.8105354683939368e-05, "loss_iou": 0.37109375, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 544010588, "step": 5960 }, { "epoch": 24.8375, "grad_norm": 19.736425320828538, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 544101932, "step": 5961 }, { "epoch": 24.8375, "loss": 0.022242430597543716, "loss_ce": 2.7447626962384675e-06, "loss_iou": 0.1875, "loss_num": 0.00445556640625, "loss_xval": 0.022216796875, "num_input_tokens_seen": 544101932, "step": 5961 }, { "epoch": 24.841666666666665, "grad_norm": 3.5540091350026795, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 544193824, "step": 5962 }, { "epoch": 24.841666666666665, "loss": 0.02422468177974224, "loss_ce": 8.527738100383431e-05, "loss_iou": 0.28125, "loss_num": 0.00482177734375, "loss_xval": 0.024169921875, "num_input_tokens_seen": 544193824, "step": 5962 }, { "epoch": 24.845833333333335, "grad_norm": 3.2074751309801197, "learning_rate": 5e-05, "loss": 0.0911, "num_input_tokens_seen": 544284820, "step": 5963 }, { "epoch": 24.845833333333335, "loss": 0.12568366527557373, "loss_ce": 1.2279349903110415e-05, "loss_iou": 0.0810546875, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 544284820, "step": 5963 }, { "epoch": 24.85, "grad_norm": 2.946357998987661, "learning_rate": 5e-05, "loss": 0.0687, "num_input_tokens_seen": 544376068, "step": 5964 }, { "epoch": 24.85, "loss": 0.0366411954164505, "loss_ce": 4.842233465751633e-06, "loss_iou": 0.189453125, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 544376068, "step": 5964 }, { "epoch": 24.854166666666668, "grad_norm": 2.875951617130414, "learning_rate": 5e-05, "loss": 0.0691, "num_input_tokens_seen": 544467844, "step": 5965 }, { "epoch": 24.854166666666668, "loss": 0.07936926931142807, "loss_ce": 3.882595046889037e-05, "loss_iou": 0.2001953125, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 544467844, "step": 5965 }, { "epoch": 24.858333333333334, "grad_norm": 4.6237034845892255, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 544559136, "step": 5966 }, { "epoch": 24.858333333333334, "loss": 0.05209742859005928, "loss_ce": 3.925106284441426e-06, "loss_iou": 0.263671875, "loss_num": 0.01043701171875, "loss_xval": 0.052001953125, "num_input_tokens_seen": 544559136, "step": 5966 }, { "epoch": 24.8625, "grad_norm": 1.1493134989483929, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 544649940, "step": 5967 }, { "epoch": 24.8625, "loss": 0.04756912589073181, "loss_ce": 3.036867201444693e-05, "loss_iou": 0.244140625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 544649940, "step": 5967 }, { "epoch": 24.866666666666667, "grad_norm": 2.0982886092577315, "learning_rate": 5e-05, "loss": 0.0329, "num_input_tokens_seen": 544741492, "step": 5968 }, { "epoch": 24.866666666666667, "loss": 0.04254509508609772, "loss_ce": 3.592864914025995e-06, "loss_iou": 0.27734375, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 544741492, "step": 5968 }, { "epoch": 24.870833333333334, "grad_norm": 0.746916826526483, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 544833060, "step": 5969 }, { "epoch": 24.870833333333334, "loss": 0.054372403770685196, "loss_ce": 6.637265323661268e-05, "loss_iou": 0.193359375, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 544833060, "step": 5969 }, { "epoch": 24.875, "grad_norm": 1.2717332474770326, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 544924372, "step": 5970 }, { "epoch": 24.875, "loss": 0.024903494864702225, "loss_ce": 1.1499631682454492e-06, "loss_iou": 0.224609375, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 544924372, "step": 5970 }, { "epoch": 24.879166666666666, "grad_norm": 2.792213761061456, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 545015452, "step": 5971 }, { "epoch": 24.879166666666666, "loss": 0.032501786947250366, "loss_ce": 2.3451317247236148e-05, "loss_iou": 0.1650390625, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 545015452, "step": 5971 }, { "epoch": 24.883333333333333, "grad_norm": 3.4748548119466327, "learning_rate": 5e-05, "loss": 0.0404, "num_input_tokens_seen": 545106680, "step": 5972 }, { "epoch": 24.883333333333333, "loss": 0.03997427970170975, "loss_ce": 0.00042349606519564986, "loss_iou": 0.2734375, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 545106680, "step": 5972 }, { "epoch": 24.8875, "grad_norm": 6.263040693590258, "learning_rate": 5e-05, "loss": 0.0397, "num_input_tokens_seen": 545198316, "step": 5973 }, { "epoch": 24.8875, "loss": 0.04715617746114731, "loss_ce": 6.519712769659236e-06, "loss_iou": 0.25, "loss_num": 0.00946044921875, "loss_xval": 0.047119140625, "num_input_tokens_seen": 545198316, "step": 5973 }, { "epoch": 24.891666666666666, "grad_norm": 2.448734942191181, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 545289492, "step": 5974 }, { "epoch": 24.891666666666666, "loss": 0.04973657801747322, "loss_ce": 8.183667887351476e-06, "loss_iou": 0.2314453125, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 545289492, "step": 5974 }, { "epoch": 24.895833333333332, "grad_norm": 2.5058430825356885, "learning_rate": 5e-05, "loss": 0.037, "num_input_tokens_seen": 545380824, "step": 5975 }, { "epoch": 24.895833333333332, "loss": 0.05505327135324478, "loss_ce": 7.189291409304133e-06, "loss_iou": 0.1435546875, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 545380824, "step": 5975 }, { "epoch": 24.9, "grad_norm": 2.4357214961264275, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 545472512, "step": 5976 }, { "epoch": 24.9, "loss": 0.047596514225006104, "loss_ce": 0.0005994450766593218, "loss_iou": 0.2470703125, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 545472512, "step": 5976 }, { "epoch": 24.904166666666665, "grad_norm": 3.1992849131099597, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 545563616, "step": 5977 }, { "epoch": 24.904166666666665, "loss": 0.053942278027534485, "loss_ce": 2.459473762428388e-06, "loss_iou": 0.33203125, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 545563616, "step": 5977 }, { "epoch": 24.908333333333335, "grad_norm": 1.7751656495507373, "learning_rate": 5e-05, "loss": 0.0281, "num_input_tokens_seen": 545655460, "step": 5978 }, { "epoch": 24.908333333333335, "loss": 0.0314764603972435, "loss_ce": 5.2090144890826195e-06, "loss_iou": 0.2119140625, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 545655460, "step": 5978 }, { "epoch": 24.9125, "grad_norm": 2.447711320425934, "learning_rate": 5e-05, "loss": 0.0469, "num_input_tokens_seen": 545746784, "step": 5979 }, { "epoch": 24.9125, "loss": 0.0640127956867218, "loss_ce": 2.172352651541587e-06, "loss_iou": 0.37890625, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 545746784, "step": 5979 }, { "epoch": 24.916666666666668, "grad_norm": 2.3039834216803268, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 545838384, "step": 5980 }, { "epoch": 24.916666666666668, "loss": 0.04834413528442383, "loss_ce": 1.192315812659217e-05, "loss_iou": 0.2294921875, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 545838384, "step": 5980 }, { "epoch": 24.920833333333334, "grad_norm": 2.8196817319781955, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 545930048, "step": 5981 }, { "epoch": 24.920833333333334, "loss": 0.04672251641750336, "loss_ce": 7.730915058346e-06, "loss_iou": 0.275390625, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 545930048, "step": 5981 }, { "epoch": 24.925, "grad_norm": 3.6656535804290504, "learning_rate": 5e-05, "loss": 0.0505, "num_input_tokens_seen": 546021812, "step": 5982 }, { "epoch": 24.925, "loss": 0.030796393752098083, "loss_ce": 4.158198862569407e-06, "loss_iou": 0.310546875, "loss_num": 0.00616455078125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 546021812, "step": 5982 }, { "epoch": 24.929166666666667, "grad_norm": 2.479852690724127, "learning_rate": 5e-05, "loss": 0.0606, "num_input_tokens_seen": 546113424, "step": 5983 }, { "epoch": 24.929166666666667, "loss": 0.052928995341062546, "loss_ce": 1.1513237041071989e-05, "loss_iou": 0.197265625, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 546113424, "step": 5983 }, { "epoch": 24.933333333333334, "grad_norm": 3.2366282793434946, "learning_rate": 5e-05, "loss": 0.0779, "num_input_tokens_seen": 546205548, "step": 5984 }, { "epoch": 24.933333333333334, "loss": 0.024994969367980957, "loss_ce": 1.0731979500633315e-06, "loss_iou": 0.265625, "loss_num": 0.0050048828125, "loss_xval": 0.0250244140625, "num_input_tokens_seen": 546205548, "step": 5984 }, { "epoch": 24.9375, "grad_norm": 2.9060950394394984, "learning_rate": 5e-05, "loss": 0.0731, "num_input_tokens_seen": 546296632, "step": 5985 }, { "epoch": 24.9375, "loss": 0.09871162474155426, "loss_ce": 2.5201684366038535e-06, "loss_iou": 0.2890625, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 546296632, "step": 5985 }, { "epoch": 24.941666666666666, "grad_norm": 3.017673406017083, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 546386652, "step": 5986 }, { "epoch": 24.941666666666666, "loss": 0.025592109188437462, "loss_ce": 3.1204160677589243e-06, "loss_iou": 0.1748046875, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 546386652, "step": 5986 }, { "epoch": 24.945833333333333, "grad_norm": 2.7750188921117465, "learning_rate": 5e-05, "loss": 0.023, "num_input_tokens_seen": 546478508, "step": 5987 }, { "epoch": 24.945833333333333, "loss": 0.023148812353610992, "loss_ce": 1.2287633808227838e-06, "loss_iou": 0.1494140625, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 546478508, "step": 5987 }, { "epoch": 24.95, "grad_norm": 2.093958784103149, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 546569960, "step": 5988 }, { "epoch": 24.95, "loss": 0.0406670905649662, "loss_ce": 2.414625669189263e-06, "loss_iou": 0.24609375, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 546569960, "step": 5988 }, { "epoch": 24.954166666666666, "grad_norm": 1.3102097345328108, "learning_rate": 5e-05, "loss": 0.0519, "num_input_tokens_seen": 546661232, "step": 5989 }, { "epoch": 24.954166666666666, "loss": 0.03977053984999657, "loss_ce": 0.00011294680007267743, "loss_iou": 0.212890625, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 546661232, "step": 5989 }, { "epoch": 24.958333333333332, "grad_norm": 1.9589293422616056, "learning_rate": 5e-05, "loss": 0.0552, "num_input_tokens_seen": 546752348, "step": 5990 }, { "epoch": 24.958333333333332, "loss": 0.04497984051704407, "loss_ce": 4.556593466986669e-06, "loss_iou": 0.1669921875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 546752348, "step": 5990 }, { "epoch": 24.9625, "grad_norm": 2.3781652883990727, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 546843064, "step": 5991 }, { "epoch": 24.9625, "loss": 0.03465817868709564, "loss_ce": 2.0727646187879145e-05, "loss_iou": 0.34375, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 546843064, "step": 5991 }, { "epoch": 24.966666666666665, "grad_norm": 3.3296711759595006, "learning_rate": 5e-05, "loss": 0.0291, "num_input_tokens_seen": 546934208, "step": 5992 }, { "epoch": 24.966666666666665, "loss": 0.030732300132513046, "loss_ce": 1.0995988759532338e-06, "loss_iou": 0.349609375, "loss_num": 0.00616455078125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 546934208, "step": 5992 }, { "epoch": 24.970833333333335, "grad_norm": 2.1965400503971133, "learning_rate": 5e-05, "loss": 0.0587, "num_input_tokens_seen": 547026024, "step": 5993 }, { "epoch": 24.970833333333335, "loss": 0.09637053310871124, "loss_ce": 1.1280644685029984e-05, "loss_iou": 0.22265625, "loss_num": 0.019287109375, "loss_xval": 0.09619140625, "num_input_tokens_seen": 547026024, "step": 5993 }, { "epoch": 24.975, "grad_norm": 2.232533704551629, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 547117604, "step": 5994 }, { "epoch": 24.975, "loss": 0.0288502499461174, "loss_ce": 3.508933104967582e-06, "loss_iou": 0.2333984375, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 547117604, "step": 5994 }, { "epoch": 24.979166666666668, "grad_norm": 2.0416217401760988, "learning_rate": 5e-05, "loss": 0.0165, "num_input_tokens_seen": 547209032, "step": 5995 }, { "epoch": 24.979166666666668, "loss": 0.01802220195531845, "loss_ce": 5.386910288507352e-06, "loss_iou": 0.31640625, "loss_num": 0.00360107421875, "loss_xval": 0.01806640625, "num_input_tokens_seen": 547209032, "step": 5995 }, { "epoch": 24.983333333333334, "grad_norm": 3.8484841226148996, "learning_rate": 5e-05, "loss": 0.0652, "num_input_tokens_seen": 547299940, "step": 5996 }, { "epoch": 24.983333333333334, "loss": 0.05408111587166786, "loss_ce": 3.964207280660048e-06, "loss_iou": 0.1474609375, "loss_num": 0.01080322265625, "loss_xval": 0.05419921875, "num_input_tokens_seen": 547299940, "step": 5996 }, { "epoch": 24.9875, "grad_norm": 3.1742817345645675, "learning_rate": 5e-05, "loss": 0.027, "num_input_tokens_seen": 547390716, "step": 5997 }, { "epoch": 24.9875, "loss": 0.02947721816599369, "loss_ce": 2.7756241252063774e-05, "loss_iou": 0.271484375, "loss_num": 0.005889892578125, "loss_xval": 0.0294189453125, "num_input_tokens_seen": 547390716, "step": 5997 }, { "epoch": 24.991666666666667, "grad_norm": 3.1706086611539157, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 547482112, "step": 5998 }, { "epoch": 24.991666666666667, "loss": 0.03286163881421089, "loss_ce": 1.8380596884526312e-06, "loss_iou": 0.26953125, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 547482112, "step": 5998 }, { "epoch": 24.995833333333334, "grad_norm": 5.756392964797963, "learning_rate": 5e-05, "loss": 0.049, "num_input_tokens_seen": 547573120, "step": 5999 }, { "epoch": 24.995833333333334, "loss": 0.052656348794698715, "loss_ce": 2.878543091355823e-05, "loss_iou": 0.330078125, "loss_num": 0.010498046875, "loss_xval": 0.052734375, "num_input_tokens_seen": 547573120, "step": 5999 }, { "epoch": 25.0, "grad_norm": 2.5867875653331494, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 547664792, "step": 6000 }, { "epoch": 25.0, "eval_seeclick_CIoU": 0.24265528470277786, "eval_seeclick_GIoU": 0.22994572669267654, "eval_seeclick_IoU": 0.34904928505420685, "eval_seeclick_MAE_all": 0.10797623917460442, "eval_seeclick_MAE_h": 0.07592884637415409, "eval_seeclick_MAE_w": 0.25676916539669037, "eval_seeclick_MAE_x_boxes": 0.23508797585964203, "eval_seeclick_MAE_y_boxes": 0.08006148040294647, "eval_seeclick_NUM_probability": 0.9999965131282806, "eval_seeclick_inside_bbox": 0.5553977340459824, "eval_seeclick_loss": 0.5893933773040771, "eval_seeclick_loss_ce": 0.10836980864405632, "eval_seeclick_loss_iou": 0.4580078125, "eval_seeclick_loss_num": 0.09033203125, "eval_seeclick_loss_xval": 0.45166015625, "eval_seeclick_runtime": 77.9192, "eval_seeclick_samples_per_second": 0.552, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 547664792, "step": 6000 }, { "epoch": 25.0, "eval_icons_CIoU": 0.2780071347951889, "eval_icons_GIoU": 0.2557266652584076, "eval_icons_IoU": 0.3593812435865402, "eval_icons_MAE_all": 0.07604104280471802, "eval_icons_MAE_h": 0.14657872170209885, "eval_icons_MAE_w": 0.11666521430015564, "eval_icons_MAE_x_boxes": 0.1220252551138401, "eval_icons_MAE_y_boxes": 0.15165819227695465, "eval_icons_NUM_probability": 0.9999914765357971, "eval_icons_inside_bbox": 0.5347222238779068, "eval_icons_loss": 0.38236290216445923, "eval_icons_loss_ce": 0.0005112176586408168, "eval_icons_loss_iou": 0.25677490234375, "eval_icons_loss_num": 0.0786590576171875, "eval_icons_loss_xval": 0.3931884765625, "eval_icons_runtime": 89.2314, "eval_icons_samples_per_second": 0.56, "eval_icons_steps_per_second": 0.022, "num_input_tokens_seen": 547664792, "step": 6000 }, { "epoch": 25.0, "eval_screenspot_CIoU": 0.37966782848040265, "eval_screenspot_GIoU": 0.3703218400478363, "eval_screenspot_IoU": 0.44274385770161945, "eval_screenspot_MAE_all": 0.08963250120480855, "eval_screenspot_MAE_h": 0.07793333381414413, "eval_screenspot_MAE_w": 0.1972505748271942, "eval_screenspot_MAE_x_boxes": 0.16598065694173178, "eval_screenspot_MAE_y_boxes": 0.07822733372449875, "eval_screenspot_NUM_probability": 0.9999973376592001, "eval_screenspot_inside_bbox": 0.7116666634877523, "eval_screenspot_loss": 0.44954127073287964, "eval_screenspot_loss_ce": 0.0009444566724899536, "eval_screenspot_loss_iou": 0.3677571614583333, "eval_screenspot_loss_num": 0.09143575032552083, "eval_screenspot_loss_xval": 0.4573567708333333, "eval_screenspot_runtime": 149.8438, "eval_screenspot_samples_per_second": 0.594, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 547664792, "step": 6000 }, { "epoch": 25.0, "eval_compot_CIoU": 0.4837338328361511, "eval_compot_GIoU": 0.47165830433368683, "eval_compot_IoU": 0.5592522621154785, "eval_compot_MAE_all": 0.05793677642941475, "eval_compot_MAE_h": 0.0747892614454031, "eval_compot_MAE_w": 0.14299434423446655, "eval_compot_MAE_x_boxes": 0.14643944799900055, "eval_compot_MAE_y_boxes": 0.07384524680674076, "eval_compot_NUM_probability": 0.9999956786632538, "eval_compot_inside_bbox": 0.7361111044883728, "eval_compot_loss": 0.3381621539592743, "eval_compot_loss_ce": 0.059079062193632126, "eval_compot_loss_iou": 0.3218994140625, "eval_compot_loss_num": 0.05415534973144531, "eval_compot_loss_xval": 0.2707672119140625, "eval_compot_runtime": 85.9918, "eval_compot_samples_per_second": 0.581, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 547664792, "step": 6000 }, { "epoch": 25.0, "loss": 0.34967148303985596, "loss_ce": 0.057862378656864166, "loss_iou": 0.3359375, "loss_num": 0.058349609375, "loss_xval": 0.291015625, "num_input_tokens_seen": 547664792, "step": 6000 }, { "epoch": 25.004166666666666, "grad_norm": 1.902572635180254, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 547755800, "step": 6001 }, { "epoch": 25.004166666666666, "loss": 0.034302763640880585, "loss_ce": 1.0069838936033193e-06, "loss_iou": 0.2421875, "loss_num": 0.006866455078125, "loss_xval": 0.0341796875, "num_input_tokens_seen": 547755800, "step": 6001 }, { "epoch": 25.008333333333333, "grad_norm": 2.2311118770006884, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 547846116, "step": 6002 }, { "epoch": 25.008333333333333, "loss": 0.03062179684638977, "loss_ce": 1.2666236216318794e-05, "loss_iou": 0.251953125, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 547846116, "step": 6002 }, { "epoch": 25.0125, "grad_norm": 2.8990535287036945, "learning_rate": 5e-05, "loss": 0.051, "num_input_tokens_seen": 547937616, "step": 6003 }, { "epoch": 25.0125, "loss": 0.03604936972260475, "loss_ce": 8.109300324576907e-06, "loss_iou": 0.2451171875, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 547937616, "step": 6003 }, { "epoch": 25.016666666666666, "grad_norm": 2.879151132864372, "learning_rate": 5e-05, "loss": 0.0442, "num_input_tokens_seen": 548029308, "step": 6004 }, { "epoch": 25.016666666666666, "loss": 0.028768474236130714, "loss_ce": 0.0006312677287496626, "loss_iou": 0.19140625, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 548029308, "step": 6004 }, { "epoch": 25.020833333333332, "grad_norm": 2.3560327219287287, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 548121284, "step": 6005 }, { "epoch": 25.020833333333332, "loss": 0.023445885628461838, "loss_ce": 8.384964530705474e-06, "loss_iou": 0.22265625, "loss_num": 0.00469970703125, "loss_xval": 0.0234375, "num_input_tokens_seen": 548121284, "step": 6005 }, { "epoch": 25.025, "grad_norm": 2.7276189833542777, "learning_rate": 5e-05, "loss": 0.0442, "num_input_tokens_seen": 548212772, "step": 6006 }, { "epoch": 25.025, "loss": 0.04950391501188278, "loss_ce": 4.40308667748468e-06, "loss_iou": 0.28515625, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 548212772, "step": 6006 }, { "epoch": 25.029166666666665, "grad_norm": 2.5226347897824684, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 548304444, "step": 6007 }, { "epoch": 25.029166666666665, "loss": 0.050401873886585236, "loss_ce": 2.0907170892314753e-06, "loss_iou": 0.3828125, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 548304444, "step": 6007 }, { "epoch": 25.033333333333335, "grad_norm": 1.0459329312119423, "learning_rate": 5e-05, "loss": 0.0834, "num_input_tokens_seen": 548395768, "step": 6008 }, { "epoch": 25.033333333333335, "loss": 0.14836296439170837, "loss_ce": 1.7653171653364552e-06, "loss_iou": 0.2255859375, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 548395768, "step": 6008 }, { "epoch": 25.0375, "grad_norm": 1.9787605859110673, "learning_rate": 5e-05, "loss": 0.0205, "num_input_tokens_seen": 548487192, "step": 6009 }, { "epoch": 25.0375, "loss": 0.018174223601818085, "loss_ce": 1.0055944130726857e-06, "loss_iou": 0.2265625, "loss_num": 0.003631591796875, "loss_xval": 0.0181884765625, "num_input_tokens_seen": 548487192, "step": 6009 }, { "epoch": 25.041666666666668, "grad_norm": 1.121889175408013, "learning_rate": 5e-05, "loss": 0.0244, "num_input_tokens_seen": 548578280, "step": 6010 }, { "epoch": 25.041666666666668, "loss": 0.03376873955130577, "loss_ce": 1.0389567250967957e-06, "loss_iou": 0.291015625, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 548578280, "step": 6010 }, { "epoch": 25.045833333333334, "grad_norm": 1.3851939658554242, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 548669468, "step": 6011 }, { "epoch": 25.045833333333334, "loss": 0.027827546000480652, "loss_ce": 1.0773524081741925e-05, "loss_iou": 0.25390625, "loss_num": 0.00555419921875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 548669468, "step": 6011 }, { "epoch": 25.05, "grad_norm": 2.126157401900572, "learning_rate": 5e-05, "loss": 0.0559, "num_input_tokens_seen": 548760536, "step": 6012 }, { "epoch": 25.05, "loss": 0.06270837038755417, "loss_ce": 1.0008010576711968e-05, "loss_iou": 0.2109375, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 548760536, "step": 6012 }, { "epoch": 25.054166666666667, "grad_norm": 3.4490338069498168, "learning_rate": 5e-05, "loss": 0.024, "num_input_tokens_seen": 548852208, "step": 6013 }, { "epoch": 25.054166666666667, "loss": 0.024851012974977493, "loss_ce": 2.496362321835477e-05, "loss_iou": 0.3203125, "loss_num": 0.004974365234375, "loss_xval": 0.0247802734375, "num_input_tokens_seen": 548852208, "step": 6013 }, { "epoch": 25.058333333333334, "grad_norm": 3.8029374535271323, "learning_rate": 5e-05, "loss": 0.0359, "num_input_tokens_seen": 548943736, "step": 6014 }, { "epoch": 25.058333333333334, "loss": 0.035249799489974976, "loss_ce": 1.9983369838882936e-06, "loss_iou": 0.25390625, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 548943736, "step": 6014 }, { "epoch": 25.0625, "grad_norm": 2.324225388968632, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 549034768, "step": 6015 }, { "epoch": 25.0625, "loss": 0.023685678839683533, "loss_ce": 6.507354555651546e-05, "loss_iou": 0.201171875, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 549034768, "step": 6015 }, { "epoch": 25.066666666666666, "grad_norm": 3.0835970710088816, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 549126412, "step": 6016 }, { "epoch": 25.066666666666666, "loss": 0.03177113085985184, "loss_ce": 1.758850885380525e-05, "loss_iou": 0.166015625, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 549126412, "step": 6016 }, { "epoch": 25.070833333333333, "grad_norm": 2.917265856135224, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 549218220, "step": 6017 }, { "epoch": 25.070833333333333, "loss": 0.028635790571570396, "loss_ce": 2.6737684493127745e-06, "loss_iou": 0.3046875, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 549218220, "step": 6017 }, { "epoch": 25.075, "grad_norm": 1.7254570430119616, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 549309384, "step": 6018 }, { "epoch": 25.075, "loss": 0.031534358859062195, "loss_ce": 2.0701659195765387e-06, "loss_iou": 0.0849609375, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 549309384, "step": 6018 }, { "epoch": 25.079166666666666, "grad_norm": 2.582436232179212, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 549400380, "step": 6019 }, { "epoch": 25.079166666666666, "loss": 0.031680814921855927, "loss_ce": 3.5690659387910273e-06, "loss_iou": 0.1748046875, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 549400380, "step": 6019 }, { "epoch": 25.083333333333332, "grad_norm": 2.511898733177223, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 549489952, "step": 6020 }, { "epoch": 25.083333333333332, "loss": 0.06118789315223694, "loss_ce": 7.775071026117075e-06, "loss_iou": 0.07421875, "loss_num": 0.01220703125, "loss_xval": 0.061279296875, "num_input_tokens_seen": 549489952, "step": 6020 }, { "epoch": 25.0875, "grad_norm": 3.1444060718215425, "learning_rate": 5e-05, "loss": 0.0331, "num_input_tokens_seen": 549581396, "step": 6021 }, { "epoch": 25.0875, "loss": 0.04130501300096512, "loss_ce": 3.762021515285596e-05, "loss_iou": 0.203125, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 549581396, "step": 6021 }, { "epoch": 25.091666666666665, "grad_norm": 4.008643885183847, "learning_rate": 5e-05, "loss": 0.0731, "num_input_tokens_seen": 549672464, "step": 6022 }, { "epoch": 25.091666666666665, "loss": 0.0670071691274643, "loss_ce": 2.10778416658286e-05, "loss_iou": 0.224609375, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 549672464, "step": 6022 }, { "epoch": 25.095833333333335, "grad_norm": 2.5295437885586853, "learning_rate": 5e-05, "loss": 0.0268, "num_input_tokens_seen": 549763584, "step": 6023 }, { "epoch": 25.095833333333335, "loss": 0.029910461977124214, "loss_ce": 3.235071289964253e-06, "loss_iou": 0.234375, "loss_num": 0.0059814453125, "loss_xval": 0.0299072265625, "num_input_tokens_seen": 549763584, "step": 6023 }, { "epoch": 25.1, "grad_norm": 2.811572290753676, "learning_rate": 5e-05, "loss": 0.0487, "num_input_tokens_seen": 549855288, "step": 6024 }, { "epoch": 25.1, "loss": 0.03294540196657181, "loss_ce": 1.6782751117716543e-06, "loss_iou": 0.2734375, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 549855288, "step": 6024 }, { "epoch": 25.104166666666668, "grad_norm": 2.3896023458812206, "learning_rate": 5e-05, "loss": 0.0477, "num_input_tokens_seen": 549946244, "step": 6025 }, { "epoch": 25.104166666666668, "loss": 0.06967095285654068, "loss_ce": 1.4581054529116955e-05, "loss_iou": 0.30078125, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 549946244, "step": 6025 }, { "epoch": 25.108333333333334, "grad_norm": 3.5066065692268977, "learning_rate": 5e-05, "loss": 0.0216, "num_input_tokens_seen": 550037400, "step": 6026 }, { "epoch": 25.108333333333334, "loss": 0.021294424310326576, "loss_ce": 2.3672111638006754e-05, "loss_iou": 0.2265625, "loss_num": 0.0042724609375, "loss_xval": 0.021240234375, "num_input_tokens_seen": 550037400, "step": 6026 }, { "epoch": 25.1125, "grad_norm": 3.5435575532416266, "learning_rate": 5e-05, "loss": 0.0336, "num_input_tokens_seen": 550129352, "step": 6027 }, { "epoch": 25.1125, "loss": 0.027449486777186394, "loss_ce": 0.00019728824554476887, "loss_iou": 0.2216796875, "loss_num": 0.005462646484375, "loss_xval": 0.0272216796875, "num_input_tokens_seen": 550129352, "step": 6027 }, { "epoch": 25.116666666666667, "grad_norm": 2.154591012494385, "learning_rate": 5e-05, "loss": 0.0212, "num_input_tokens_seen": 550220748, "step": 6028 }, { "epoch": 25.116666666666667, "loss": 0.020502205938100815, "loss_ce": 2.022517037403304e-06, "loss_iou": 0.22265625, "loss_num": 0.00408935546875, "loss_xval": 0.0205078125, "num_input_tokens_seen": 550220748, "step": 6028 }, { "epoch": 25.120833333333334, "grad_norm": 2.858585344693595, "learning_rate": 5e-05, "loss": 0.043, "num_input_tokens_seen": 550312356, "step": 6029 }, { "epoch": 25.120833333333334, "loss": 0.027978356927633286, "loss_ce": 1.3687447335541947e-06, "loss_iou": 0.26953125, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 550312356, "step": 6029 }, { "epoch": 25.125, "grad_norm": 3.69369681425425, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 550403388, "step": 6030 }, { "epoch": 25.125, "loss": 0.07482340186834335, "loss_ce": 9.55643645283999e-06, "loss_iou": 0.14453125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 550403388, "step": 6030 }, { "epoch": 25.129166666666666, "grad_norm": 2.2436760329202796, "learning_rate": 5e-05, "loss": 0.0246, "num_input_tokens_seen": 550494840, "step": 6031 }, { "epoch": 25.129166666666666, "loss": 0.022455964237451553, "loss_ce": 1.7915572243509814e-05, "loss_iou": 0.28515625, "loss_num": 0.004486083984375, "loss_xval": 0.0224609375, "num_input_tokens_seen": 550494840, "step": 6031 }, { "epoch": 25.133333333333333, "grad_norm": 3.0835356823004285, "learning_rate": 5e-05, "loss": 0.0829, "num_input_tokens_seen": 550586596, "step": 6032 }, { "epoch": 25.133333333333333, "loss": 0.07718226313591003, "loss_ce": 0.023776497691869736, "loss_iou": 0.27734375, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 550586596, "step": 6032 }, { "epoch": 25.1375, "grad_norm": 3.9053492602716466, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 550677468, "step": 6033 }, { "epoch": 25.1375, "loss": 0.04059723764657974, "loss_ce": 5.463583511300385e-05, "loss_iou": 0.2421875, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 550677468, "step": 6033 }, { "epoch": 25.141666666666666, "grad_norm": 4.391838195127731, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 550769148, "step": 6034 }, { "epoch": 25.141666666666666, "loss": 0.03933006525039673, "loss_ce": 2.3427393898600712e-05, "loss_iou": 0.310546875, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 550769148, "step": 6034 }, { "epoch": 25.145833333333332, "grad_norm": 1.702615205429913, "learning_rate": 5e-05, "loss": 0.0276, "num_input_tokens_seen": 550860784, "step": 6035 }, { "epoch": 25.145833333333332, "loss": 0.03935377299785614, "loss_ce": 1.3561893865698949e-06, "loss_iou": 0.193359375, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 550860784, "step": 6035 }, { "epoch": 25.15, "grad_norm": 8.526456658638734, "learning_rate": 5e-05, "loss": 0.042, "num_input_tokens_seen": 550952312, "step": 6036 }, { "epoch": 25.15, "loss": 0.04818908870220184, "loss_ce": 1.8335138065594947e-06, "loss_iou": 0.1337890625, "loss_num": 0.0096435546875, "loss_xval": 0.048095703125, "num_input_tokens_seen": 550952312, "step": 6036 }, { "epoch": 25.154166666666665, "grad_norm": 1.5319893922990855, "learning_rate": 5e-05, "loss": 0.0448, "num_input_tokens_seen": 551043984, "step": 6037 }, { "epoch": 25.154166666666665, "loss": 0.04240569844841957, "loss_ce": 1.5248575664372765e-06, "loss_iou": 0.201171875, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 551043984, "step": 6037 }, { "epoch": 25.158333333333335, "grad_norm": 1.976474479972411, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 551134884, "step": 6038 }, { "epoch": 25.158333333333335, "loss": 0.06959168612957001, "loss_ce": 3.971980731876101e-06, "loss_iou": 0.1650390625, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 551134884, "step": 6038 }, { "epoch": 25.1625, "grad_norm": 8.437012268532477, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 551226476, "step": 6039 }, { "epoch": 25.1625, "loss": 0.051346659660339355, "loss_ce": 0.00024497474078089, "loss_iou": 0.265625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 551226476, "step": 6039 }, { "epoch": 25.166666666666668, "grad_norm": 2.817841350402008, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 551317596, "step": 6040 }, { "epoch": 25.166666666666668, "loss": 0.03814946487545967, "loss_ce": 2.493340844011982e-06, "loss_iou": 0.357421875, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 551317596, "step": 6040 }, { "epoch": 25.170833333333334, "grad_norm": 2.861904774708129, "learning_rate": 5e-05, "loss": 0.0278, "num_input_tokens_seen": 551409240, "step": 6041 }, { "epoch": 25.170833333333334, "loss": 0.026386898010969162, "loss_ce": 1.208052799483994e-05, "loss_iou": 0.30078125, "loss_num": 0.005279541015625, "loss_xval": 0.0263671875, "num_input_tokens_seen": 551409240, "step": 6041 }, { "epoch": 25.175, "grad_norm": 2.7754420985172317, "learning_rate": 5e-05, "loss": 0.0257, "num_input_tokens_seen": 551500688, "step": 6042 }, { "epoch": 25.175, "loss": 0.02647359110414982, "loss_ce": 0.00019795526168309152, "loss_iou": 0.22265625, "loss_num": 0.0052490234375, "loss_xval": 0.0262451171875, "num_input_tokens_seen": 551500688, "step": 6042 }, { "epoch": 25.179166666666667, "grad_norm": 3.2049472199198212, "learning_rate": 5e-05, "loss": 0.034, "num_input_tokens_seen": 551592012, "step": 6043 }, { "epoch": 25.179166666666667, "loss": 0.03987519443035126, "loss_ce": 3.9797023418941535e-06, "loss_iou": 0.255859375, "loss_num": 0.00799560546875, "loss_xval": 0.039794921875, "num_input_tokens_seen": 551592012, "step": 6043 }, { "epoch": 25.183333333333334, "grad_norm": 4.081577881387569, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 551683120, "step": 6044 }, { "epoch": 25.183333333333334, "loss": 0.05834193900227547, "loss_ce": 7.586995252495399e-06, "loss_iou": 0.31640625, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 551683120, "step": 6044 }, { "epoch": 25.1875, "grad_norm": 3.7294875673962977, "learning_rate": 5e-05, "loss": 0.0434, "num_input_tokens_seen": 551773720, "step": 6045 }, { "epoch": 25.1875, "loss": 0.047974590212106705, "loss_ce": 9.558589226799086e-07, "loss_iou": 0.279296875, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 551773720, "step": 6045 }, { "epoch": 25.191666666666666, "grad_norm": 3.02537900411946, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 551865096, "step": 6046 }, { "epoch": 25.191666666666666, "loss": 0.025306470692157745, "loss_ce": 7.398280104098376e-06, "loss_iou": 0.271484375, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 551865096, "step": 6046 }, { "epoch": 25.195833333333333, "grad_norm": 2.8880457566644586, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 551956344, "step": 6047 }, { "epoch": 25.195833333333333, "loss": 0.027018003165721893, "loss_ce": 9.946590580511838e-06, "loss_iou": 0.23828125, "loss_num": 0.005401611328125, "loss_xval": 0.0269775390625, "num_input_tokens_seen": 551956344, "step": 6047 }, { "epoch": 25.2, "grad_norm": 3.0617668203476756, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 552047920, "step": 6048 }, { "epoch": 25.2, "loss": 0.04437287151813507, "loss_ce": 0.0005038546514697373, "loss_iou": 0.248046875, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 552047920, "step": 6048 }, { "epoch": 25.204166666666666, "grad_norm": 3.1356232150398906, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 552139636, "step": 6049 }, { "epoch": 25.204166666666666, "loss": 0.045794256031513214, "loss_ce": 2.6284108116669813e-06, "loss_iou": 0.3046875, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 552139636, "step": 6049 }, { "epoch": 25.208333333333332, "grad_norm": 3.108656855627276, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 552230548, "step": 6050 }, { "epoch": 25.208333333333332, "loss": 0.02768785133957863, "loss_ce": 8.406275810557418e-06, "loss_iou": 0.2060546875, "loss_num": 0.005523681640625, "loss_xval": 0.0277099609375, "num_input_tokens_seen": 552230548, "step": 6050 }, { "epoch": 25.2125, "grad_norm": 2.556294518952924, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 552321944, "step": 6051 }, { "epoch": 25.2125, "loss": 0.041742969304323196, "loss_ce": 7.121398812159896e-05, "loss_iou": 0.1455078125, "loss_num": 0.00830078125, "loss_xval": 0.041748046875, "num_input_tokens_seen": 552321944, "step": 6051 }, { "epoch": 25.216666666666665, "grad_norm": 1.604112879099758, "learning_rate": 5e-05, "loss": 0.0303, "num_input_tokens_seen": 552412204, "step": 6052 }, { "epoch": 25.216666666666665, "loss": 0.03724295273423195, "loss_ce": 3.439585998421535e-05, "loss_iou": 0.1767578125, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 552412204, "step": 6052 }, { "epoch": 25.220833333333335, "grad_norm": 2.169097902979818, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 552503264, "step": 6053 }, { "epoch": 25.220833333333335, "loss": 0.043073736131191254, "loss_ce": 1.3429884347715415e-05, "loss_iou": 0.134765625, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 552503264, "step": 6053 }, { "epoch": 25.225, "grad_norm": 2.6938214772458475, "learning_rate": 5e-05, "loss": 0.056, "num_input_tokens_seen": 552594460, "step": 6054 }, { "epoch": 25.225, "loss": 0.05623520910739899, "loss_ce": 0.0016163706313818693, "loss_iou": 0.388671875, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 552594460, "step": 6054 }, { "epoch": 25.229166666666668, "grad_norm": 2.4388213256353657, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 552686664, "step": 6055 }, { "epoch": 25.229166666666668, "loss": 0.03916994854807854, "loss_ce": 8.268221790785901e-06, "loss_iou": 0.26953125, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 552686664, "step": 6055 }, { "epoch": 25.233333333333334, "grad_norm": 2.30275518305168, "learning_rate": 5e-05, "loss": 0.0368, "num_input_tokens_seen": 552777596, "step": 6056 }, { "epoch": 25.233333333333334, "loss": 0.030525466427206993, "loss_ce": 7.88822399044875e-06, "loss_iou": 0.248046875, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 552777596, "step": 6056 }, { "epoch": 25.2375, "grad_norm": 7.904650634242569, "learning_rate": 5e-05, "loss": 0.0728, "num_input_tokens_seen": 552869104, "step": 6057 }, { "epoch": 25.2375, "loss": 0.04267742484807968, "loss_ce": 1.3848333765054122e-05, "loss_iou": 0.314453125, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 552869104, "step": 6057 }, { "epoch": 25.241666666666667, "grad_norm": 1.6329504772202257, "learning_rate": 5e-05, "loss": 0.0224, "num_input_tokens_seen": 552960884, "step": 6058 }, { "epoch": 25.241666666666667, "loss": 0.017983609810471535, "loss_ce": 7.74200598243624e-05, "loss_iou": 0.1875, "loss_num": 0.0035858154296875, "loss_xval": 0.0179443359375, "num_input_tokens_seen": 552960884, "step": 6058 }, { "epoch": 25.245833333333334, "grad_norm": 1.629977688834735, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 553052332, "step": 6059 }, { "epoch": 25.245833333333334, "loss": 0.02765512838959694, "loss_ce": 6.202018084877636e-06, "loss_iou": 0.1650390625, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 553052332, "step": 6059 }, { "epoch": 25.25, "grad_norm": 1.151649146290314, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 553143824, "step": 6060 }, { "epoch": 25.25, "loss": 0.04415666684508324, "loss_ce": 1.2988108210265636e-05, "loss_iou": 0.203125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 553143824, "step": 6060 }, { "epoch": 25.254166666666666, "grad_norm": 2.573206958944099, "learning_rate": 5e-05, "loss": 0.0265, "num_input_tokens_seen": 553235640, "step": 6061 }, { "epoch": 25.254166666666666, "loss": 0.022646624594926834, "loss_ce": 0.00011702188930939883, "loss_iou": 0.228515625, "loss_num": 0.0045166015625, "loss_xval": 0.0225830078125, "num_input_tokens_seen": 553235640, "step": 6061 }, { "epoch": 25.258333333333333, "grad_norm": 2.543205432218472, "learning_rate": 5e-05, "loss": 0.0283, "num_input_tokens_seen": 553327176, "step": 6062 }, { "epoch": 25.258333333333333, "loss": 0.031073156744241714, "loss_ce": 6.262220722419443e-06, "loss_iou": 0.2119140625, "loss_num": 0.0062255859375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 553327176, "step": 6062 }, { "epoch": 25.2625, "grad_norm": 1.7539115692603051, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 553417908, "step": 6063 }, { "epoch": 25.2625, "loss": 0.0423419363796711, "loss_ce": 2.9313752747839317e-05, "loss_iou": 0.142578125, "loss_num": 0.00848388671875, "loss_xval": 0.042236328125, "num_input_tokens_seen": 553417908, "step": 6063 }, { "epoch": 25.266666666666666, "grad_norm": 2.6240582297613866, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 553509024, "step": 6064 }, { "epoch": 25.266666666666666, "loss": 0.04294588416814804, "loss_ce": 2.2912352505954914e-05, "loss_iou": 0.220703125, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 553509024, "step": 6064 }, { "epoch": 25.270833333333332, "grad_norm": 3.4733995884437077, "learning_rate": 5e-05, "loss": 0.0284, "num_input_tokens_seen": 553600624, "step": 6065 }, { "epoch": 25.270833333333332, "loss": 0.02730099856853485, "loss_ce": 3.0246562801039545e-06, "loss_iou": 0.26953125, "loss_num": 0.005462646484375, "loss_xval": 0.02734375, "num_input_tokens_seen": 553600624, "step": 6065 }, { "epoch": 25.275, "grad_norm": 2.3389696239677797, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 553692356, "step": 6066 }, { "epoch": 25.275, "loss": 0.036377716809511185, "loss_ce": 8.394465112360194e-06, "loss_iou": 0.23828125, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 553692356, "step": 6066 }, { "epoch": 25.279166666666665, "grad_norm": 1.6033628000150022, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 553784324, "step": 6067 }, { "epoch": 25.279166666666665, "loss": 0.026155997067689896, "loss_ce": 4.630144394468516e-05, "loss_iou": 0.248046875, "loss_num": 0.005218505859375, "loss_xval": 0.026123046875, "num_input_tokens_seen": 553784324, "step": 6067 }, { "epoch": 25.283333333333335, "grad_norm": 2.9244443998332263, "learning_rate": 5e-05, "loss": 0.044, "num_input_tokens_seen": 553875540, "step": 6068 }, { "epoch": 25.283333333333335, "loss": 0.06535577028989792, "loss_ce": 2.377549662924139e-06, "loss_iou": 0.23828125, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 553875540, "step": 6068 }, { "epoch": 25.2875, "grad_norm": 3.807395834154097, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 553966832, "step": 6069 }, { "epoch": 25.2875, "loss": 0.05497179552912712, "loss_ce": 9.639084055379499e-06, "loss_iou": 0.23046875, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 553966832, "step": 6069 }, { "epoch": 25.291666666666668, "grad_norm": 2.834562012780839, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 554058148, "step": 6070 }, { "epoch": 25.291666666666668, "loss": 0.029552895575761795, "loss_ce": 1.1879415978910401e-05, "loss_iou": 0.216796875, "loss_num": 0.005889892578125, "loss_xval": 0.029541015625, "num_input_tokens_seen": 554058148, "step": 6070 }, { "epoch": 25.295833333333334, "grad_norm": 2.661763152696126, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 554149580, "step": 6071 }, { "epoch": 25.295833333333334, "loss": 0.02328696846961975, "loss_ce": 3.257199205108918e-05, "loss_iou": 0.21484375, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 554149580, "step": 6071 }, { "epoch": 25.3, "grad_norm": 2.5368975839965318, "learning_rate": 5e-05, "loss": 0.0576, "num_input_tokens_seen": 554241028, "step": 6072 }, { "epoch": 25.3, "loss": 0.04571257531642914, "loss_ce": 0.00034056510776281357, "loss_iou": 0.2431640625, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 554241028, "step": 6072 }, { "epoch": 25.304166666666667, "grad_norm": 2.9454011017858925, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 554332176, "step": 6073 }, { "epoch": 25.304166666666667, "loss": 0.038939282298088074, "loss_ce": 0.00022773287491872907, "loss_iou": 0.150390625, "loss_num": 0.00775146484375, "loss_xval": 0.038818359375, "num_input_tokens_seen": 554332176, "step": 6073 }, { "epoch": 25.308333333333334, "grad_norm": 2.6148875530533044, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 554424136, "step": 6074 }, { "epoch": 25.308333333333334, "loss": 0.04254477471113205, "loss_ce": 3.2680629828973906e-06, "loss_iou": 0.2265625, "loss_num": 0.008544921875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 554424136, "step": 6074 }, { "epoch": 25.3125, "grad_norm": 1.9303923916065067, "learning_rate": 5e-05, "loss": 0.0236, "num_input_tokens_seen": 554515640, "step": 6075 }, { "epoch": 25.3125, "loss": 0.022905535995960236, "loss_ce": 2.0937043245794484e-06, "loss_iou": 0.2412109375, "loss_num": 0.00457763671875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 554515640, "step": 6075 }, { "epoch": 25.316666666666666, "grad_norm": 4.4026124210329085, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 554607388, "step": 6076 }, { "epoch": 25.316666666666666, "loss": 0.01897761970758438, "loss_ce": 3.31442925016745e-06, "loss_iou": 0.2578125, "loss_num": 0.0037994384765625, "loss_xval": 0.0189208984375, "num_input_tokens_seen": 554607388, "step": 6076 }, { "epoch": 25.320833333333333, "grad_norm": 2.605943019448658, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 554698436, "step": 6077 }, { "epoch": 25.320833333333333, "loss": 0.036523886024951935, "loss_ce": 1.9757746940740617e-06, "loss_iou": 0.185546875, "loss_num": 0.007293701171875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 554698436, "step": 6077 }, { "epoch": 25.325, "grad_norm": 1.573186431228264, "learning_rate": 5e-05, "loss": 0.0208, "num_input_tokens_seen": 554789692, "step": 6078 }, { "epoch": 25.325, "loss": 0.020063627511262894, "loss_ce": 6.698464130749926e-05, "loss_iou": 0.158203125, "loss_num": 0.003997802734375, "loss_xval": 0.02001953125, "num_input_tokens_seen": 554789692, "step": 6078 }, { "epoch": 25.329166666666666, "grad_norm": 2.068659930680847, "learning_rate": 5e-05, "loss": 0.0248, "num_input_tokens_seen": 554881188, "step": 6079 }, { "epoch": 25.329166666666666, "loss": 0.02518697828054428, "loss_ce": 9.976995897886809e-06, "loss_iou": 0.302734375, "loss_num": 0.005035400390625, "loss_xval": 0.025146484375, "num_input_tokens_seen": 554881188, "step": 6079 }, { "epoch": 25.333333333333332, "grad_norm": 2.5877747783948086, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 554971984, "step": 6080 }, { "epoch": 25.333333333333332, "loss": 0.045886993408203125, "loss_ce": 3.8162011151143815e-06, "loss_iou": 0.23828125, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 554971984, "step": 6080 }, { "epoch": 25.3375, "grad_norm": 2.0839909687644758, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 555063872, "step": 6081 }, { "epoch": 25.3375, "loss": 0.04413619637489319, "loss_ce": 1.5406214515678585e-05, "loss_iou": 0.138671875, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 555063872, "step": 6081 }, { "epoch": 25.341666666666665, "grad_norm": 2.2894444802931857, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 555156132, "step": 6082 }, { "epoch": 25.341666666666665, "loss": 0.029500527307391167, "loss_ce": 5.286502528178971e-06, "loss_iou": 0.1650390625, "loss_num": 0.005889892578125, "loss_xval": 0.029541015625, "num_input_tokens_seen": 555156132, "step": 6082 }, { "epoch": 25.345833333333335, "grad_norm": 2.591850664136599, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 555247880, "step": 6083 }, { "epoch": 25.345833333333335, "loss": 0.08285599946975708, "loss_ce": 8.408465873799287e-06, "loss_iou": 0.314453125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 555247880, "step": 6083 }, { "epoch": 25.35, "grad_norm": 2.980730081723208, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 555339520, "step": 6084 }, { "epoch": 25.35, "loss": 0.04531401768326759, "loss_ce": 1.0674537406885065e-05, "loss_iou": 0.341796875, "loss_num": 0.009033203125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 555339520, "step": 6084 }, { "epoch": 25.354166666666668, "grad_norm": 2.0120800401632333, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 555431580, "step": 6085 }, { "epoch": 25.354166666666668, "loss": 0.03285948187112808, "loss_ce": 7.307754458452109e-06, "loss_iou": 0.2734375, "loss_num": 0.006561279296875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 555431580, "step": 6085 }, { "epoch": 25.358333333333334, "grad_norm": 1.744347188648586, "learning_rate": 5e-05, "loss": 0.0535, "num_input_tokens_seen": 555522828, "step": 6086 }, { "epoch": 25.358333333333334, "loss": 0.03260708227753639, "loss_ce": 1.4308277059171814e-05, "loss_iou": 0.1845703125, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 555522828, "step": 6086 }, { "epoch": 25.3625, "grad_norm": 2.099605791813135, "learning_rate": 5e-05, "loss": 0.029, "num_input_tokens_seen": 555614424, "step": 6087 }, { "epoch": 25.3625, "loss": 0.02494313381612301, "loss_ce": 4.84204210806638e-05, "loss_iou": 0.2119140625, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 555614424, "step": 6087 }, { "epoch": 25.366666666666667, "grad_norm": 2.571578714794204, "learning_rate": 5e-05, "loss": 0.057, "num_input_tokens_seen": 555705584, "step": 6088 }, { "epoch": 25.366666666666667, "loss": 0.03919106349349022, "loss_ce": 6.493094133475097e-06, "loss_iou": 0.232421875, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 555705584, "step": 6088 }, { "epoch": 25.370833333333334, "grad_norm": 4.240971299263898, "learning_rate": 5e-05, "loss": 0.0352, "num_input_tokens_seen": 555796600, "step": 6089 }, { "epoch": 25.370833333333334, "loss": 0.030032845214009285, "loss_ce": 3.5493658288032748e-06, "loss_iou": 0.2353515625, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 555796600, "step": 6089 }, { "epoch": 25.375, "grad_norm": 2.91714991685141, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 555888220, "step": 6090 }, { "epoch": 25.375, "loss": 0.024421460926532745, "loss_ce": 7.397808531095507e-06, "loss_iou": 0.2392578125, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 555888220, "step": 6090 }, { "epoch": 25.379166666666666, "grad_norm": 2.6817740223888205, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 555978744, "step": 6091 }, { "epoch": 25.379166666666666, "loss": 0.04826442897319794, "loss_ce": 0.0008095953380689025, "loss_iou": 0.32421875, "loss_num": 0.009521484375, "loss_xval": 0.04736328125, "num_input_tokens_seen": 555978744, "step": 6091 }, { "epoch": 25.383333333333333, "grad_norm": 2.820622322141332, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 556071016, "step": 6092 }, { "epoch": 25.383333333333333, "loss": 0.04822583124041557, "loss_ce": 8.055085345404223e-06, "loss_iou": 0.29296875, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 556071016, "step": 6092 }, { "epoch": 25.3875, "grad_norm": 2.7407924939090265, "learning_rate": 5e-05, "loss": 0.045, "num_input_tokens_seen": 556161896, "step": 6093 }, { "epoch": 25.3875, "loss": 0.03777515888214111, "loss_ce": 2.491514533176087e-05, "loss_iou": 0.2119140625, "loss_num": 0.007537841796875, "loss_xval": 0.037841796875, "num_input_tokens_seen": 556161896, "step": 6093 }, { "epoch": 25.391666666666666, "grad_norm": 3.111564298227178, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 556253152, "step": 6094 }, { "epoch": 25.391666666666666, "loss": 0.07930988818407059, "loss_ce": 0.00014728563837707043, "loss_iou": 0.1943359375, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 556253152, "step": 6094 }, { "epoch": 25.395833333333332, "grad_norm": 2.469716527508771, "learning_rate": 5e-05, "loss": 0.0292, "num_input_tokens_seen": 556344880, "step": 6095 }, { "epoch": 25.395833333333332, "loss": 0.029712753370404243, "loss_ce": 1.1521155101945624e-05, "loss_iou": 0.208984375, "loss_num": 0.005950927734375, "loss_xval": 0.0296630859375, "num_input_tokens_seen": 556344880, "step": 6095 }, { "epoch": 25.4, "grad_norm": 2.9204098755484718, "learning_rate": 5e-05, "loss": 0.0423, "num_input_tokens_seen": 556436112, "step": 6096 }, { "epoch": 25.4, "loss": 0.06473027169704437, "loss_ce": 2.4858775304892333e-06, "loss_iou": 0.3046875, "loss_num": 0.012939453125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 556436112, "step": 6096 }, { "epoch": 25.404166666666665, "grad_norm": 3.7665925035988774, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 556527744, "step": 6097 }, { "epoch": 25.404166666666665, "loss": 0.06511792540550232, "loss_ce": 0.0021601621992886066, "loss_iou": 0.37890625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 556527744, "step": 6097 }, { "epoch": 25.408333333333335, "grad_norm": 2.260592190063528, "learning_rate": 5e-05, "loss": 0.0291, "num_input_tokens_seen": 556618672, "step": 6098 }, { "epoch": 25.408333333333335, "loss": 0.03642168268561363, "loss_ce": 0.0002125762403011322, "loss_iou": 0.2412109375, "loss_num": 0.00726318359375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 556618672, "step": 6098 }, { "epoch": 25.4125, "grad_norm": 3.1675027303796948, "learning_rate": 5e-05, "loss": 0.0384, "num_input_tokens_seen": 556710084, "step": 6099 }, { "epoch": 25.4125, "loss": 0.0574367418885231, "loss_ce": 2.656482593010878e-06, "loss_iou": 0.294921875, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 556710084, "step": 6099 }, { "epoch": 25.416666666666668, "grad_norm": 2.648084156187906, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 556801032, "step": 6100 }, { "epoch": 25.416666666666668, "loss": 0.08385223150253296, "loss_ce": 5.189658622839488e-06, "loss_iou": 0.2353515625, "loss_num": 0.0167236328125, "loss_xval": 0.083984375, "num_input_tokens_seen": 556801032, "step": 6100 }, { "epoch": 25.420833333333334, "grad_norm": 1.7032329247033198, "learning_rate": 5e-05, "loss": 0.0318, "num_input_tokens_seen": 556892808, "step": 6101 }, { "epoch": 25.420833333333334, "loss": 0.03905937820672989, "loss_ce": 2.739548472163733e-05, "loss_iou": 0.3125, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 556892808, "step": 6101 }, { "epoch": 25.425, "grad_norm": 5.489724193926814, "learning_rate": 5e-05, "loss": 0.0485, "num_input_tokens_seen": 556984324, "step": 6102 }, { "epoch": 25.425, "loss": 0.025776395574212074, "loss_ce": 4.301471562939696e-06, "loss_iou": 0.2890625, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 556984324, "step": 6102 }, { "epoch": 25.429166666666667, "grad_norm": 1.882860573439344, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 557075480, "step": 6103 }, { "epoch": 25.429166666666667, "loss": 0.028099657967686653, "loss_ce": 8.226294994528871e-06, "loss_iou": 0.23828125, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 557075480, "step": 6103 }, { "epoch": 25.433333333333334, "grad_norm": 3.51761967547373, "learning_rate": 5e-05, "loss": 0.027, "num_input_tokens_seen": 557166728, "step": 6104 }, { "epoch": 25.433333333333334, "loss": 0.02423533797264099, "loss_ce": 4.3809313865494914e-06, "loss_iou": 0.1533203125, "loss_num": 0.004852294921875, "loss_xval": 0.024169921875, "num_input_tokens_seen": 557166728, "step": 6104 }, { "epoch": 25.4375, "grad_norm": 25.872497182724743, "learning_rate": 5e-05, "loss": 0.062, "num_input_tokens_seen": 557258312, "step": 6105 }, { "epoch": 25.4375, "loss": 0.059171389788389206, "loss_ce": 1.3064649465377443e-05, "loss_iou": 0.2431640625, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 557258312, "step": 6105 }, { "epoch": 25.441666666666666, "grad_norm": 33.64559528321087, "learning_rate": 5e-05, "loss": 0.06, "num_input_tokens_seen": 557349740, "step": 6106 }, { "epoch": 25.441666666666666, "loss": 0.03349640592932701, "loss_ce": 3.364375970704714e-06, "loss_iou": 0.291015625, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 557349740, "step": 6106 }, { "epoch": 25.445833333333333, "grad_norm": 2.288998411873553, "learning_rate": 5e-05, "loss": 0.0412, "num_input_tokens_seen": 557440816, "step": 6107 }, { "epoch": 25.445833333333333, "loss": 0.027652215212583542, "loss_ce": 3.2900277346925577e-06, "loss_iou": 0.1533203125, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 557440816, "step": 6107 }, { "epoch": 25.45, "grad_norm": 0.9314464545262887, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 557532080, "step": 6108 }, { "epoch": 25.45, "loss": 0.07703755050897598, "loss_ce": 7.985006959643215e-05, "loss_iou": 0.201171875, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 557532080, "step": 6108 }, { "epoch": 25.454166666666666, "grad_norm": 1.241327751825131, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 557623024, "step": 6109 }, { "epoch": 25.454166666666666, "loss": 0.06884145736694336, "loss_ce": 9.057611350726802e-06, "loss_iou": 0.1455078125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 557623024, "step": 6109 }, { "epoch": 25.458333333333332, "grad_norm": 1.1212944439629025, "learning_rate": 5e-05, "loss": 0.0198, "num_input_tokens_seen": 557714656, "step": 6110 }, { "epoch": 25.458333333333332, "loss": 0.02408221922814846, "loss_ce": 1.147904731624294e-05, "loss_iou": 0.1962890625, "loss_num": 0.00482177734375, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 557714656, "step": 6110 }, { "epoch": 25.4625, "grad_norm": 2.017701233508004, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 557805492, "step": 6111 }, { "epoch": 25.4625, "loss": 0.03724497929215431, "loss_ce": 5.9048184084531385e-06, "loss_iou": 0.21484375, "loss_num": 0.0074462890625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 557805492, "step": 6111 }, { "epoch": 25.466666666666665, "grad_norm": 2.0441244319648897, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 557896420, "step": 6112 }, { "epoch": 25.466666666666665, "loss": 0.03492957353591919, "loss_ce": 2.2076055756770074e-06, "loss_iou": 0.150390625, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 557896420, "step": 6112 }, { "epoch": 25.470833333333335, "grad_norm": 2.491763401469673, "learning_rate": 5e-05, "loss": 0.0295, "num_input_tokens_seen": 557988276, "step": 6113 }, { "epoch": 25.470833333333335, "loss": 0.028048336505889893, "loss_ce": 2.5569899662514217e-05, "loss_iou": 0.177734375, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 557988276, "step": 6113 }, { "epoch": 25.475, "grad_norm": 2.5447415831955773, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 558079432, "step": 6114 }, { "epoch": 25.475, "loss": 0.02955949306488037, "loss_ce": 1.8475300748832524e-05, "loss_iou": 0.26953125, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 558079432, "step": 6114 }, { "epoch": 25.479166666666668, "grad_norm": 3.3400385296586834, "learning_rate": 5e-05, "loss": 0.04, "num_input_tokens_seen": 558170584, "step": 6115 }, { "epoch": 25.479166666666668, "loss": 0.036854639649391174, "loss_ce": 4.66698429590906e-06, "loss_iou": 0.275390625, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 558170584, "step": 6115 }, { "epoch": 25.483333333333334, "grad_norm": 3.265928169946302, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 558261288, "step": 6116 }, { "epoch": 25.483333333333334, "loss": 0.047640688717365265, "loss_ce": 2.7500962005433394e-06, "loss_iou": 0.181640625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 558261288, "step": 6116 }, { "epoch": 25.4875, "grad_norm": 2.7706822553364066, "learning_rate": 5e-05, "loss": 0.0565, "num_input_tokens_seen": 558352716, "step": 6117 }, { "epoch": 25.4875, "loss": 0.08690313994884491, "loss_ce": 4.333661308919545e-06, "loss_iou": 0.1953125, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 558352716, "step": 6117 }, { "epoch": 25.491666666666667, "grad_norm": 2.24556863927375, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 558443444, "step": 6118 }, { "epoch": 25.491666666666667, "loss": 0.040499813854694366, "loss_ce": 2.989433141920017e-06, "loss_iou": 0.30078125, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 558443444, "step": 6118 }, { "epoch": 25.495833333333334, "grad_norm": 1.6439354079658433, "learning_rate": 5e-05, "loss": 0.0424, "num_input_tokens_seen": 558535240, "step": 6119 }, { "epoch": 25.495833333333334, "loss": 0.048288244754076004, "loss_ce": 9.436404070584103e-06, "loss_iou": 0.34375, "loss_num": 0.0096435546875, "loss_xval": 0.04833984375, "num_input_tokens_seen": 558535240, "step": 6119 }, { "epoch": 25.5, "grad_norm": 3.24354781076264, "learning_rate": 5e-05, "loss": 0.0423, "num_input_tokens_seen": 558626272, "step": 6120 }, { "epoch": 25.5, "loss": 0.05987918749451637, "loss_ce": 3.6996714243286988e-06, "loss_iou": 0.1494140625, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 558626272, "step": 6120 }, { "epoch": 25.504166666666666, "grad_norm": 2.5912436325314285, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 558717976, "step": 6121 }, { "epoch": 25.504166666666666, "loss": 0.05477771908044815, "loss_ce": 2.1548537915805355e-05, "loss_iou": 0.2412109375, "loss_num": 0.01092529296875, "loss_xval": 0.0546875, "num_input_tokens_seen": 558717976, "step": 6121 }, { "epoch": 25.508333333333333, "grad_norm": 2.347266316565128, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 558808952, "step": 6122 }, { "epoch": 25.508333333333333, "loss": 0.028351813554763794, "loss_ce": 1.6242007404798642e-05, "loss_iou": 0.29296875, "loss_num": 0.00567626953125, "loss_xval": 0.0283203125, "num_input_tokens_seen": 558808952, "step": 6122 }, { "epoch": 25.5125, "grad_norm": 1.9858400015423128, "learning_rate": 5e-05, "loss": 0.0465, "num_input_tokens_seen": 558899452, "step": 6123 }, { "epoch": 25.5125, "loss": 0.06100527197122574, "loss_ce": 1.5890123904682696e-05, "loss_iou": 0.2353515625, "loss_num": 0.01220703125, "loss_xval": 0.06103515625, "num_input_tokens_seen": 558899452, "step": 6123 }, { "epoch": 25.516666666666666, "grad_norm": 2.3835819394228754, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 558990088, "step": 6124 }, { "epoch": 25.516666666666666, "loss": 0.02575359307229519, "loss_ce": 4.386562977742869e-06, "loss_iou": 0.26171875, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 558990088, "step": 6124 }, { "epoch": 25.520833333333332, "grad_norm": 3.904938479971484, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 559081656, "step": 6125 }, { "epoch": 25.520833333333332, "loss": 0.04114597290754318, "loss_ce": 8.280196198029444e-06, "loss_iou": 0.28125, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 559081656, "step": 6125 }, { "epoch": 25.525, "grad_norm": 2.9375503556599205, "learning_rate": 5e-05, "loss": 0.0808, "num_input_tokens_seen": 559173288, "step": 6126 }, { "epoch": 25.525, "loss": 0.11002198606729507, "loss_ce": 0.0004333627293817699, "loss_iou": 0.22265625, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 559173288, "step": 6126 }, { "epoch": 25.529166666666665, "grad_norm": 2.037901143825145, "learning_rate": 5e-05, "loss": 0.0602, "num_input_tokens_seen": 559264852, "step": 6127 }, { "epoch": 25.529166666666665, "loss": 0.042439281940460205, "loss_ce": 4.587895546137588e-06, "loss_iou": 0.189453125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 559264852, "step": 6127 }, { "epoch": 25.533333333333335, "grad_norm": 2.7313235323653307, "learning_rate": 5e-05, "loss": 0.0377, "num_input_tokens_seen": 559356024, "step": 6128 }, { "epoch": 25.533333333333335, "loss": 0.04620426520705223, "loss_ce": 7.69436446717009e-05, "loss_iou": 0.251953125, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 559356024, "step": 6128 }, { "epoch": 25.5375, "grad_norm": 3.677637384356609, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 559445656, "step": 6129 }, { "epoch": 25.5375, "loss": 0.0315367616713047, "loss_ce": 1.2105560927011538e-05, "loss_iou": 0.2275390625, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 559445656, "step": 6129 }, { "epoch": 25.541666666666668, "grad_norm": 1.4822979443891784, "learning_rate": 5e-05, "loss": 0.026, "num_input_tokens_seen": 559536732, "step": 6130 }, { "epoch": 25.541666666666668, "loss": 0.029575761407613754, "loss_ce": 4.225951670377981e-06, "loss_iou": 0.201171875, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 559536732, "step": 6130 }, { "epoch": 25.545833333333334, "grad_norm": 1.8855107184609745, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 559628444, "step": 6131 }, { "epoch": 25.545833333333334, "loss": 0.056017693132162094, "loss_ce": 2.6805405468621757e-06, "loss_iou": 0.27734375, "loss_num": 0.01123046875, "loss_xval": 0.055908203125, "num_input_tokens_seen": 559628444, "step": 6131 }, { "epoch": 25.55, "grad_norm": 2.651588365035582, "learning_rate": 5e-05, "loss": 0.0519, "num_input_tokens_seen": 559719208, "step": 6132 }, { "epoch": 25.55, "loss": 0.04326911270618439, "loss_ce": 2.819902192641166e-06, "loss_iou": 0.2138671875, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 559719208, "step": 6132 }, { "epoch": 25.554166666666667, "grad_norm": 2.445094593769028, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 559810484, "step": 6133 }, { "epoch": 25.554166666666667, "loss": 0.0506831593811512, "loss_ce": 8.721475751372054e-06, "loss_iou": 0.294921875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 559810484, "step": 6133 }, { "epoch": 25.558333333333334, "grad_norm": 3.094808076272301, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 559900268, "step": 6134 }, { "epoch": 25.558333333333334, "loss": 0.034136466681957245, "loss_ce": 2.554569846324739e-06, "loss_iou": 0.201171875, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 559900268, "step": 6134 }, { "epoch": 25.5625, "grad_norm": 3.0796429838124317, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 559991272, "step": 6135 }, { "epoch": 25.5625, "loss": 0.027941647917032242, "loss_ce": 2.805052190524293e-06, "loss_iou": 0.3359375, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 559991272, "step": 6135 }, { "epoch": 25.566666666666666, "grad_norm": 2.9306883834033335, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 560082028, "step": 6136 }, { "epoch": 25.566666666666666, "loss": 0.046127140522003174, "loss_ce": 0.0001371528342133388, "loss_iou": 0.21875, "loss_num": 0.00921630859375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 560082028, "step": 6136 }, { "epoch": 25.570833333333333, "grad_norm": 12.48278292680775, "learning_rate": 5e-05, "loss": 0.0317, "num_input_tokens_seen": 560172916, "step": 6137 }, { "epoch": 25.570833333333333, "loss": 0.03275076672434807, "loss_ce": 5.406980108091375e-06, "loss_iou": 0.171875, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 560172916, "step": 6137 }, { "epoch": 25.575, "grad_norm": 3.4255258269354933, "learning_rate": 5e-05, "loss": 0.0504, "num_input_tokens_seen": 560264344, "step": 6138 }, { "epoch": 25.575, "loss": 0.05948089435696602, "loss_ce": 2.1331347852537874e-06, "loss_iou": 0.224609375, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 560264344, "step": 6138 }, { "epoch": 25.579166666666666, "grad_norm": 6.176161240214384, "learning_rate": 5e-05, "loss": 0.0672, "num_input_tokens_seen": 560355508, "step": 6139 }, { "epoch": 25.579166666666666, "loss": 0.08296459913253784, "loss_ce": 2.5523606836941326e-06, "loss_iou": 0.19140625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 560355508, "step": 6139 }, { "epoch": 25.583333333333332, "grad_norm": 14.835425161934097, "learning_rate": 5e-05, "loss": 0.0764, "num_input_tokens_seen": 560446256, "step": 6140 }, { "epoch": 25.583333333333332, "loss": 0.08755681663751602, "loss_ce": 9.343799320049584e-05, "loss_iou": 0.3046875, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 560446256, "step": 6140 }, { "epoch": 25.5875, "grad_norm": 205.04027595026457, "learning_rate": 5e-05, "loss": 0.2514, "num_input_tokens_seen": 560537824, "step": 6141 }, { "epoch": 25.5875, "loss": 0.28212207555770874, "loss_ce": 0.08082813024520874, "loss_iou": 0.34375, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 560537824, "step": 6141 }, { "epoch": 25.591666666666665, "grad_norm": 7.055879885382561, "learning_rate": 5e-05, "loss": 0.0733, "num_input_tokens_seen": 560629236, "step": 6142 }, { "epoch": 25.591666666666665, "loss": 0.08724255859851837, "loss_ce": 0.04560131952166557, "loss_iou": 0.23046875, "loss_num": 0.00830078125, "loss_xval": 0.041748046875, "num_input_tokens_seen": 560629236, "step": 6142 }, { "epoch": 25.595833333333335, "grad_norm": 1.1593042300951693, "learning_rate": 5e-05, "loss": 0.0636, "num_input_tokens_seen": 560720168, "step": 6143 }, { "epoch": 25.595833333333335, "loss": 0.035621605813503265, "loss_ce": 3.0483281079796143e-05, "loss_iou": 0.208984375, "loss_num": 0.007110595703125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 560720168, "step": 6143 }, { "epoch": 25.6, "grad_norm": 6.720734426976948, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 560811132, "step": 6144 }, { "epoch": 25.6, "loss": 0.03479313105344772, "loss_ce": 3.0915059596736683e-06, "loss_iou": 0.27734375, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 560811132, "step": 6144 }, { "epoch": 25.604166666666668, "grad_norm": 2.518419210158942, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 560901332, "step": 6145 }, { "epoch": 25.604166666666668, "loss": 0.02495124191045761, "loss_ce": 3.1241229407896753e-06, "loss_iou": 0.2021484375, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 560901332, "step": 6145 }, { "epoch": 25.608333333333334, "grad_norm": 2.8052451512041237, "learning_rate": 5e-05, "loss": 0.1045, "num_input_tokens_seen": 560993196, "step": 6146 }, { "epoch": 25.608333333333334, "loss": 0.04913078621029854, "loss_ce": 5.116238298796816e-06, "loss_iou": 0.255859375, "loss_num": 0.00982666015625, "loss_xval": 0.049072265625, "num_input_tokens_seen": 560993196, "step": 6146 }, { "epoch": 25.6125, "grad_norm": 2.9103962904260117, "learning_rate": 5e-05, "loss": 0.0764, "num_input_tokens_seen": 561084644, "step": 6147 }, { "epoch": 25.6125, "loss": 0.07760877907276154, "loss_ce": 9.412600775249302e-05, "loss_iou": 0.333984375, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 561084644, "step": 6147 }, { "epoch": 25.616666666666667, "grad_norm": 2.704512380849956, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 561176220, "step": 6148 }, { "epoch": 25.616666666666667, "loss": 0.036154501140117645, "loss_ce": 6.4274645410478115e-06, "loss_iou": 0.21875, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 561176220, "step": 6148 }, { "epoch": 25.620833333333334, "grad_norm": 7.236808509675872, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 561267356, "step": 6149 }, { "epoch": 25.620833333333334, "loss": 0.03460079804062843, "loss_ce": 9.121634320763405e-06, "loss_iou": 0.33203125, "loss_num": 0.00689697265625, "loss_xval": 0.03466796875, "num_input_tokens_seen": 561267356, "step": 6149 }, { "epoch": 25.625, "grad_norm": 4.561622903085843, "learning_rate": 5e-05, "loss": 0.0583, "num_input_tokens_seen": 561359028, "step": 6150 }, { "epoch": 25.625, "loss": 0.07709389925003052, "loss_ce": 0.00018960244779009372, "loss_iou": 0.3046875, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 561359028, "step": 6150 }, { "epoch": 25.629166666666666, "grad_norm": 2.8734458229378474, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 561450104, "step": 6151 }, { "epoch": 25.629166666666666, "loss": 0.04495877027511597, "loss_ce": 6.373582436935976e-06, "loss_iou": 0.1591796875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 561450104, "step": 6151 }, { "epoch": 25.633333333333333, "grad_norm": 3.25924726062024, "learning_rate": 5e-05, "loss": 0.0375, "num_input_tokens_seen": 561541564, "step": 6152 }, { "epoch": 25.633333333333333, "loss": 0.05050988495349884, "loss_ce": 3.3811782486736774e-05, "loss_iou": 0.26171875, "loss_num": 0.01007080078125, "loss_xval": 0.050537109375, "num_input_tokens_seen": 561541564, "step": 6152 }, { "epoch": 25.6375, "grad_norm": 2.6116361066075933, "learning_rate": 5e-05, "loss": 0.0718, "num_input_tokens_seen": 561632796, "step": 6153 }, { "epoch": 25.6375, "loss": 0.09388189762830734, "loss_ce": 3.271274545113556e-05, "loss_iou": 0.208984375, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 561632796, "step": 6153 }, { "epoch": 25.641666666666666, "grad_norm": 2.3179977721097003, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 561723784, "step": 6154 }, { "epoch": 25.641666666666666, "loss": 0.03187521547079086, "loss_ce": 7.235814791783923e-06, "loss_iou": 0.19140625, "loss_num": 0.006378173828125, "loss_xval": 0.031982421875, "num_input_tokens_seen": 561723784, "step": 6154 }, { "epoch": 25.645833333333332, "grad_norm": 2.778303299620632, "learning_rate": 5e-05, "loss": 0.0492, "num_input_tokens_seen": 561814648, "step": 6155 }, { "epoch": 25.645833333333332, "loss": 0.04797649383544922, "loss_ce": 7.915434980532154e-05, "loss_iou": 0.1064453125, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 561814648, "step": 6155 }, { "epoch": 25.65, "grad_norm": 1.7432156361042102, "learning_rate": 5e-05, "loss": 0.0887, "num_input_tokens_seen": 561906228, "step": 6156 }, { "epoch": 25.65, "loss": 0.1472359001636505, "loss_ce": 9.540006431052461e-05, "loss_iou": 0.1875, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 561906228, "step": 6156 }, { "epoch": 25.654166666666665, "grad_norm": 2.163326086254311, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 561997536, "step": 6157 }, { "epoch": 25.654166666666665, "loss": 0.03649410605430603, "loss_ce": 9.426492761122063e-05, "loss_iou": 0.255859375, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 561997536, "step": 6157 }, { "epoch": 25.658333333333335, "grad_norm": 2.576559069006906, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 562088964, "step": 6158 }, { "epoch": 25.658333333333335, "loss": 0.041357457637786865, "loss_ce": 7.098697824403644e-05, "loss_iou": 0.232421875, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 562088964, "step": 6158 }, { "epoch": 25.6625, "grad_norm": 1.94564116676901, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 562180896, "step": 6159 }, { "epoch": 25.6625, "loss": 0.03156707063317299, "loss_ce": 4.264139533916023e-06, "loss_iou": 0.228515625, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 562180896, "step": 6159 }, { "epoch": 25.666666666666668, "grad_norm": 2.60825665054094, "learning_rate": 5e-05, "loss": 0.0499, "num_input_tokens_seen": 562272180, "step": 6160 }, { "epoch": 25.666666666666668, "loss": 0.0645618662238121, "loss_ce": 3.2445161195937544e-05, "loss_iou": 0.28515625, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 562272180, "step": 6160 }, { "epoch": 25.670833333333334, "grad_norm": 3.697479041946092, "learning_rate": 5e-05, "loss": 0.0708, "num_input_tokens_seen": 562363832, "step": 6161 }, { "epoch": 25.670833333333334, "loss": 0.03595633804798126, "loss_ce": 5.240726750344038e-05, "loss_iou": 0.31640625, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 562363832, "step": 6161 }, { "epoch": 25.675, "grad_norm": 2.079205454212805, "learning_rate": 5e-05, "loss": 0.0347, "num_input_tokens_seen": 562455596, "step": 6162 }, { "epoch": 25.675, "loss": 0.031726814806461334, "loss_ce": 3.79521907234448e-06, "loss_iou": 0.1875, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 562455596, "step": 6162 }, { "epoch": 25.679166666666667, "grad_norm": 5.529298553751564, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 562547416, "step": 6163 }, { "epoch": 25.679166666666667, "loss": 0.029071012511849403, "loss_ce": 7.168351294239983e-05, "loss_iou": 0.16796875, "loss_num": 0.00579833984375, "loss_xval": 0.029052734375, "num_input_tokens_seen": 562547416, "step": 6163 }, { "epoch": 25.683333333333334, "grad_norm": 2.1032150384919324, "learning_rate": 5e-05, "loss": 0.0501, "num_input_tokens_seen": 562638784, "step": 6164 }, { "epoch": 25.683333333333334, "loss": 0.030691668391227722, "loss_ce": 6.2420622271019965e-06, "loss_iou": 0.169921875, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 562638784, "step": 6164 }, { "epoch": 25.6875, "grad_norm": 2.820384299390893, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 562730376, "step": 6165 }, { "epoch": 25.6875, "loss": 0.04048352688550949, "loss_ce": 1.9593010165408487e-06, "loss_iou": 0.22265625, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 562730376, "step": 6165 }, { "epoch": 25.691666666666666, "grad_norm": 3.114629293616458, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 562821668, "step": 6166 }, { "epoch": 25.691666666666666, "loss": 0.032954927533864975, "loss_ce": 1.1202602763660252e-05, "loss_iou": 0.2490234375, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 562821668, "step": 6166 }, { "epoch": 25.695833333333333, "grad_norm": 2.8132563422955816, "learning_rate": 5e-05, "loss": 0.0376, "num_input_tokens_seen": 562912528, "step": 6167 }, { "epoch": 25.695833333333333, "loss": 0.033153094351291656, "loss_ce": 1.1004223779309541e-05, "loss_iou": 0.197265625, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 562912528, "step": 6167 }, { "epoch": 25.7, "grad_norm": 2.430857235582789, "learning_rate": 5e-05, "loss": 0.0649, "num_input_tokens_seen": 563004012, "step": 6168 }, { "epoch": 25.7, "loss": 0.04591874033212662, "loss_ce": 5.042638804297894e-06, "loss_iou": 0.34375, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 563004012, "step": 6168 }, { "epoch": 25.704166666666666, "grad_norm": 4.638590187533068, "learning_rate": 5e-05, "loss": 0.0362, "num_input_tokens_seen": 563095140, "step": 6169 }, { "epoch": 25.704166666666666, "loss": 0.043398939073085785, "loss_ce": 2.9414709388220217e-06, "loss_iou": 0.294921875, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 563095140, "step": 6169 }, { "epoch": 25.708333333333332, "grad_norm": 3.3078042298822665, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 563187308, "step": 6170 }, { "epoch": 25.708333333333332, "loss": 0.03523440659046173, "loss_ce": 4.001004344900139e-05, "loss_iou": 0.1474609375, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 563187308, "step": 6170 }, { "epoch": 25.7125, "grad_norm": 2.2406118879558137, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 563277300, "step": 6171 }, { "epoch": 25.7125, "loss": 0.09529478847980499, "loss_ce": 3.6481064853433054e-06, "loss_iou": 0.23828125, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 563277300, "step": 6171 }, { "epoch": 25.716666666666665, "grad_norm": 3.0833788667937045, "learning_rate": 5e-05, "loss": 0.0242, "num_input_tokens_seen": 563368680, "step": 6172 }, { "epoch": 25.716666666666665, "loss": 0.026963843032717705, "loss_ce": 1.5623777471773792e-06, "loss_iou": 0.2578125, "loss_num": 0.005401611328125, "loss_xval": 0.0269775390625, "num_input_tokens_seen": 563368680, "step": 6172 }, { "epoch": 25.720833333333335, "grad_norm": 3.338505833691027, "learning_rate": 5e-05, "loss": 0.0694, "num_input_tokens_seen": 563459932, "step": 6173 }, { "epoch": 25.720833333333335, "loss": 0.04239106550812721, "loss_ce": 3.266503699705936e-05, "loss_iou": 0.365234375, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 563459932, "step": 6173 }, { "epoch": 25.725, "grad_norm": 3.225510461718473, "learning_rate": 5e-05, "loss": 0.029, "num_input_tokens_seen": 563551464, "step": 6174 }, { "epoch": 25.725, "loss": 0.02353103831410408, "loss_ce": 0.0001545738341519609, "loss_iou": 0.26171875, "loss_num": 0.004669189453125, "loss_xval": 0.0234375, "num_input_tokens_seen": 563551464, "step": 6174 }, { "epoch": 25.729166666666668, "grad_norm": 3.6078941333414374, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 563643004, "step": 6175 }, { "epoch": 25.729166666666668, "loss": 0.026356343179941177, "loss_ce": 8.230143976106774e-06, "loss_iou": 0.2177734375, "loss_num": 0.005279541015625, "loss_xval": 0.0263671875, "num_input_tokens_seen": 563643004, "step": 6175 }, { "epoch": 25.733333333333334, "grad_norm": 1.0670316467237433, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 563734168, "step": 6176 }, { "epoch": 25.733333333333334, "loss": 0.027280237525701523, "loss_ce": 3.567018939065747e-05, "loss_iou": 0.2265625, "loss_num": 0.00543212890625, "loss_xval": 0.0272216796875, "num_input_tokens_seen": 563734168, "step": 6176 }, { "epoch": 25.7375, "grad_norm": 2.103919541031617, "learning_rate": 5e-05, "loss": 0.0377, "num_input_tokens_seen": 563824572, "step": 6177 }, { "epoch": 25.7375, "loss": 0.04883137717843056, "loss_ce": 3.2511102290300187e-06, "loss_iou": 0.1962890625, "loss_num": 0.009765625, "loss_xval": 0.048828125, "num_input_tokens_seen": 563824572, "step": 6177 }, { "epoch": 25.741666666666667, "grad_norm": 1.0159686485119894, "learning_rate": 5e-05, "loss": 0.0187, "num_input_tokens_seen": 563915732, "step": 6178 }, { "epoch": 25.741666666666667, "loss": 0.020426098257303238, "loss_ce": 2.5097266188822687e-05, "loss_iou": 0.1220703125, "loss_num": 0.00408935546875, "loss_xval": 0.0203857421875, "num_input_tokens_seen": 563915732, "step": 6178 }, { "epoch": 25.745833333333334, "grad_norm": 0.6865861724508531, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 564006540, "step": 6179 }, { "epoch": 25.745833333333334, "loss": 0.032504454255104065, "loss_ce": 3.2329619443771662e-06, "loss_iou": 0.203125, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 564006540, "step": 6179 }, { "epoch": 25.75, "grad_norm": 1.2746881020338765, "learning_rate": 5e-05, "loss": 0.0592, "num_input_tokens_seen": 564098576, "step": 6180 }, { "epoch": 25.75, "loss": 0.09174899756908417, "loss_ce": 5.5285390772041865e-06, "loss_iou": 0.2158203125, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 564098576, "step": 6180 }, { "epoch": 25.754166666666666, "grad_norm": 1.161104001659042, "learning_rate": 5e-05, "loss": 0.0302, "num_input_tokens_seen": 564190040, "step": 6181 }, { "epoch": 25.754166666666666, "loss": 0.03920216113328934, "loss_ce": 2.3326715563598555e-06, "loss_iou": 0.154296875, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 564190040, "step": 6181 }, { "epoch": 25.758333333333333, "grad_norm": 2.635958304628402, "learning_rate": 5e-05, "loss": 0.0554, "num_input_tokens_seen": 564281212, "step": 6182 }, { "epoch": 25.758333333333333, "loss": 0.059660643339157104, "loss_ce": 0.007712096907198429, "loss_iou": 0.212890625, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 564281212, "step": 6182 }, { "epoch": 25.7625, "grad_norm": 1.8956833933864967, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 564371556, "step": 6183 }, { "epoch": 25.7625, "loss": 0.03747256100177765, "loss_ce": 4.60345563624287e-06, "loss_iou": 0.2353515625, "loss_num": 0.00750732421875, "loss_xval": 0.037353515625, "num_input_tokens_seen": 564371556, "step": 6183 }, { "epoch": 25.766666666666666, "grad_norm": 1.7525944786849836, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 564463512, "step": 6184 }, { "epoch": 25.766666666666666, "loss": 0.06269238889217377, "loss_ce": 0.0001160952597274445, "loss_iou": 0.28515625, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 564463512, "step": 6184 }, { "epoch": 25.770833333333332, "grad_norm": 0.8645786560215175, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 564554408, "step": 6185 }, { "epoch": 25.770833333333332, "loss": 0.050404638051986694, "loss_ce": 2.0119014152442105e-05, "loss_iou": 0.109375, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 564554408, "step": 6185 }, { "epoch": 25.775, "grad_norm": 0.8226319288973017, "learning_rate": 5e-05, "loss": 0.0224, "num_input_tokens_seen": 564645524, "step": 6186 }, { "epoch": 25.775, "loss": 0.01581304706633091, "loss_ce": 3.16454897983931e-05, "loss_iou": 0.1376953125, "loss_num": 0.0031585693359375, "loss_xval": 0.0157470703125, "num_input_tokens_seen": 564645524, "step": 6186 }, { "epoch": 25.779166666666665, "grad_norm": 1.7741157810285912, "learning_rate": 5e-05, "loss": 0.0605, "num_input_tokens_seen": 564736176, "step": 6187 }, { "epoch": 25.779166666666665, "loss": 0.0682372897863388, "loss_ce": 3.050072155019734e-05, "loss_iou": 0.216796875, "loss_num": 0.01361083984375, "loss_xval": 0.068359375, "num_input_tokens_seen": 564736176, "step": 6187 }, { "epoch": 25.783333333333335, "grad_norm": 2.002310127769731, "learning_rate": 5e-05, "loss": 0.0449, "num_input_tokens_seen": 564827528, "step": 6188 }, { "epoch": 25.783333333333335, "loss": 0.03747815638780594, "loss_ce": 2.5684003048809245e-06, "loss_iou": 0.2373046875, "loss_num": 0.00750732421875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 564827528, "step": 6188 }, { "epoch": 25.7875, "grad_norm": 3.5379552754793355, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 564918892, "step": 6189 }, { "epoch": 25.7875, "loss": 0.03478563204407692, "loss_ce": 3.2234866012004204e-06, "loss_iou": 0.2314453125, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 564918892, "step": 6189 }, { "epoch": 25.791666666666668, "grad_norm": 2.139259743330136, "learning_rate": 5e-05, "loss": 0.0293, "num_input_tokens_seen": 565010512, "step": 6190 }, { "epoch": 25.791666666666668, "loss": 0.042409226298332214, "loss_ce": 5.050466825196054e-06, "loss_iou": 0.1728515625, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 565010512, "step": 6190 }, { "epoch": 25.795833333333334, "grad_norm": 2.9062476205288945, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 565101640, "step": 6191 }, { "epoch": 25.795833333333334, "loss": 0.030232472345232964, "loss_ce": 4.8102770051627886e-06, "loss_iou": 0.1650390625, "loss_num": 0.00604248046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 565101640, "step": 6191 }, { "epoch": 25.8, "grad_norm": 2.621436298671246, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 565192556, "step": 6192 }, { "epoch": 25.8, "loss": 0.05496486276388168, "loss_ce": 2.707063913476304e-06, "loss_iou": 0.28515625, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 565192556, "step": 6192 }, { "epoch": 25.804166666666667, "grad_norm": 2.9015879210449724, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 565283468, "step": 6193 }, { "epoch": 25.804166666666667, "loss": 0.045189596712589264, "loss_ce": 0.00015328428708016872, "loss_iou": 0.21484375, "loss_num": 0.009033203125, "loss_xval": 0.044921875, "num_input_tokens_seen": 565283468, "step": 6193 }, { "epoch": 25.808333333333334, "grad_norm": 2.981056736907888, "learning_rate": 5e-05, "loss": 0.0482, "num_input_tokens_seen": 565373624, "step": 6194 }, { "epoch": 25.808333333333334, "loss": 0.0601491704583168, "loss_ce": 2.954133196908515e-05, "loss_iou": 0.197265625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 565373624, "step": 6194 }, { "epoch": 25.8125, "grad_norm": 3.100211608761263, "learning_rate": 5e-05, "loss": 0.0325, "num_input_tokens_seen": 565464568, "step": 6195 }, { "epoch": 25.8125, "loss": 0.03338789939880371, "loss_ce": 1.6691501514287665e-06, "loss_iou": 0.337890625, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 565464568, "step": 6195 }, { "epoch": 25.816666666666666, "grad_norm": 2.7975115834447113, "learning_rate": 5e-05, "loss": 0.0768, "num_input_tokens_seen": 565555988, "step": 6196 }, { "epoch": 25.816666666666666, "loss": 0.03894903510808945, "loss_ce": 2.3860455257818103e-05, "loss_iou": 0.234375, "loss_num": 0.007781982421875, "loss_xval": 0.038818359375, "num_input_tokens_seen": 565555988, "step": 6196 }, { "epoch": 25.820833333333333, "grad_norm": 6.683605039191462, "learning_rate": 5e-05, "loss": 0.0619, "num_input_tokens_seen": 565646712, "step": 6197 }, { "epoch": 25.820833333333333, "loss": 0.06854942440986633, "loss_ce": 3.746480797417462e-05, "loss_iou": 0.279296875, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 565646712, "step": 6197 }, { "epoch": 25.825, "grad_norm": 2.939345412767463, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 565737608, "step": 6198 }, { "epoch": 25.825, "loss": 0.07456640899181366, "loss_ce": 4.3337663555576e-06, "loss_iou": 0.2412109375, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 565737608, "step": 6198 }, { "epoch": 25.829166666666666, "grad_norm": 2.796323197936319, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 565828744, "step": 6199 }, { "epoch": 25.829166666666666, "loss": 0.032312601804733276, "loss_ce": 1.3562228559749201e-05, "loss_iou": 0.2041015625, "loss_num": 0.0064697265625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 565828744, "step": 6199 }, { "epoch": 25.833333333333332, "grad_norm": 2.300145429427322, "learning_rate": 5e-05, "loss": 0.0283, "num_input_tokens_seen": 565920076, "step": 6200 }, { "epoch": 25.833333333333332, "loss": 0.028814151883125305, "loss_ce": 2.0819143173866905e-05, "loss_iou": 0.3359375, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 565920076, "step": 6200 }, { "epoch": 25.8375, "grad_norm": 2.158863167829128, "learning_rate": 5e-05, "loss": 0.025, "num_input_tokens_seen": 566011876, "step": 6201 }, { "epoch": 25.8375, "loss": 0.022084590047597885, "loss_ce": 0.0007451724377460778, "loss_iou": 0.1904296875, "loss_num": 0.0042724609375, "loss_xval": 0.0213623046875, "num_input_tokens_seen": 566011876, "step": 6201 }, { "epoch": 25.841666666666665, "grad_norm": 3.0032125907318417, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 566103524, "step": 6202 }, { "epoch": 25.841666666666665, "loss": 0.05159597098827362, "loss_ce": 1.363731917081168e-05, "loss_iou": 0.203125, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 566103524, "step": 6202 }, { "epoch": 25.845833333333335, "grad_norm": 2.3648260691935685, "learning_rate": 5e-05, "loss": 0.0239, "num_input_tokens_seen": 566195456, "step": 6203 }, { "epoch": 25.845833333333335, "loss": 0.025610897690057755, "loss_ce": 0.0005178189603611827, "loss_iou": 0.2412109375, "loss_num": 0.005035400390625, "loss_xval": 0.025146484375, "num_input_tokens_seen": 566195456, "step": 6203 }, { "epoch": 25.85, "grad_norm": 2.4481790838852797, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 566286808, "step": 6204 }, { "epoch": 25.85, "loss": 0.040507905185222626, "loss_ce": 3.44900968229922e-06, "loss_iou": 0.1982421875, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 566286808, "step": 6204 }, { "epoch": 25.854166666666668, "grad_norm": 3.4786102615434094, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 566378044, "step": 6205 }, { "epoch": 25.854166666666668, "loss": 0.032995712012052536, "loss_ce": 2.1468926206580363e-05, "loss_iou": 0.333984375, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 566378044, "step": 6205 }, { "epoch": 25.858333333333334, "grad_norm": 3.5042248706895145, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 566469372, "step": 6206 }, { "epoch": 25.858333333333334, "loss": 0.03962497413158417, "loss_ce": 2.8418700821930543e-05, "loss_iou": 0.2412109375, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 566469372, "step": 6206 }, { "epoch": 25.8625, "grad_norm": 2.8921605118927682, "learning_rate": 5e-05, "loss": 0.0404, "num_input_tokens_seen": 566560928, "step": 6207 }, { "epoch": 25.8625, "loss": 0.022944016382098198, "loss_ce": 5.5832915677456185e-05, "loss_iou": 0.158203125, "loss_num": 0.00457763671875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 566560928, "step": 6207 }, { "epoch": 25.866666666666667, "grad_norm": 2.585905652175534, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 566652676, "step": 6208 }, { "epoch": 25.866666666666667, "loss": 0.05306173861026764, "loss_ce": 6.93045058142161e-06, "loss_iou": 0.271484375, "loss_num": 0.0106201171875, "loss_xval": 0.052978515625, "num_input_tokens_seen": 566652676, "step": 6208 }, { "epoch": 25.870833333333334, "grad_norm": 2.253935206946212, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 566744724, "step": 6209 }, { "epoch": 25.870833333333334, "loss": 0.027485482394695282, "loss_ce": 4.402094873512397e-06, "loss_iou": 0.19921875, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 566744724, "step": 6209 }, { "epoch": 25.875, "grad_norm": 1.1610223925097085, "learning_rate": 5e-05, "loss": 0.0528, "num_input_tokens_seen": 566835900, "step": 6210 }, { "epoch": 25.875, "loss": 0.02255455031991005, "loss_ce": 0.00010887056851061061, "loss_iou": 0.1552734375, "loss_num": 0.004486083984375, "loss_xval": 0.0224609375, "num_input_tokens_seen": 566835900, "step": 6210 }, { "epoch": 25.879166666666666, "grad_norm": 2.0502710078555793, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 566927672, "step": 6211 }, { "epoch": 25.879166666666666, "loss": 0.07474754750728607, "loss_ce": 0.0003685862757265568, "loss_iou": 0.18359375, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 566927672, "step": 6211 }, { "epoch": 25.883333333333333, "grad_norm": 1.1012079323531696, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 567019052, "step": 6212 }, { "epoch": 25.883333333333333, "loss": 0.030489172786474228, "loss_ce": 2.1120893052284373e-06, "loss_iou": 0.2060546875, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 567019052, "step": 6212 }, { "epoch": 25.8875, "grad_norm": 1.690526939465067, "learning_rate": 5e-05, "loss": 0.0267, "num_input_tokens_seen": 567110820, "step": 6213 }, { "epoch": 25.8875, "loss": 0.03049873746931553, "loss_ce": 4.047382390126586e-06, "loss_iou": 0.38671875, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 567110820, "step": 6213 }, { "epoch": 25.891666666666666, "grad_norm": 2.027924223311171, "learning_rate": 5e-05, "loss": 0.0241, "num_input_tokens_seen": 567201736, "step": 6214 }, { "epoch": 25.891666666666666, "loss": 0.02165631204843521, "loss_ce": 4.0901650208979845e-06, "loss_iou": 0.146484375, "loss_num": 0.00433349609375, "loss_xval": 0.0216064453125, "num_input_tokens_seen": 567201736, "step": 6214 }, { "epoch": 25.895833333333332, "grad_norm": 2.786822197862997, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 567292892, "step": 6215 }, { "epoch": 25.895833333333332, "loss": 0.03339173272252083, "loss_ce": 5.50091135664843e-06, "loss_iou": 0.328125, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 567292892, "step": 6215 }, { "epoch": 25.9, "grad_norm": 2.767579093122714, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 567383720, "step": 6216 }, { "epoch": 25.9, "loss": 0.03694453462958336, "loss_ce": 3.0083456294960342e-06, "loss_iou": 0.275390625, "loss_num": 0.00738525390625, "loss_xval": 0.036865234375, "num_input_tokens_seen": 567383720, "step": 6216 }, { "epoch": 25.904166666666665, "grad_norm": 2.249322127371412, "learning_rate": 5e-05, "loss": 0.0521, "num_input_tokens_seen": 567475000, "step": 6217 }, { "epoch": 25.904166666666665, "loss": 0.06515315920114517, "loss_ce": 2.8645601560128853e-05, "loss_iou": 0.154296875, "loss_num": 0.0130615234375, "loss_xval": 0.06494140625, "num_input_tokens_seen": 567475000, "step": 6217 }, { "epoch": 25.908333333333335, "grad_norm": 1.176764330390045, "learning_rate": 5e-05, "loss": 0.0713, "num_input_tokens_seen": 567564508, "step": 6218 }, { "epoch": 25.908333333333335, "loss": 0.10813853144645691, "loss_ce": 3.0015964512131177e-05, "loss_iou": 0.12109375, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 567564508, "step": 6218 }, { "epoch": 25.9125, "grad_norm": 1.8127313337376398, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 567655664, "step": 6219 }, { "epoch": 25.9125, "loss": 0.05346290022134781, "loss_ce": 3.7308441278582904e-06, "loss_iou": 0.2216796875, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 567655664, "step": 6219 }, { "epoch": 25.916666666666668, "grad_norm": 2.8901642512214525, "learning_rate": 5e-05, "loss": 0.0623, "num_input_tokens_seen": 567746800, "step": 6220 }, { "epoch": 25.916666666666668, "loss": 0.03645596653223038, "loss_ce": 2.721512146308669e-06, "loss_iou": 0.2021484375, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 567746800, "step": 6220 }, { "epoch": 25.920833333333334, "grad_norm": 3.0671081042196895, "learning_rate": 5e-05, "loss": 0.0527, "num_input_tokens_seen": 567838300, "step": 6221 }, { "epoch": 25.920833333333334, "loss": 0.03754575550556183, "loss_ce": 0.0017791553400456905, "loss_iou": 0.25, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 567838300, "step": 6221 }, { "epoch": 25.925, "grad_norm": 2.816814043901978, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 567929616, "step": 6222 }, { "epoch": 25.925, "loss": 0.04085657373070717, "loss_ce": 0.00011560738494154066, "loss_iou": 0.189453125, "loss_num": 0.0081787109375, "loss_xval": 0.040771484375, "num_input_tokens_seen": 567929616, "step": 6222 }, { "epoch": 25.929166666666667, "grad_norm": 2.8523635840464197, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 568020912, "step": 6223 }, { "epoch": 25.929166666666667, "loss": 0.03213977813720703, "loss_ce": 4.769712177221663e-06, "loss_iou": 0.2177734375, "loss_num": 0.00640869140625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 568020912, "step": 6223 }, { "epoch": 25.933333333333334, "grad_norm": 3.0148122840929386, "learning_rate": 5e-05, "loss": 0.0788, "num_input_tokens_seen": 568112240, "step": 6224 }, { "epoch": 25.933333333333334, "loss": 0.05899741128087044, "loss_ce": 0.0003578842442948371, "loss_iou": 0.18359375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 568112240, "step": 6224 }, { "epoch": 25.9375, "grad_norm": 3.2862347670356793, "learning_rate": 5e-05, "loss": 0.0309, "num_input_tokens_seen": 568201736, "step": 6225 }, { "epoch": 25.9375, "loss": 0.02853585034608841, "loss_ce": 1.9140070435241796e-06, "loss_iou": 0.1875, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 568201736, "step": 6225 }, { "epoch": 25.941666666666666, "grad_norm": 2.8599250102557328, "learning_rate": 5e-05, "loss": 0.0553, "num_input_tokens_seen": 568293380, "step": 6226 }, { "epoch": 25.941666666666666, "loss": 0.07472589612007141, "loss_ce": 3.602154720283579e-06, "loss_iou": 0.26953125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 568293380, "step": 6226 }, { "epoch": 25.945833333333333, "grad_norm": 1.9691023829632905, "learning_rate": 5e-05, "loss": 0.0403, "num_input_tokens_seen": 568382916, "step": 6227 }, { "epoch": 25.945833333333333, "loss": 0.028636876493692398, "loss_ce": 3.7587974475172814e-06, "loss_iou": 0.201171875, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 568382916, "step": 6227 }, { "epoch": 25.95, "grad_norm": 2.237243818037928, "learning_rate": 5e-05, "loss": 0.0374, "num_input_tokens_seen": 568474528, "step": 6228 }, { "epoch": 25.95, "loss": 0.034794896841049194, "loss_ce": 2.393420618318487e-05, "loss_iou": 0.080078125, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 568474528, "step": 6228 }, { "epoch": 25.954166666666666, "grad_norm": 3.5637477409096627, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 568565956, "step": 6229 }, { "epoch": 25.954166666666666, "loss": 0.04442165791988373, "loss_ce": 0.00047634501243010163, "loss_iou": 0.2451171875, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 568565956, "step": 6229 }, { "epoch": 25.958333333333332, "grad_norm": 3.015889661191772, "learning_rate": 5e-05, "loss": 0.0465, "num_input_tokens_seen": 568657444, "step": 6230 }, { "epoch": 25.958333333333332, "loss": 0.03149028494954109, "loss_ce": 0.004558522719889879, "loss_iou": 0.15625, "loss_num": 0.00537109375, "loss_xval": 0.0269775390625, "num_input_tokens_seen": 568657444, "step": 6230 }, { "epoch": 25.9625, "grad_norm": 2.338261968838323, "learning_rate": 5e-05, "loss": 0.0369, "num_input_tokens_seen": 568748604, "step": 6231 }, { "epoch": 25.9625, "loss": 0.031379248946905136, "loss_ce": 7.17790180715383e-06, "loss_iou": 0.32421875, "loss_num": 0.00628662109375, "loss_xval": 0.03125, "num_input_tokens_seen": 568748604, "step": 6231 }, { "epoch": 25.966666666666665, "grad_norm": 2.6931507635597693, "learning_rate": 5e-05, "loss": 0.0827, "num_input_tokens_seen": 568840140, "step": 6232 }, { "epoch": 25.966666666666665, "loss": 0.0415530651807785, "loss_ce": 3.384055844435352e-06, "loss_iou": 0.263671875, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 568840140, "step": 6232 }, { "epoch": 25.970833333333335, "grad_norm": 3.348812641350004, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 568931560, "step": 6233 }, { "epoch": 25.970833333333335, "loss": 0.06727465242147446, "loss_ce": 2.916773155448027e-05, "loss_iou": 0.2890625, "loss_num": 0.013427734375, "loss_xval": 0.0673828125, "num_input_tokens_seen": 568931560, "step": 6233 }, { "epoch": 25.975, "grad_norm": 2.3705319876111672, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 569023052, "step": 6234 }, { "epoch": 25.975, "loss": 0.05192911997437477, "loss_ce": 3.4578806662466377e-06, "loss_iou": 0.2294921875, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 569023052, "step": 6234 }, { "epoch": 25.979166666666668, "grad_norm": 2.140631008113906, "learning_rate": 5e-05, "loss": 0.0641, "num_input_tokens_seen": 569114472, "step": 6235 }, { "epoch": 25.979166666666668, "loss": 0.05225517600774765, "loss_ce": 2.4343242330360226e-05, "loss_iou": 0.271484375, "loss_num": 0.01043701171875, "loss_xval": 0.05224609375, "num_input_tokens_seen": 569114472, "step": 6235 }, { "epoch": 25.983333333333334, "grad_norm": 2.5581902161791437, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 569205756, "step": 6236 }, { "epoch": 25.983333333333334, "loss": 0.02487356960773468, "loss_ce": 0.000566318107303232, "loss_iou": 0.240234375, "loss_num": 0.004852294921875, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 569205756, "step": 6236 }, { "epoch": 25.9875, "grad_norm": 2.234012344346653, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 569297040, "step": 6237 }, { "epoch": 25.9875, "loss": 0.03664017841219902, "loss_ce": 3.825194653472863e-06, "loss_iou": 0.28515625, "loss_num": 0.00732421875, "loss_xval": 0.03662109375, "num_input_tokens_seen": 569297040, "step": 6237 }, { "epoch": 25.991666666666667, "grad_norm": 2.5095885007986345, "learning_rate": 5e-05, "loss": 0.0493, "num_input_tokens_seen": 569388200, "step": 6238 }, { "epoch": 25.991666666666667, "loss": 0.057438794523477554, "loss_ce": 1.2340997272985987e-05, "loss_iou": 0.2138671875, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 569388200, "step": 6238 }, { "epoch": 25.995833333333334, "grad_norm": 6.921083419486904, "learning_rate": 5e-05, "loss": 0.08, "num_input_tokens_seen": 569479296, "step": 6239 }, { "epoch": 25.995833333333334, "loss": 0.12009778618812561, "loss_ce": 1.1113231266790535e-05, "loss_iou": 0.28125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 569479296, "step": 6239 }, { "epoch": 26.0, "grad_norm": 2.987832781000782, "learning_rate": 5e-05, "loss": 0.0573, "num_input_tokens_seen": 569570620, "step": 6240 }, { "epoch": 26.0, "loss": 0.04104957729578018, "loss_ce": 3.4333083931414876e-06, "loss_iou": 0.28125, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 569570620, "step": 6240 }, { "epoch": 26.004166666666666, "grad_norm": 3.1303272391838255, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 569662492, "step": 6241 }, { "epoch": 26.004166666666666, "loss": 0.035025689750909805, "loss_ce": 1.4398500752577092e-05, "loss_iou": 0.255859375, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 569662492, "step": 6241 }, { "epoch": 26.008333333333333, "grad_norm": 3.5971941895441724, "learning_rate": 5e-05, "loss": 0.0318, "num_input_tokens_seen": 569753680, "step": 6242 }, { "epoch": 26.008333333333333, "loss": 0.027462609112262726, "loss_ce": 4.41880456492072e-06, "loss_iou": 0.201171875, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 569753680, "step": 6242 }, { "epoch": 26.0125, "grad_norm": 1.9901081175986914, "learning_rate": 5e-05, "loss": 0.0611, "num_input_tokens_seen": 569844260, "step": 6243 }, { "epoch": 26.0125, "loss": 0.04240645468235016, "loss_ce": 2.2796084522269666e-06, "loss_iou": 0.173828125, "loss_num": 0.00848388671875, "loss_xval": 0.04248046875, "num_input_tokens_seen": 569844260, "step": 6243 }, { "epoch": 26.016666666666666, "grad_norm": 2.5666725125305967, "learning_rate": 5e-05, "loss": 0.0385, "num_input_tokens_seen": 569935652, "step": 6244 }, { "epoch": 26.016666666666666, "loss": 0.04415284842252731, "loss_ce": 9.170468729280401e-06, "loss_iou": 0.2578125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 569935652, "step": 6244 }, { "epoch": 26.020833333333332, "grad_norm": 2.4139493327373662, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 570027156, "step": 6245 }, { "epoch": 26.020833333333332, "loss": 0.06275545805692673, "loss_ce": 3.6852998164249584e-06, "loss_iou": 0.2490234375, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 570027156, "step": 6245 }, { "epoch": 26.025, "grad_norm": 2.447243831182181, "learning_rate": 5e-05, "loss": 0.0332, "num_input_tokens_seen": 570118944, "step": 6246 }, { "epoch": 26.025, "loss": 0.029815129935741425, "loss_ce": 7.086601272021653e-06, "loss_iou": 0.341796875, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 570118944, "step": 6246 }, { "epoch": 26.029166666666665, "grad_norm": 2.016318243284664, "learning_rate": 5e-05, "loss": 0.0701, "num_input_tokens_seen": 570210268, "step": 6247 }, { "epoch": 26.029166666666665, "loss": 0.11593090742826462, "loss_ce": 2.5144749088212848e-05, "loss_iou": 0.1552734375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 570210268, "step": 6247 }, { "epoch": 26.033333333333335, "grad_norm": 2.0813449498283854, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 570300968, "step": 6248 }, { "epoch": 26.033333333333335, "loss": 0.03726140409708023, "loss_ce": 7.0692808549210895e-06, "loss_iou": 0.212890625, "loss_num": 0.0074462890625, "loss_xval": 0.037353515625, "num_input_tokens_seen": 570300968, "step": 6248 }, { "epoch": 26.0375, "grad_norm": 3.053951564871621, "learning_rate": 5e-05, "loss": 0.0325, "num_input_tokens_seen": 570391888, "step": 6249 }, { "epoch": 26.0375, "loss": 0.025143064558506012, "loss_ce": 1.1838392310892232e-05, "loss_iou": 0.287109375, "loss_num": 0.005035400390625, "loss_xval": 0.025146484375, "num_input_tokens_seen": 570391888, "step": 6249 }, { "epoch": 26.041666666666668, "grad_norm": 3.493859109783065, "learning_rate": 5e-05, "loss": 0.064, "num_input_tokens_seen": 570482944, "step": 6250 }, { "epoch": 26.041666666666668, "eval_seeclick_CIoU": 0.19633438810706139, "eval_seeclick_GIoU": 0.18025581538677216, "eval_seeclick_IoU": 0.31648438423871994, "eval_seeclick_MAE_all": 0.11852361261844635, "eval_seeclick_MAE_h": 0.10228492319583893, "eval_seeclick_MAE_w": 0.255145899951458, "eval_seeclick_MAE_x_boxes": 0.26643601059913635, "eval_seeclick_MAE_y_boxes": 0.10366765409708023, "eval_seeclick_NUM_probability": 0.9999975264072418, "eval_seeclick_inside_bbox": 0.5255681872367859, "eval_seeclick_loss": 0.6100429892539978, "eval_seeclick_loss_ce": 0.10555814579129219, "eval_seeclick_loss_iou": 0.46240234375, "eval_seeclick_loss_num": 0.094268798828125, "eval_seeclick_loss_xval": 0.471435546875, "eval_seeclick_runtime": 76.4862, "eval_seeclick_samples_per_second": 0.562, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 570482944, "step": 6250 }, { "epoch": 26.041666666666668, "eval_icons_CIoU": 0.2770818769931793, "eval_icons_GIoU": 0.24754273891448975, "eval_icons_IoU": 0.37252339720726013, "eval_icons_MAE_all": 0.0877491757273674, "eval_icons_MAE_h": 0.1594034805893898, "eval_icons_MAE_w": 0.14346452057361603, "eval_icons_MAE_x_boxes": 0.14612339437007904, "eval_icons_MAE_y_boxes": 0.16382309794425964, "eval_icons_NUM_probability": 0.9999905824661255, "eval_icons_inside_bbox": 0.546875, "eval_icons_loss": 0.43681633472442627, "eval_icons_loss_ce": 0.0001863774232333526, "eval_icons_loss_iou": 0.2720947265625, "eval_icons_loss_num": 0.0906219482421875, "eval_icons_loss_xval": 0.45294189453125, "eval_icons_runtime": 99.8018, "eval_icons_samples_per_second": 0.501, "eval_icons_steps_per_second": 0.02, "num_input_tokens_seen": 570482944, "step": 6250 }, { "epoch": 26.041666666666668, "eval_screenspot_CIoU": 0.37342973550160724, "eval_screenspot_GIoU": 0.36550194025039673, "eval_screenspot_IoU": 0.4399256110191345, "eval_screenspot_MAE_all": 0.09415490676959355, "eval_screenspot_MAE_h": 0.08269187062978745, "eval_screenspot_MAE_w": 0.21002002557118735, "eval_screenspot_MAE_x_boxes": 0.1757701834042867, "eval_screenspot_MAE_y_boxes": 0.08223061760266621, "eval_screenspot_NUM_probability": 0.9999955495198568, "eval_screenspot_inside_bbox": 0.7279166579246521, "eval_screenspot_loss": 0.4741321802139282, "eval_screenspot_loss_ce": 0.0008951277413871139, "eval_screenspot_loss_iou": 0.3416341145833333, "eval_screenspot_loss_num": 0.09652201334635417, "eval_screenspot_loss_xval": 0.4826253255208333, "eval_screenspot_runtime": 161.2618, "eval_screenspot_samples_per_second": 0.552, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 570482944, "step": 6250 }, { "epoch": 26.041666666666668, "eval_compot_CIoU": 0.5062353014945984, "eval_compot_GIoU": 0.496506005525589, "eval_compot_IoU": 0.5829664170742035, "eval_compot_MAE_all": 0.0551037210971117, "eval_compot_MAE_h": 0.06339871324598789, "eval_compot_MAE_w": 0.14107514172792435, "eval_compot_MAE_x_boxes": 0.14240705966949463, "eval_compot_MAE_y_boxes": 0.06185857765376568, "eval_compot_NUM_probability": 0.999993234872818, "eval_compot_inside_bbox": 0.7326388955116272, "eval_compot_loss": 0.32540085911750793, "eval_compot_loss_ce": 0.04954234138131142, "eval_compot_loss_iou": 0.323486328125, "eval_compot_loss_num": 0.05290985107421875, "eval_compot_loss_xval": 0.264495849609375, "eval_compot_runtime": 89.7456, "eval_compot_samples_per_second": 0.557, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 570482944, "step": 6250 }, { "epoch": 26.041666666666668, "loss": 0.3000350594520569, "loss_ce": 0.048509202897548676, "loss_iou": 0.3203125, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 570482944, "step": 6250 }, { "epoch": 26.045833333333334, "grad_norm": 2.3315837202573615, "learning_rate": 5e-05, "loss": 0.0598, "num_input_tokens_seen": 570574088, "step": 6251 }, { "epoch": 26.045833333333334, "loss": 0.0314057320356369, "loss_ce": 3.1418903745361604e-06, "loss_iou": 0.310546875, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 570574088, "step": 6251 }, { "epoch": 26.05, "grad_norm": 2.7485465532597466, "learning_rate": 5e-05, "loss": 0.0209, "num_input_tokens_seen": 570665532, "step": 6252 }, { "epoch": 26.05, "loss": 0.02470763400197029, "loss_ce": 3.6557296425598906e-06, "loss_iou": 0.2412109375, "loss_num": 0.00494384765625, "loss_xval": 0.024658203125, "num_input_tokens_seen": 570665532, "step": 6252 }, { "epoch": 26.054166666666667, "grad_norm": 2.328907127752638, "learning_rate": 5e-05, "loss": 0.0372, "num_input_tokens_seen": 570756560, "step": 6253 }, { "epoch": 26.054166666666667, "loss": 0.026493418961763382, "loss_ce": 4.162418463238282e-06, "loss_iou": 0.26171875, "loss_num": 0.00531005859375, "loss_xval": 0.0264892578125, "num_input_tokens_seen": 570756560, "step": 6253 }, { "epoch": 26.058333333333334, "grad_norm": 2.9153475674165636, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 570847504, "step": 6254 }, { "epoch": 26.058333333333334, "loss": 0.04204103723168373, "loss_ce": 3.0710834835190326e-06, "loss_iou": 0.29296875, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 570847504, "step": 6254 }, { "epoch": 26.0625, "grad_norm": 2.8012764743833807, "learning_rate": 5e-05, "loss": 0.0416, "num_input_tokens_seen": 570938912, "step": 6255 }, { "epoch": 26.0625, "loss": 0.03852350637316704, "loss_ce": 7.135752093745396e-05, "loss_iou": 0.359375, "loss_num": 0.0076904296875, "loss_xval": 0.03857421875, "num_input_tokens_seen": 570938912, "step": 6255 }, { "epoch": 26.066666666666666, "grad_norm": 2.1701406378525587, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 571030588, "step": 6256 }, { "epoch": 26.066666666666666, "loss": 0.028773188591003418, "loss_ce": 1.0371810276410542e-05, "loss_iou": 0.24609375, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 571030588, "step": 6256 }, { "epoch": 26.070833333333333, "grad_norm": 2.0690085239834533, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 571120480, "step": 6257 }, { "epoch": 26.070833333333333, "loss": 0.06280811876058578, "loss_ce": 2.944461357401451e-06, "loss_iou": 0.1865234375, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 571120480, "step": 6257 }, { "epoch": 26.075, "grad_norm": 2.3099640086091737, "learning_rate": 5e-05, "loss": 0.0691, "num_input_tokens_seen": 571211992, "step": 6258 }, { "epoch": 26.075, "loss": 0.044280171394348145, "loss_ce": 6.795500667067245e-06, "loss_iou": 0.310546875, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 571211992, "step": 6258 }, { "epoch": 26.079166666666666, "grad_norm": 2.8956852834842914, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 571302604, "step": 6259 }, { "epoch": 26.079166666666666, "loss": 0.07299505174160004, "loss_ce": 0.001866208971478045, "loss_iou": 0.1796875, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 571302604, "step": 6259 }, { "epoch": 26.083333333333332, "grad_norm": 2.0520974878683633, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 571394020, "step": 6260 }, { "epoch": 26.083333333333332, "loss": 0.024094898253679276, "loss_ce": 8.897854058886878e-06, "loss_iou": 0.265625, "loss_num": 0.00482177734375, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 571394020, "step": 6260 }, { "epoch": 26.0875, "grad_norm": 1.2220130617000615, "learning_rate": 5e-05, "loss": 0.0409, "num_input_tokens_seen": 571484956, "step": 6261 }, { "epoch": 26.0875, "loss": 0.04031633958220482, "loss_ce": 2.6189673008047976e-06, "loss_iou": 0.185546875, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 571484956, "step": 6261 }, { "epoch": 26.091666666666665, "grad_norm": 1.8787374771295302, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 571576136, "step": 6262 }, { "epoch": 26.091666666666665, "loss": 0.028536062687635422, "loss_ce": 1.738655373628717e-05, "loss_iou": 0.255859375, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 571576136, "step": 6262 }, { "epoch": 26.095833333333335, "grad_norm": 2.368319643306598, "learning_rate": 5e-05, "loss": 0.0259, "num_input_tokens_seen": 571667516, "step": 6263 }, { "epoch": 26.095833333333335, "loss": 0.02410757541656494, "loss_ce": 6.353846401907504e-05, "loss_iou": 0.1357421875, "loss_num": 0.004791259765625, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 571667516, "step": 6263 }, { "epoch": 26.1, "grad_norm": 2.9416620233734836, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 571759092, "step": 6264 }, { "epoch": 26.1, "loss": 0.02723521925508976, "loss_ce": 5.9099975260323845e-06, "loss_iou": 0.357421875, "loss_num": 0.005462646484375, "loss_xval": 0.0272216796875, "num_input_tokens_seen": 571759092, "step": 6264 }, { "epoch": 26.104166666666668, "grad_norm": 2.9230697977898177, "learning_rate": 5e-05, "loss": 0.0451, "num_input_tokens_seen": 571850480, "step": 6265 }, { "epoch": 26.104166666666668, "loss": 0.03306296467781067, "loss_ce": 0.00019553500169422477, "loss_iou": 0.2373046875, "loss_num": 0.006591796875, "loss_xval": 0.032958984375, "num_input_tokens_seen": 571850480, "step": 6265 }, { "epoch": 26.108333333333334, "grad_norm": 2.8379543798087665, "learning_rate": 5e-05, "loss": 0.0476, "num_input_tokens_seen": 571941896, "step": 6266 }, { "epoch": 26.108333333333334, "loss": 0.05199863761663437, "loss_ce": 4.316520517022582e-06, "loss_iou": 0.1552734375, "loss_num": 0.01043701171875, "loss_xval": 0.052001953125, "num_input_tokens_seen": 571941896, "step": 6266 }, { "epoch": 26.1125, "grad_norm": 2.4718131067011773, "learning_rate": 5e-05, "loss": 0.0545, "num_input_tokens_seen": 572033372, "step": 6267 }, { "epoch": 26.1125, "loss": 0.03578196465969086, "loss_ce": 0.00019083707593381405, "loss_iou": 0.1259765625, "loss_num": 0.007110595703125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 572033372, "step": 6267 }, { "epoch": 26.116666666666667, "grad_norm": 2.5265657710953526, "learning_rate": 5e-05, "loss": 0.0382, "num_input_tokens_seen": 572124444, "step": 6268 }, { "epoch": 26.116666666666667, "loss": 0.021869119256734848, "loss_ce": 3.2737359560996993e-06, "loss_iou": 0.365234375, "loss_num": 0.004364013671875, "loss_xval": 0.0218505859375, "num_input_tokens_seen": 572124444, "step": 6268 }, { "epoch": 26.120833333333334, "grad_norm": 3.850485848251067, "learning_rate": 5e-05, "loss": 0.067, "num_input_tokens_seen": 572216048, "step": 6269 }, { "epoch": 26.120833333333334, "loss": 0.06515754759311676, "loss_ce": 2.5184165224345634e-06, "loss_iou": 0.3046875, "loss_num": 0.0130615234375, "loss_xval": 0.06494140625, "num_input_tokens_seen": 572216048, "step": 6269 }, { "epoch": 26.125, "grad_norm": 2.0856751596103673, "learning_rate": 5e-05, "loss": 0.0281, "num_input_tokens_seen": 572307184, "step": 6270 }, { "epoch": 26.125, "loss": 0.042046919465065, "loss_ce": 1.277327010029694e-05, "loss_iou": 0.236328125, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 572307184, "step": 6270 }, { "epoch": 26.129166666666666, "grad_norm": 2.547013880080754, "learning_rate": 5e-05, "loss": 0.0245, "num_input_tokens_seen": 572398924, "step": 6271 }, { "epoch": 26.129166666666666, "loss": 0.026323389261960983, "loss_ce": 1.9775347936956678e-06, "loss_iou": 0.216796875, "loss_num": 0.0052490234375, "loss_xval": 0.0263671875, "num_input_tokens_seen": 572398924, "step": 6271 }, { "epoch": 26.133333333333333, "grad_norm": 2.0702680246502148, "learning_rate": 5e-05, "loss": 0.0358, "num_input_tokens_seen": 572490196, "step": 6272 }, { "epoch": 26.133333333333333, "loss": 0.024699455127120018, "loss_ce": 3.105150426563341e-06, "loss_iou": 0.193359375, "loss_num": 0.00494384765625, "loss_xval": 0.024658203125, "num_input_tokens_seen": 572490196, "step": 6272 }, { "epoch": 26.1375, "grad_norm": 1.8665974664534275, "learning_rate": 5e-05, "loss": 0.021, "num_input_tokens_seen": 572581772, "step": 6273 }, { "epoch": 26.1375, "loss": 0.021007981151342392, "loss_ce": 2.7147021683049388e-05, "loss_iou": 0.23046875, "loss_num": 0.004180908203125, "loss_xval": 0.02099609375, "num_input_tokens_seen": 572581772, "step": 6273 }, { "epoch": 26.141666666666666, "grad_norm": 2.569508785889838, "learning_rate": 5e-05, "loss": 0.0484, "num_input_tokens_seen": 572673364, "step": 6274 }, { "epoch": 26.141666666666666, "loss": 0.03979034721851349, "loss_ce": 6.409214984159917e-05, "loss_iou": 0.099609375, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 572673364, "step": 6274 }, { "epoch": 26.145833333333332, "grad_norm": 3.3867004929729982, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 572765160, "step": 6275 }, { "epoch": 26.145833333333332, "loss": 0.04471275210380554, "loss_ce": 4.498059752222616e-06, "loss_iou": 0.271484375, "loss_num": 0.00897216796875, "loss_xval": 0.044677734375, "num_input_tokens_seen": 572765160, "step": 6275 }, { "epoch": 26.15, "grad_norm": 3.9647607647830805, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 572856588, "step": 6276 }, { "epoch": 26.15, "loss": 0.07329382747411728, "loss_ce": 3.6381457903189585e-05, "loss_iou": 0.31640625, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 572856588, "step": 6276 }, { "epoch": 26.154166666666665, "grad_norm": 2.9639328826519242, "learning_rate": 5e-05, "loss": 0.0377, "num_input_tokens_seen": 572947872, "step": 6277 }, { "epoch": 26.154166666666665, "loss": 0.03356025740504265, "loss_ce": 2.1438656403915957e-05, "loss_iou": 0.1572265625, "loss_num": 0.0067138671875, "loss_xval": 0.033447265625, "num_input_tokens_seen": 572947872, "step": 6277 }, { "epoch": 26.158333333333335, "grad_norm": 5.329458968613203, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 573038684, "step": 6278 }, { "epoch": 26.158333333333335, "loss": 0.031451016664505005, "loss_ce": 2.653630872373469e-06, "loss_iou": 0.296875, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 573038684, "step": 6278 }, { "epoch": 26.1625, "grad_norm": 2.430329436830701, "learning_rate": 5e-05, "loss": 0.0466, "num_input_tokens_seen": 573129732, "step": 6279 }, { "epoch": 26.1625, "loss": 0.05689224228262901, "loss_ce": 7.477737199224066e-06, "loss_iou": 0.216796875, "loss_num": 0.0113525390625, "loss_xval": 0.056884765625, "num_input_tokens_seen": 573129732, "step": 6279 }, { "epoch": 26.166666666666668, "grad_norm": 7.444355132302191, "learning_rate": 5e-05, "loss": 0.0208, "num_input_tokens_seen": 573221080, "step": 6280 }, { "epoch": 26.166666666666668, "loss": 0.021823428571224213, "loss_ce": 3.359195943630766e-06, "loss_iou": 0.19921875, "loss_num": 0.004364013671875, "loss_xval": 0.0218505859375, "num_input_tokens_seen": 573221080, "step": 6280 }, { "epoch": 26.170833333333334, "grad_norm": 2.063519178543893, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 573312500, "step": 6281 }, { "epoch": 26.170833333333334, "loss": 0.0332389771938324, "loss_ce": 5.330958629201632e-06, "loss_iou": 0.23828125, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 573312500, "step": 6281 }, { "epoch": 26.175, "grad_norm": 2.478197828967112, "learning_rate": 5e-05, "loss": 0.0251, "num_input_tokens_seen": 573403948, "step": 6282 }, { "epoch": 26.175, "loss": 0.018054649233818054, "loss_ce": 1.1132175131933764e-05, "loss_iou": 0.1201171875, "loss_num": 0.00360107421875, "loss_xval": 0.01806640625, "num_input_tokens_seen": 573403948, "step": 6282 }, { "epoch": 26.179166666666667, "grad_norm": 2.0409826501703314, "learning_rate": 5e-05, "loss": 0.0304, "num_input_tokens_seen": 573495656, "step": 6283 }, { "epoch": 26.179166666666667, "loss": 0.026428505778312683, "loss_ce": 0.002380652353167534, "loss_iou": 0.1728515625, "loss_num": 0.00482177734375, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 573495656, "step": 6283 }, { "epoch": 26.183333333333334, "grad_norm": 2.12562414575699, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 573586556, "step": 6284 }, { "epoch": 26.183333333333334, "loss": 0.04275288060307503, "loss_ce": 0.00014271096733864397, "loss_iou": 0.2255859375, "loss_num": 0.008544921875, "loss_xval": 0.042724609375, "num_input_tokens_seen": 573586556, "step": 6284 }, { "epoch": 26.1875, "grad_norm": 2.3256192561189386, "learning_rate": 5e-05, "loss": 0.0291, "num_input_tokens_seen": 573677892, "step": 6285 }, { "epoch": 26.1875, "loss": 0.0218992717564106, "loss_ce": 2.910128387156874e-06, "loss_iou": 0.287109375, "loss_num": 0.004364013671875, "loss_xval": 0.0218505859375, "num_input_tokens_seen": 573677892, "step": 6285 }, { "epoch": 26.191666666666666, "grad_norm": 2.2700532782155487, "learning_rate": 5e-05, "loss": 0.0244, "num_input_tokens_seen": 573769264, "step": 6286 }, { "epoch": 26.191666666666666, "loss": 0.0227479487657547, "loss_ce": 1.2353164493106306e-05, "loss_iou": 0.2890625, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 573769264, "step": 6286 }, { "epoch": 26.195833333333333, "grad_norm": 2.497051277580666, "learning_rate": 5e-05, "loss": 0.0348, "num_input_tokens_seen": 573861032, "step": 6287 }, { "epoch": 26.195833333333333, "loss": 0.02247200906276703, "loss_ce": 3.440377668084693e-06, "loss_iou": 0.3125, "loss_num": 0.004486083984375, "loss_xval": 0.0224609375, "num_input_tokens_seen": 573861032, "step": 6287 }, { "epoch": 26.2, "grad_norm": 2.4665341618284793, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 573952516, "step": 6288 }, { "epoch": 26.2, "loss": 0.03499322384595871, "loss_ce": 4.820210961042903e-06, "loss_iou": 0.23046875, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 573952516, "step": 6288 }, { "epoch": 26.204166666666666, "grad_norm": 2.5178799536284404, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 574043548, "step": 6289 }, { "epoch": 26.204166666666666, "loss": 0.025905869901180267, "loss_ce": 4.075403012393508e-06, "loss_iou": 0.248046875, "loss_num": 0.00518798828125, "loss_xval": 0.02587890625, "num_input_tokens_seen": 574043548, "step": 6289 }, { "epoch": 26.208333333333332, "grad_norm": 2.815248669719499, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 574134964, "step": 6290 }, { "epoch": 26.208333333333332, "loss": 0.04833405464887619, "loss_ce": 0.009218152612447739, "loss_iou": 0.173828125, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 574134964, "step": 6290 }, { "epoch": 26.2125, "grad_norm": 1.2196878914329312, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 574226544, "step": 6291 }, { "epoch": 26.2125, "loss": 0.035638727247714996, "loss_ce": 9.454719474888407e-06, "loss_iou": 0.263671875, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 574226544, "step": 6291 }, { "epoch": 26.216666666666665, "grad_norm": 5.70504657339675, "learning_rate": 5e-05, "loss": 0.0539, "num_input_tokens_seen": 574317544, "step": 6292 }, { "epoch": 26.216666666666665, "loss": 0.02251153253018856, "loss_ce": 4.818836259801174e-06, "loss_iou": 0.3046875, "loss_num": 0.004486083984375, "loss_xval": 0.0224609375, "num_input_tokens_seen": 574317544, "step": 6292 }, { "epoch": 26.220833333333335, "grad_norm": 16.549963830690942, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 574408668, "step": 6293 }, { "epoch": 26.220833333333335, "loss": 0.03274759650230408, "loss_ce": 1.749454349919688e-05, "loss_iou": 0.076171875, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 574408668, "step": 6293 }, { "epoch": 26.225, "grad_norm": 1.98771914841208, "learning_rate": 5e-05, "loss": 0.0313, "num_input_tokens_seen": 574499588, "step": 6294 }, { "epoch": 26.225, "loss": 0.04000755771994591, "loss_ce": 1.4269931853050366e-05, "loss_iou": 0.232421875, "loss_num": 0.00799560546875, "loss_xval": 0.0400390625, "num_input_tokens_seen": 574499588, "step": 6294 }, { "epoch": 26.229166666666668, "grad_norm": 2.8155593274898787, "learning_rate": 5e-05, "loss": 0.0246, "num_input_tokens_seen": 574590840, "step": 6295 }, { "epoch": 26.229166666666668, "loss": 0.021502085030078888, "loss_ce": 2.4502253381797345e-06, "loss_iou": 0.2412109375, "loss_num": 0.004302978515625, "loss_xval": 0.021484375, "num_input_tokens_seen": 574590840, "step": 6295 }, { "epoch": 26.233333333333334, "grad_norm": 3.603509176942159, "learning_rate": 5e-05, "loss": 0.0272, "num_input_tokens_seen": 574681876, "step": 6296 }, { "epoch": 26.233333333333334, "loss": 0.0193068515509367, "loss_ce": 4.483481461647898e-06, "loss_iou": 0.25390625, "loss_num": 0.0038604736328125, "loss_xval": 0.019287109375, "num_input_tokens_seen": 574681876, "step": 6296 }, { "epoch": 26.2375, "grad_norm": 5.566119614228712, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 574773600, "step": 6297 }, { "epoch": 26.2375, "loss": 0.03268130123615265, "loss_ce": 0.00016482088540215045, "loss_iou": 0.294921875, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 574773600, "step": 6297 }, { "epoch": 26.241666666666667, "grad_norm": 3.6229640604552147, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 574865264, "step": 6298 }, { "epoch": 26.241666666666667, "loss": 0.03571357578039169, "loss_ce": 8.007896212802734e-06, "loss_iou": 0.040283203125, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 574865264, "step": 6298 }, { "epoch": 26.245833333333334, "grad_norm": 1.0912757230321628, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 574956888, "step": 6299 }, { "epoch": 26.245833333333334, "loss": 0.06911545991897583, "loss_ce": 0.0002525450545363128, "loss_iou": 0.2890625, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 574956888, "step": 6299 }, { "epoch": 26.25, "grad_norm": 1.5003465757889274, "learning_rate": 5e-05, "loss": 0.0497, "num_input_tokens_seen": 575048208, "step": 6300 }, { "epoch": 26.25, "loss": 0.07881193608045578, "loss_ce": 2.3175707610789686e-05, "loss_iou": 0.29296875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 575048208, "step": 6300 }, { "epoch": 26.254166666666666, "grad_norm": 1.4826211105944096, "learning_rate": 5e-05, "loss": 0.0266, "num_input_tokens_seen": 575140140, "step": 6301 }, { "epoch": 26.254166666666666, "loss": 0.032630644738674164, "loss_ce": 0.002044401131570339, "loss_iou": 0.19140625, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 575140140, "step": 6301 }, { "epoch": 26.258333333333333, "grad_norm": 2.0612668053861007, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 575231776, "step": 6302 }, { "epoch": 26.258333333333333, "loss": 0.02026584930717945, "loss_ce": 1.7435413610655814e-05, "loss_iou": 0.1318359375, "loss_num": 0.004058837890625, "loss_xval": 0.020263671875, "num_input_tokens_seen": 575231776, "step": 6302 }, { "epoch": 26.2625, "grad_norm": 2.461279254233241, "learning_rate": 5e-05, "loss": 0.0292, "num_input_tokens_seen": 575323000, "step": 6303 }, { "epoch": 26.2625, "loss": 0.03142453730106354, "loss_ce": 6.692681381537113e-06, "loss_iou": 0.1875, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 575323000, "step": 6303 }, { "epoch": 26.266666666666666, "grad_norm": 4.378948959388897, "learning_rate": 5e-05, "loss": 0.0852, "num_input_tokens_seen": 575414968, "step": 6304 }, { "epoch": 26.266666666666666, "loss": 0.14499732851982117, "loss_ce": 3.8841390050947666e-05, "loss_iou": 0.125, "loss_num": 0.029052734375, "loss_xval": 0.14453125, "num_input_tokens_seen": 575414968, "step": 6304 }, { "epoch": 26.270833333333332, "grad_norm": 2.4258684782840754, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 575506536, "step": 6305 }, { "epoch": 26.270833333333332, "loss": 0.029087748378515244, "loss_ce": 4.495202119869646e-06, "loss_iou": 0.20703125, "loss_num": 0.005828857421875, "loss_xval": 0.029052734375, "num_input_tokens_seen": 575506536, "step": 6305 }, { "epoch": 26.275, "grad_norm": 3.2960106053065346, "learning_rate": 5e-05, "loss": 0.068, "num_input_tokens_seen": 575598072, "step": 6306 }, { "epoch": 26.275, "loss": 0.06410195678472519, "loss_ce": 3.03038668789668e-05, "loss_iou": 0.130859375, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 575598072, "step": 6306 }, { "epoch": 26.279166666666665, "grad_norm": 4.672763395873947, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 575689892, "step": 6307 }, { "epoch": 26.279166666666665, "loss": 0.04166632145643234, "loss_ce": 0.00018530177476350218, "loss_iou": 0.193359375, "loss_num": 0.00830078125, "loss_xval": 0.04150390625, "num_input_tokens_seen": 575689892, "step": 6307 }, { "epoch": 26.283333333333335, "grad_norm": 1.8578568597790612, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 575781492, "step": 6308 }, { "epoch": 26.283333333333335, "loss": 0.05019602179527283, "loss_ce": 9.861505532171577e-06, "loss_iou": 0.2158203125, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 575781492, "step": 6308 }, { "epoch": 26.2875, "grad_norm": 2.1385111114018813, "learning_rate": 5e-05, "loss": 0.0316, "num_input_tokens_seen": 575872756, "step": 6309 }, { "epoch": 26.2875, "loss": 0.0381060428917408, "loss_ce": 4.844471732212696e-06, "loss_iou": 0.1796875, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 575872756, "step": 6309 }, { "epoch": 26.291666666666668, "grad_norm": 1.8730805481238693, "learning_rate": 5e-05, "loss": 0.055, "num_input_tokens_seen": 575964732, "step": 6310 }, { "epoch": 26.291666666666668, "loss": 0.06005624681711197, "loss_ce": 8.920710388338193e-05, "loss_iou": 0.2734375, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 575964732, "step": 6310 }, { "epoch": 26.295833333333334, "grad_norm": 2.188500937318792, "learning_rate": 5e-05, "loss": 0.0325, "num_input_tokens_seen": 576055952, "step": 6311 }, { "epoch": 26.295833333333334, "loss": 0.04608750343322754, "loss_ce": 5.960510861768853e-06, "loss_iou": 0.1728515625, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 576055952, "step": 6311 }, { "epoch": 26.3, "grad_norm": 3.995286971857075, "learning_rate": 5e-05, "loss": 0.0524, "num_input_tokens_seen": 576147220, "step": 6312 }, { "epoch": 26.3, "loss": 0.05525440722703934, "loss_ce": 2.332332314836094e-06, "loss_iou": 0.08984375, "loss_num": 0.01104736328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 576147220, "step": 6312 }, { "epoch": 26.304166666666667, "grad_norm": 3.0070715232170655, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 576238912, "step": 6313 }, { "epoch": 26.304166666666667, "loss": 0.04728226363658905, "loss_ce": 1.0534803550399374e-05, "loss_iou": 0.294921875, "loss_num": 0.00946044921875, "loss_xval": 0.04736328125, "num_input_tokens_seen": 576238912, "step": 6313 }, { "epoch": 26.308333333333334, "grad_norm": 3.2493945466049796, "learning_rate": 5e-05, "loss": 0.03, "num_input_tokens_seen": 576329824, "step": 6314 }, { "epoch": 26.308333333333334, "loss": 0.021796412765979767, "loss_ce": 6.862643203930929e-06, "loss_iou": 0.267578125, "loss_num": 0.004364013671875, "loss_xval": 0.021728515625, "num_input_tokens_seen": 576329824, "step": 6314 }, { "epoch": 26.3125, "grad_norm": 2.8164148728586356, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 576420952, "step": 6315 }, { "epoch": 26.3125, "loss": 0.04578210785984993, "loss_ce": 5.741334007325349e-06, "loss_iou": 0.31640625, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 576420952, "step": 6315 }, { "epoch": 26.316666666666666, "grad_norm": 2.950784137828782, "learning_rate": 5e-05, "loss": 0.0651, "num_input_tokens_seen": 576511960, "step": 6316 }, { "epoch": 26.316666666666666, "loss": 0.1039830893278122, "loss_ce": 2.070630216621794e-06, "loss_iou": 0.30078125, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 576511960, "step": 6316 }, { "epoch": 26.320833333333333, "grad_norm": 3.0424274172526546, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 576603444, "step": 6317 }, { "epoch": 26.320833333333333, "loss": 0.025478117167949677, "loss_ce": 3.568322881619679e-06, "loss_iou": 0.259765625, "loss_num": 0.005096435546875, "loss_xval": 0.0255126953125, "num_input_tokens_seen": 576603444, "step": 6317 }, { "epoch": 26.325, "grad_norm": 2.8406572357861637, "learning_rate": 5e-05, "loss": 0.0503, "num_input_tokens_seen": 576694496, "step": 6318 }, { "epoch": 26.325, "loss": 0.05386997014284134, "loss_ce": 6.443972324632341e-06, "loss_iou": 0.1591796875, "loss_num": 0.0107421875, "loss_xval": 0.053955078125, "num_input_tokens_seen": 576694496, "step": 6318 }, { "epoch": 26.329166666666666, "grad_norm": 3.658124062989085, "learning_rate": 5e-05, "loss": 0.0561, "num_input_tokens_seen": 576786176, "step": 6319 }, { "epoch": 26.329166666666666, "loss": 0.040701497346162796, "loss_ce": 6.306990599114215e-06, "loss_iou": 0.234375, "loss_num": 0.00811767578125, "loss_xval": 0.040771484375, "num_input_tokens_seen": 576786176, "step": 6319 }, { "epoch": 26.333333333333332, "grad_norm": 4.215985552079258, "learning_rate": 5e-05, "loss": 0.0513, "num_input_tokens_seen": 576877596, "step": 6320 }, { "epoch": 26.333333333333332, "loss": 0.06873767077922821, "loss_ce": 4.454630470718257e-06, "loss_iou": 0.1923828125, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 576877596, "step": 6320 }, { "epoch": 26.3375, "grad_norm": 2.5018603380379165, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 576968136, "step": 6321 }, { "epoch": 26.3375, "loss": 0.05684886872768402, "loss_ce": 2.248891405542963e-06, "loss_iou": 0.2041015625, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 576968136, "step": 6321 }, { "epoch": 26.341666666666665, "grad_norm": 2.5013332338694054, "learning_rate": 5e-05, "loss": 0.023, "num_input_tokens_seen": 577058988, "step": 6322 }, { "epoch": 26.341666666666665, "loss": 0.025415629148483276, "loss_ce": 2.114283233822789e-06, "loss_iou": 0.31640625, "loss_num": 0.00506591796875, "loss_xval": 0.025390625, "num_input_tokens_seen": 577058988, "step": 6322 }, { "epoch": 26.345833333333335, "grad_norm": 3.110519419619886, "learning_rate": 5e-05, "loss": 0.0454, "num_input_tokens_seen": 577149672, "step": 6323 }, { "epoch": 26.345833333333335, "loss": 0.06356197595596313, "loss_ce": 1.491544935561251e-06, "loss_iou": 0.1904296875, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 577149672, "step": 6323 }, { "epoch": 26.35, "grad_norm": 3.010348438294041, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 577241132, "step": 6324 }, { "epoch": 26.35, "loss": 0.04298759251832962, "loss_ce": 3.583397756301565e-06, "loss_iou": 0.232421875, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 577241132, "step": 6324 }, { "epoch": 26.354166666666668, "grad_norm": 2.0317761923647613, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 577333176, "step": 6325 }, { "epoch": 26.354166666666668, "loss": 0.038431257009506226, "loss_ce": 9.62726062425645e-06, "loss_iou": 0.19921875, "loss_num": 0.0076904296875, "loss_xval": 0.038330078125, "num_input_tokens_seen": 577333176, "step": 6325 }, { "epoch": 26.358333333333334, "grad_norm": 2.2036163389130863, "learning_rate": 5e-05, "loss": 0.0407, "num_input_tokens_seen": 577424236, "step": 6326 }, { "epoch": 26.358333333333334, "loss": 0.06302201747894287, "loss_ce": 3.2206221476371866e-06, "loss_iou": 0.361328125, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 577424236, "step": 6326 }, { "epoch": 26.3625, "grad_norm": 5.343683713846114, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 577514744, "step": 6327 }, { "epoch": 26.3625, "loss": 0.024122852832078934, "loss_ce": 6.335745638352819e-06, "loss_iou": 0.224609375, "loss_num": 0.00482177734375, "loss_xval": 0.024169921875, "num_input_tokens_seen": 577514744, "step": 6327 }, { "epoch": 26.366666666666667, "grad_norm": 2.091313578216484, "learning_rate": 5e-05, "loss": 0.0274, "num_input_tokens_seen": 577606116, "step": 6328 }, { "epoch": 26.366666666666667, "loss": 0.025190196931362152, "loss_ce": 1.3193935956223868e-05, "loss_iou": 0.29296875, "loss_num": 0.005035400390625, "loss_xval": 0.025146484375, "num_input_tokens_seen": 577606116, "step": 6328 }, { "epoch": 26.370833333333334, "grad_norm": 2.7810069202307286, "learning_rate": 5e-05, "loss": 0.0403, "num_input_tokens_seen": 577697536, "step": 6329 }, { "epoch": 26.370833333333334, "loss": 0.03468858078122139, "loss_ce": 1.2982125554117374e-05, "loss_iou": 0.27734375, "loss_num": 0.006927490234375, "loss_xval": 0.03466796875, "num_input_tokens_seen": 577697536, "step": 6329 }, { "epoch": 26.375, "grad_norm": 3.9993272580255472, "learning_rate": 5e-05, "loss": 0.0879, "num_input_tokens_seen": 577788520, "step": 6330 }, { "epoch": 26.375, "loss": 0.135927215218544, "loss_ce": 1.926666300278157e-06, "loss_iou": 0.259765625, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 577788520, "step": 6330 }, { "epoch": 26.379166666666666, "grad_norm": 2.601935095295643, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 577880416, "step": 6331 }, { "epoch": 26.379166666666666, "loss": 0.02961255982518196, "loss_ce": 1.8137467122869566e-05, "loss_iou": 0.1650390625, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 577880416, "step": 6331 }, { "epoch": 26.383333333333333, "grad_norm": 1.3863780156038121, "learning_rate": 5e-05, "loss": 0.0208, "num_input_tokens_seen": 577972412, "step": 6332 }, { "epoch": 26.383333333333333, "loss": 0.015614290721714497, "loss_ce": 8.847277786117047e-05, "loss_iou": 0.162109375, "loss_num": 0.00311279296875, "loss_xval": 0.0155029296875, "num_input_tokens_seen": 577972412, "step": 6332 }, { "epoch": 26.3875, "grad_norm": 1.5277325908460835, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 578064024, "step": 6333 }, { "epoch": 26.3875, "loss": 0.05949154123663902, "loss_ce": 1.2783336387656163e-05, "loss_iou": 0.326171875, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 578064024, "step": 6333 }, { "epoch": 26.391666666666666, "grad_norm": 1.580821654790845, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 578155216, "step": 6334 }, { "epoch": 26.391666666666666, "loss": 0.026848390698432922, "loss_ce": 8.180058102880139e-06, "loss_iou": 0.1494140625, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 578155216, "step": 6334 }, { "epoch": 26.395833333333332, "grad_norm": 2.8275278403687216, "learning_rate": 5e-05, "loss": 0.0223, "num_input_tokens_seen": 578246792, "step": 6335 }, { "epoch": 26.395833333333332, "loss": 0.018020860850811005, "loss_ce": 6.126628431957215e-05, "loss_iou": 0.19140625, "loss_num": 0.0035858154296875, "loss_xval": 0.0179443359375, "num_input_tokens_seen": 578246792, "step": 6335 }, { "epoch": 26.4, "grad_norm": 2.8463810027193905, "learning_rate": 5e-05, "loss": 0.0732, "num_input_tokens_seen": 578338724, "step": 6336 }, { "epoch": 26.4, "loss": 0.07648120820522308, "loss_ce": 4.160474873060593e-06, "loss_iou": 0.255859375, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 578338724, "step": 6336 }, { "epoch": 26.404166666666665, "grad_norm": 3.459044094398339, "learning_rate": 5e-05, "loss": 0.0341, "num_input_tokens_seen": 578429780, "step": 6337 }, { "epoch": 26.404166666666665, "loss": 0.046463415026664734, "loss_ce": 8.034942766244058e-06, "loss_iou": 0.1728515625, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 578429780, "step": 6337 }, { "epoch": 26.408333333333335, "grad_norm": 2.8430893757458002, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 578520908, "step": 6338 }, { "epoch": 26.408333333333335, "loss": 0.06003076583147049, "loss_ce": 2.689038410608191e-06, "loss_iou": 0.27734375, "loss_num": 0.011962890625, "loss_xval": 0.06005859375, "num_input_tokens_seen": 578520908, "step": 6338 }, { "epoch": 26.4125, "grad_norm": 2.2231752423437774, "learning_rate": 5e-05, "loss": 0.0303, "num_input_tokens_seen": 578611796, "step": 6339 }, { "epoch": 26.4125, "loss": 0.04048309847712517, "loss_ce": 1.5290997907868586e-06, "loss_iou": 0.279296875, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 578611796, "step": 6339 }, { "epoch": 26.416666666666668, "grad_norm": 2.7015743485243275, "learning_rate": 5e-05, "loss": 0.03, "num_input_tokens_seen": 578703224, "step": 6340 }, { "epoch": 26.416666666666668, "loss": 0.024205388501286507, "loss_ce": 4.949035428580828e-06, "loss_iou": 0.37109375, "loss_num": 0.004852294921875, "loss_xval": 0.024169921875, "num_input_tokens_seen": 578703224, "step": 6340 }, { "epoch": 26.420833333333334, "grad_norm": 3.4984107307336005, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 578794864, "step": 6341 }, { "epoch": 26.420833333333334, "loss": 0.03814494609832764, "loss_ce": 2.849268821591977e-05, "loss_iou": 0.1953125, "loss_num": 0.00762939453125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 578794864, "step": 6341 }, { "epoch": 26.425, "grad_norm": 2.6017879252865743, "learning_rate": 5e-05, "loss": 0.0577, "num_input_tokens_seen": 578886592, "step": 6342 }, { "epoch": 26.425, "loss": 0.04673183336853981, "loss_ce": 0.00112331158015877, "loss_iou": 0.2314453125, "loss_num": 0.0091552734375, "loss_xval": 0.045654296875, "num_input_tokens_seen": 578886592, "step": 6342 }, { "epoch": 26.429166666666667, "grad_norm": 2.2730921916039786, "learning_rate": 5e-05, "loss": 0.0265, "num_input_tokens_seen": 578977840, "step": 6343 }, { "epoch": 26.429166666666667, "loss": 0.025332368910312653, "loss_ce": 1.8036354958894663e-05, "loss_iou": 0.2333984375, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 578977840, "step": 6343 }, { "epoch": 26.433333333333334, "grad_norm": 2.074076894002716, "learning_rate": 5e-05, "loss": 0.0252, "num_input_tokens_seen": 579069376, "step": 6344 }, { "epoch": 26.433333333333334, "loss": 0.02509579062461853, "loss_ce": 0.0005596562987193465, "loss_iou": 0.201171875, "loss_num": 0.004913330078125, "loss_xval": 0.0245361328125, "num_input_tokens_seen": 579069376, "step": 6344 }, { "epoch": 26.4375, "grad_norm": 2.98508876389629, "learning_rate": 5e-05, "loss": 0.0258, "num_input_tokens_seen": 579160768, "step": 6345 }, { "epoch": 26.4375, "loss": 0.026991160586476326, "loss_ce": 7.465577073162422e-05, "loss_iou": 0.271484375, "loss_num": 0.005401611328125, "loss_xval": 0.02685546875, "num_input_tokens_seen": 579160768, "step": 6345 }, { "epoch": 26.441666666666666, "grad_norm": 2.852654715793294, "learning_rate": 5e-05, "loss": 0.0542, "num_input_tokens_seen": 579251572, "step": 6346 }, { "epoch": 26.441666666666666, "loss": 0.07384663075208664, "loss_ce": 1.7213085357070668e-06, "loss_iou": 0.2470703125, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 579251572, "step": 6346 }, { "epoch": 26.445833333333333, "grad_norm": 4.067748408031735, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 579342196, "step": 6347 }, { "epoch": 26.445833333333333, "loss": 0.04042934253811836, "loss_ce": 8.811045518086758e-06, "loss_iou": 0.30078125, "loss_num": 0.008056640625, "loss_xval": 0.04052734375, "num_input_tokens_seen": 579342196, "step": 6347 }, { "epoch": 26.45, "grad_norm": 2.060944078779385, "learning_rate": 5e-05, "loss": 0.0286, "num_input_tokens_seen": 579433508, "step": 6348 }, { "epoch": 26.45, "loss": 0.026566024869680405, "loss_ce": 8.101601451926399e-06, "loss_iou": 0.208984375, "loss_num": 0.00531005859375, "loss_xval": 0.026611328125, "num_input_tokens_seen": 579433508, "step": 6348 }, { "epoch": 26.454166666666666, "grad_norm": 1.4420374872645958, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 579524268, "step": 6349 }, { "epoch": 26.454166666666666, "loss": 0.05209437757730484, "loss_ce": 8.689075912116095e-07, "loss_iou": 0.236328125, "loss_num": 0.01043701171875, "loss_xval": 0.052001953125, "num_input_tokens_seen": 579524268, "step": 6349 }, { "epoch": 26.458333333333332, "grad_norm": 1.372894539742087, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 579615144, "step": 6350 }, { "epoch": 26.458333333333332, "loss": 0.03426942229270935, "loss_ce": 1.3438528185361065e-05, "loss_iou": 0.1318359375, "loss_num": 0.006866455078125, "loss_xval": 0.0341796875, "num_input_tokens_seen": 579615144, "step": 6350 }, { "epoch": 26.4625, "grad_norm": 2.619487082696982, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 579706576, "step": 6351 }, { "epoch": 26.4625, "loss": 0.02325865998864174, "loss_ce": 5.004097693017684e-05, "loss_iou": 0.296875, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 579706576, "step": 6351 }, { "epoch": 26.466666666666665, "grad_norm": 2.9126440330534806, "learning_rate": 5e-05, "loss": 0.0487, "num_input_tokens_seen": 579798132, "step": 6352 }, { "epoch": 26.466666666666665, "loss": 0.030383776873350143, "loss_ce": 3.5252160159870982e-06, "loss_iou": 0.1201171875, "loss_num": 0.006072998046875, "loss_xval": 0.0303955078125, "num_input_tokens_seen": 579798132, "step": 6352 }, { "epoch": 26.470833333333335, "grad_norm": 2.2802737333967307, "learning_rate": 5e-05, "loss": 0.058, "num_input_tokens_seen": 579889192, "step": 6353 }, { "epoch": 26.470833333333335, "loss": 0.05479633808135986, "loss_ce": 2.024693003477296e-06, "loss_iou": 0.232421875, "loss_num": 0.010986328125, "loss_xval": 0.0546875, "num_input_tokens_seen": 579889192, "step": 6353 }, { "epoch": 26.475, "grad_norm": 3.3087152312604875, "learning_rate": 5e-05, "loss": 0.0261, "num_input_tokens_seen": 579981088, "step": 6354 }, { "epoch": 26.475, "loss": 0.024331307038664818, "loss_ce": 6.220270734047517e-05, "loss_iou": 0.181640625, "loss_num": 0.004852294921875, "loss_xval": 0.0242919921875, "num_input_tokens_seen": 579981088, "step": 6354 }, { "epoch": 26.479166666666668, "grad_norm": 2.636841214464431, "learning_rate": 5e-05, "loss": 0.0712, "num_input_tokens_seen": 580071936, "step": 6355 }, { "epoch": 26.479166666666668, "loss": 0.11911989003419876, "loss_ce": 2.156376694983919e-06, "loss_iou": 0.302734375, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 580071936, "step": 6355 }, { "epoch": 26.483333333333334, "grad_norm": 2.4018988890415365, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 580162684, "step": 6356 }, { "epoch": 26.483333333333334, "loss": 0.05590973049402237, "loss_ce": 1.5258759731295868e-06, "loss_iou": 0.197265625, "loss_num": 0.01116943359375, "loss_xval": 0.055908203125, "num_input_tokens_seen": 580162684, "step": 6356 }, { "epoch": 26.4875, "grad_norm": 2.111625423943705, "learning_rate": 5e-05, "loss": 0.0229, "num_input_tokens_seen": 580254320, "step": 6357 }, { "epoch": 26.4875, "loss": 0.01646145060658455, "loss_ce": 1.2477152267820202e-05, "loss_iou": 0.1513671875, "loss_num": 0.0032958984375, "loss_xval": 0.0164794921875, "num_input_tokens_seen": 580254320, "step": 6357 }, { "epoch": 26.491666666666667, "grad_norm": 1.7982741445816284, "learning_rate": 5e-05, "loss": 0.0271, "num_input_tokens_seen": 580345336, "step": 6358 }, { "epoch": 26.491666666666667, "loss": 0.028727242723107338, "loss_ce": 2.5719239147292683e-06, "loss_iou": 0.19140625, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 580345336, "step": 6358 }, { "epoch": 26.495833333333334, "grad_norm": 2.4180720667866877, "learning_rate": 5e-05, "loss": 0.0495, "num_input_tokens_seen": 580436756, "step": 6359 }, { "epoch": 26.495833333333334, "loss": 0.07168925553560257, "loss_ce": 3.4634629173524445e-06, "loss_iou": 0.2890625, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 580436756, "step": 6359 }, { "epoch": 26.5, "grad_norm": 4.37380711018086, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 580527860, "step": 6360 }, { "epoch": 26.5, "loss": 0.05018797516822815, "loss_ce": 1.8191740309703164e-06, "loss_iou": 0.201171875, "loss_num": 0.010009765625, "loss_xval": 0.05029296875, "num_input_tokens_seen": 580527860, "step": 6360 }, { "epoch": 26.504166666666666, "grad_norm": 3.3143864690576725, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 580619316, "step": 6361 }, { "epoch": 26.504166666666666, "loss": 0.06650644540786743, "loss_ce": 7.730662036919966e-05, "loss_iou": 0.2734375, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 580619316, "step": 6361 }, { "epoch": 26.508333333333333, "grad_norm": 3.126583552244286, "learning_rate": 5e-05, "loss": 0.0646, "num_input_tokens_seen": 580710312, "step": 6362 }, { "epoch": 26.508333333333333, "loss": 0.027590272948145866, "loss_ce": 2.3825505195418373e-06, "loss_iou": 0.1640625, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 580710312, "step": 6362 }, { "epoch": 26.5125, "grad_norm": 1.5455366033668991, "learning_rate": 5e-05, "loss": 0.078, "num_input_tokens_seen": 580801956, "step": 6363 }, { "epoch": 26.5125, "loss": 0.023525547236204147, "loss_ce": 3.464097244432196e-05, "loss_iou": 0.263671875, "loss_num": 0.00469970703125, "loss_xval": 0.0234375, "num_input_tokens_seen": 580801956, "step": 6363 }, { "epoch": 26.516666666666666, "grad_norm": 2.2025538317255884, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 580893244, "step": 6364 }, { "epoch": 26.516666666666666, "loss": 0.04680035263299942, "loss_ce": 1.6449587292299839e-06, "loss_iou": 0.287109375, "loss_num": 0.00933837890625, "loss_xval": 0.046875, "num_input_tokens_seen": 580893244, "step": 6364 }, { "epoch": 26.520833333333332, "grad_norm": 1.977698351598379, "learning_rate": 5e-05, "loss": 0.0262, "num_input_tokens_seen": 580984300, "step": 6365 }, { "epoch": 26.520833333333332, "loss": 0.033968620002269745, "loss_ce": 2.556229446781799e-06, "loss_iou": 0.29296875, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 580984300, "step": 6365 }, { "epoch": 26.525, "grad_norm": 2.769782273195563, "learning_rate": 5e-05, "loss": 0.0251, "num_input_tokens_seen": 581074768, "step": 6366 }, { "epoch": 26.525, "loss": 0.02291957288980484, "loss_ce": 8.502931450493634e-06, "loss_iou": 0.1572265625, "loss_num": 0.00457763671875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 581074768, "step": 6366 }, { "epoch": 26.529166666666665, "grad_norm": 2.812135989180566, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 581166188, "step": 6367 }, { "epoch": 26.529166666666665, "loss": 0.07515604794025421, "loss_ce": 6.513767857541097e-06, "loss_iou": 0.208984375, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 581166188, "step": 6367 }, { "epoch": 26.533333333333335, "grad_norm": 3.291469743063748, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 581257644, "step": 6368 }, { "epoch": 26.533333333333335, "loss": 0.04181433096528053, "loss_ce": 5.247154149401467e-06, "loss_iou": 0.3046875, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 581257644, "step": 6368 }, { "epoch": 26.5375, "grad_norm": 2.6630209253197212, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 581348840, "step": 6369 }, { "epoch": 26.5375, "loss": 0.043280940502882004, "loss_ce": 7.01455519447336e-06, "loss_iou": 0.3359375, "loss_num": 0.0086669921875, "loss_xval": 0.043212890625, "num_input_tokens_seen": 581348840, "step": 6369 }, { "epoch": 26.541666666666668, "grad_norm": 2.165904526625043, "learning_rate": 5e-05, "loss": 0.0259, "num_input_tokens_seen": 581440356, "step": 6370 }, { "epoch": 26.541666666666668, "loss": 0.030488599091768265, "loss_ce": 1.679923116171267e-05, "loss_iou": 0.193359375, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 581440356, "step": 6370 }, { "epoch": 26.545833333333334, "grad_norm": 2.1519854563459013, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 581531112, "step": 6371 }, { "epoch": 26.545833333333334, "loss": 0.04378194361925125, "loss_ce": 4.480289135244675e-06, "loss_iou": 0.2314453125, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 581531112, "step": 6371 }, { "epoch": 26.55, "grad_norm": 2.032262603192787, "learning_rate": 5e-05, "loss": 0.0631, "num_input_tokens_seen": 581622908, "step": 6372 }, { "epoch": 26.55, "loss": 0.07665525376796722, "loss_ce": 5.612863606074825e-05, "loss_iou": 0.115234375, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 581622908, "step": 6372 }, { "epoch": 26.554166666666667, "grad_norm": 2.916450194533043, "learning_rate": 5e-05, "loss": 0.0258, "num_input_tokens_seen": 581715240, "step": 6373 }, { "epoch": 26.554166666666667, "loss": 0.02922218292951584, "loss_ce": 1.6016360859794077e-06, "loss_iou": 0.26953125, "loss_num": 0.005859375, "loss_xval": 0.0291748046875, "num_input_tokens_seen": 581715240, "step": 6373 }, { "epoch": 26.558333333333334, "grad_norm": 2.7777091959970948, "learning_rate": 5e-05, "loss": 0.0617, "num_input_tokens_seen": 581806232, "step": 6374 }, { "epoch": 26.558333333333334, "loss": 0.08162251114845276, "loss_ce": 3.246418145863572e-06, "loss_iou": 0.33203125, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 581806232, "step": 6374 }, { "epoch": 26.5625, "grad_norm": 2.759082928506338, "learning_rate": 5e-05, "loss": 0.0317, "num_input_tokens_seen": 581897896, "step": 6375 }, { "epoch": 26.5625, "loss": 0.022695370018482208, "loss_ce": 5.550998139369767e-06, "loss_iou": 0.2421875, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 581897896, "step": 6375 }, { "epoch": 26.566666666666666, "grad_norm": 2.8094302276322103, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 581989280, "step": 6376 }, { "epoch": 26.566666666666666, "loss": 0.04536880552768707, "loss_ce": 4.423782229423523e-06, "loss_iou": 0.310546875, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 581989280, "step": 6376 }, { "epoch": 26.570833333333333, "grad_norm": 2.3794042855729716, "learning_rate": 5e-05, "loss": 0.0648, "num_input_tokens_seen": 582079520, "step": 6377 }, { "epoch": 26.570833333333333, "loss": 0.06763634085655212, "loss_ce": 1.760738086886704e-06, "loss_iou": 0.1904296875, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 582079520, "step": 6377 }, { "epoch": 26.575, "grad_norm": 2.490674979187668, "learning_rate": 5e-05, "loss": 0.054, "num_input_tokens_seen": 582170564, "step": 6378 }, { "epoch": 26.575, "loss": 0.025903530418872833, "loss_ce": 9.36424476094544e-06, "loss_iou": 0.2294921875, "loss_num": 0.00518798828125, "loss_xval": 0.02587890625, "num_input_tokens_seen": 582170564, "step": 6378 }, { "epoch": 26.579166666666666, "grad_norm": 2.6831178349896794, "learning_rate": 5e-05, "loss": 0.0582, "num_input_tokens_seen": 582261796, "step": 6379 }, { "epoch": 26.579166666666666, "loss": 0.04030474275350571, "loss_ce": 6.28036877969862e-06, "loss_iou": 0.3203125, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 582261796, "step": 6379 }, { "epoch": 26.583333333333332, "grad_norm": 2.9254635506782654, "learning_rate": 5e-05, "loss": 0.0395, "num_input_tokens_seen": 582352716, "step": 6380 }, { "epoch": 26.583333333333332, "loss": 0.030702892690896988, "loss_ce": 2.2086460376158357e-06, "loss_iou": 0.2119140625, "loss_num": 0.006134033203125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 582352716, "step": 6380 }, { "epoch": 26.5875, "grad_norm": 3.318806426993543, "learning_rate": 5e-05, "loss": 0.0284, "num_input_tokens_seen": 582444340, "step": 6381 }, { "epoch": 26.5875, "loss": 0.027070969343185425, "loss_ce": 1.7136593669420108e-05, "loss_iou": 0.3359375, "loss_num": 0.005401611328125, "loss_xval": 0.027099609375, "num_input_tokens_seen": 582444340, "step": 6381 }, { "epoch": 26.591666666666665, "grad_norm": 1.533350298247467, "learning_rate": 5e-05, "loss": 0.0294, "num_input_tokens_seen": 582535436, "step": 6382 }, { "epoch": 26.591666666666665, "loss": 0.04427720606327057, "loss_ce": 3.434727477724664e-05, "loss_iou": 0.1923828125, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 582535436, "step": 6382 }, { "epoch": 26.595833333333335, "grad_norm": 1.2006372851725466, "learning_rate": 5e-05, "loss": 0.0526, "num_input_tokens_seen": 582626420, "step": 6383 }, { "epoch": 26.595833333333335, "loss": 0.06297853589057922, "loss_ce": 0.0001886187819764018, "loss_iou": 0.259765625, "loss_num": 0.01251220703125, "loss_xval": 0.06298828125, "num_input_tokens_seen": 582626420, "step": 6383 }, { "epoch": 26.6, "grad_norm": 3.043951801189853, "learning_rate": 5e-05, "loss": 0.0563, "num_input_tokens_seen": 582717916, "step": 6384 }, { "epoch": 26.6, "loss": 0.03581683710217476, "loss_ce": 4.460815489437664e-06, "loss_iou": 0.306640625, "loss_num": 0.007171630859375, "loss_xval": 0.035888671875, "num_input_tokens_seen": 582717916, "step": 6384 }, { "epoch": 26.604166666666668, "grad_norm": 3.1192619091196265, "learning_rate": 5e-05, "loss": 0.0818, "num_input_tokens_seen": 582807888, "step": 6385 }, { "epoch": 26.604166666666668, "loss": 0.03427360951900482, "loss_ce": 1.762903411872685e-05, "loss_iou": 0.1787109375, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 582807888, "step": 6385 }, { "epoch": 26.608333333333334, "grad_norm": 1.3891490390759393, "learning_rate": 5e-05, "loss": 0.0233, "num_input_tokens_seen": 582899608, "step": 6386 }, { "epoch": 26.608333333333334, "loss": 0.027261588722467422, "loss_ce": 1.7636632492212811e-06, "loss_iou": 0.2373046875, "loss_num": 0.00543212890625, "loss_xval": 0.0272216796875, "num_input_tokens_seen": 582899608, "step": 6386 }, { "epoch": 26.6125, "grad_norm": 1.9439048481514238, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 582991388, "step": 6387 }, { "epoch": 26.6125, "loss": 0.0331236831843853, "loss_ce": 4.482254098547855e-06, "loss_iou": 0.2890625, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 582991388, "step": 6387 }, { "epoch": 26.616666666666667, "grad_norm": 2.601169162882571, "learning_rate": 5e-05, "loss": 0.0338, "num_input_tokens_seen": 583082652, "step": 6388 }, { "epoch": 26.616666666666667, "loss": 0.03648631274700165, "loss_ce": 2.5509375518595334e-06, "loss_iou": 0.265625, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 583082652, "step": 6388 }, { "epoch": 26.620833333333334, "grad_norm": 3.4201171748095054, "learning_rate": 5e-05, "loss": 0.0724, "num_input_tokens_seen": 583173792, "step": 6389 }, { "epoch": 26.620833333333334, "loss": 0.04953838512301445, "loss_ce": 8.357676961168181e-06, "loss_iou": 0.2314453125, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 583173792, "step": 6389 }, { "epoch": 26.625, "grad_norm": 3.2177538717532537, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 583265528, "step": 6390 }, { "epoch": 26.625, "loss": 0.02730938419699669, "loss_ce": 1.1408922546252143e-05, "loss_iou": 0.24609375, "loss_num": 0.005462646484375, "loss_xval": 0.02734375, "num_input_tokens_seen": 583265528, "step": 6390 }, { "epoch": 26.629166666666666, "grad_norm": 2.6080596153005744, "learning_rate": 5e-05, "loss": 0.0702, "num_input_tokens_seen": 583356632, "step": 6391 }, { "epoch": 26.629166666666666, "loss": 0.1096821278333664, "loss_ce": 1.9534661532816244e-06, "loss_iou": 0.2197265625, "loss_num": 0.0218505859375, "loss_xval": 0.10986328125, "num_input_tokens_seen": 583356632, "step": 6391 }, { "epoch": 26.633333333333333, "grad_norm": 3.3712503495952557, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 583447960, "step": 6392 }, { "epoch": 26.633333333333333, "loss": 0.05872820317745209, "loss_ce": 0.003735528327524662, "loss_iou": 0.259765625, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 583447960, "step": 6392 }, { "epoch": 26.6375, "grad_norm": 2.7712620943981388, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 583539624, "step": 6393 }, { "epoch": 26.6375, "loss": 0.035634495317935944, "loss_ce": 5.2247432904550806e-06, "loss_iou": 0.330078125, "loss_num": 0.00714111328125, "loss_xval": 0.03564453125, "num_input_tokens_seen": 583539624, "step": 6393 }, { "epoch": 26.641666666666666, "grad_norm": 3.1583934392875848, "learning_rate": 5e-05, "loss": 0.042, "num_input_tokens_seen": 583630692, "step": 6394 }, { "epoch": 26.641666666666666, "loss": 0.03953661769628525, "loss_ce": 4.909299605060369e-06, "loss_iou": 0.28125, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 583630692, "step": 6394 }, { "epoch": 26.645833333333332, "grad_norm": 2.3011474265121303, "learning_rate": 5e-05, "loss": 0.0445, "num_input_tokens_seen": 583722228, "step": 6395 }, { "epoch": 26.645833333333332, "loss": 0.05926249548792839, "loss_ce": 1.2617707398021594e-05, "loss_iou": 0.28515625, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 583722228, "step": 6395 }, { "epoch": 26.65, "grad_norm": 2.3120902553161304, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 583813768, "step": 6396 }, { "epoch": 26.65, "loss": 0.0495050773024559, "loss_ce": 5.567444532061927e-06, "loss_iou": 0.150390625, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 583813768, "step": 6396 }, { "epoch": 26.654166666666665, "grad_norm": 4.047422590953184, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 583904500, "step": 6397 }, { "epoch": 26.654166666666665, "loss": 0.06963618844747543, "loss_ce": 2.5595696570235305e-05, "loss_iou": 0.30859375, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 583904500, "step": 6397 }, { "epoch": 26.658333333333335, "grad_norm": 2.0502554889475375, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 583995700, "step": 6398 }, { "epoch": 26.658333333333335, "loss": 0.03861871361732483, "loss_ce": 1.3975381079944782e-05, "loss_iou": 0.314453125, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 583995700, "step": 6398 }, { "epoch": 26.6625, "grad_norm": 2.8899846505666806, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 584086472, "step": 6399 }, { "epoch": 26.6625, "loss": 0.044085074216127396, "loss_ce": 2.4305013539560605e-06, "loss_iou": 0.302734375, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 584086472, "step": 6399 }, { "epoch": 26.666666666666668, "grad_norm": 5.051323947719037, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 584177612, "step": 6400 }, { "epoch": 26.666666666666668, "loss": 0.06541875004768372, "loss_ce": 4.321193500800291e-06, "loss_iou": 0.318359375, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 584177612, "step": 6400 }, { "epoch": 26.670833333333334, "grad_norm": 1.8595043235990891, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 584268900, "step": 6401 }, { "epoch": 26.670833333333334, "loss": 0.05805381387472153, "loss_ce": 1.7489541050963453e-06, "loss_iou": 0.1845703125, "loss_num": 0.0115966796875, "loss_xval": 0.05810546875, "num_input_tokens_seen": 584268900, "step": 6401 }, { "epoch": 26.675, "grad_norm": 2.1412284194971734, "learning_rate": 5e-05, "loss": 0.0208, "num_input_tokens_seen": 584360708, "step": 6402 }, { "epoch": 26.675, "loss": 0.020786207169294357, "loss_ce": 3.735653081093915e-06, "loss_iou": 0.15234375, "loss_num": 0.004150390625, "loss_xval": 0.020751953125, "num_input_tokens_seen": 584360708, "step": 6402 }, { "epoch": 26.679166666666667, "grad_norm": 6.353105232087393, "learning_rate": 5e-05, "loss": 0.0475, "num_input_tokens_seen": 584451928, "step": 6403 }, { "epoch": 26.679166666666667, "loss": 0.04798254743218422, "loss_ce": 2.417149516986683e-05, "loss_iou": 0.1484375, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 584451928, "step": 6403 }, { "epoch": 26.683333333333334, "grad_norm": 2.3612110352957045, "learning_rate": 5e-05, "loss": 0.048, "num_input_tokens_seen": 584543328, "step": 6404 }, { "epoch": 26.683333333333334, "loss": 0.03970205783843994, "loss_ce": 2.9204951715655625e-05, "loss_iou": 0.2890625, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 584543328, "step": 6404 }, { "epoch": 26.6875, "grad_norm": 2.4044976294078038, "learning_rate": 5e-05, "loss": 0.0696, "num_input_tokens_seen": 584634444, "step": 6405 }, { "epoch": 26.6875, "loss": 0.052463434636592865, "loss_ce": 3.719230335264001e-06, "loss_iou": 0.28515625, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 584634444, "step": 6405 }, { "epoch": 26.691666666666666, "grad_norm": 1.9056035326436946, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 584726220, "step": 6406 }, { "epoch": 26.691666666666666, "loss": 0.029926294460892677, "loss_ce": 0.0003395025269128382, "loss_iou": 0.17578125, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 584726220, "step": 6406 }, { "epoch": 26.695833333333333, "grad_norm": 3.4191147212881625, "learning_rate": 5e-05, "loss": 0.0478, "num_input_tokens_seen": 584817984, "step": 6407 }, { "epoch": 26.695833333333333, "loss": 0.02571401745080948, "loss_ce": 2.957248625534703e-06, "loss_iou": 0.2421875, "loss_num": 0.005126953125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 584817984, "step": 6407 }, { "epoch": 26.7, "grad_norm": 3.549124781623612, "learning_rate": 5e-05, "loss": 0.024, "num_input_tokens_seen": 584909328, "step": 6408 }, { "epoch": 26.7, "loss": 0.022335490211844444, "loss_ce": 4.252930921211373e-06, "loss_iou": 0.24609375, "loss_num": 0.00445556640625, "loss_xval": 0.0223388671875, "num_input_tokens_seen": 584909328, "step": 6408 }, { "epoch": 26.704166666666666, "grad_norm": 8.95269863880642, "learning_rate": 5e-05, "loss": 0.0697, "num_input_tokens_seen": 585000660, "step": 6409 }, { "epoch": 26.704166666666666, "loss": 0.026591291651129723, "loss_ce": 0.0002546220493968576, "loss_iou": 0.216796875, "loss_num": 0.005279541015625, "loss_xval": 0.0263671875, "num_input_tokens_seen": 585000660, "step": 6409 }, { "epoch": 26.708333333333332, "grad_norm": 1.7099945501510865, "learning_rate": 5e-05, "loss": 0.0671, "num_input_tokens_seen": 585092012, "step": 6410 }, { "epoch": 26.708333333333332, "loss": 0.03264483064413071, "loss_ce": 1.3908905202697497e-05, "loss_iou": 0.298828125, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 585092012, "step": 6410 }, { "epoch": 26.7125, "grad_norm": 2.8318015894004454, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 585183520, "step": 6411 }, { "epoch": 26.7125, "loss": 0.08012676239013672, "loss_ce": 0.0002622563042677939, "loss_iou": 0.1875, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 585183520, "step": 6411 }, { "epoch": 26.716666666666665, "grad_norm": 0.9956697448339735, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 585274760, "step": 6412 }, { "epoch": 26.716666666666665, "loss": 0.028292525559663773, "loss_ce": 2.729700099735055e-06, "loss_iou": 0.2275390625, "loss_num": 0.005645751953125, "loss_xval": 0.0283203125, "num_input_tokens_seen": 585274760, "step": 6412 }, { "epoch": 26.720833333333335, "grad_norm": 0.9206675696449096, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 585365972, "step": 6413 }, { "epoch": 26.720833333333335, "loss": 0.04103298857808113, "loss_ce": 1.736307967803441e-05, "loss_iou": 0.1884765625, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 585365972, "step": 6413 }, { "epoch": 26.725, "grad_norm": 1.5571004714894323, "learning_rate": 5e-05, "loss": 0.0624, "num_input_tokens_seen": 585457120, "step": 6414 }, { "epoch": 26.725, "loss": 0.05378871411085129, "loss_ce": 1.4819468105997657e-06, "loss_iou": 0.265625, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 585457120, "step": 6414 }, { "epoch": 26.729166666666668, "grad_norm": 16.903678598398205, "learning_rate": 5e-05, "loss": 0.0506, "num_input_tokens_seen": 585548840, "step": 6415 }, { "epoch": 26.729166666666668, "loss": 0.06305142492055893, "loss_ce": 2.106048668792937e-06, "loss_iou": 0.1162109375, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 585548840, "step": 6415 }, { "epoch": 26.733333333333334, "grad_norm": 3.6405479201013327, "learning_rate": 5e-05, "loss": 0.0627, "num_input_tokens_seen": 585640304, "step": 6416 }, { "epoch": 26.733333333333334, "loss": 0.10354401171207428, "loss_ce": 5.503875399881508e-06, "loss_iou": 0.353515625, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 585640304, "step": 6416 }, { "epoch": 26.7375, "grad_norm": 5.075702941057741, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 585731188, "step": 6417 }, { "epoch": 26.7375, "loss": 0.05039920285344124, "loss_ce": 0.0008539170958101749, "loss_iou": 0.2080078125, "loss_num": 0.0098876953125, "loss_xval": 0.049560546875, "num_input_tokens_seen": 585731188, "step": 6417 }, { "epoch": 26.741666666666667, "grad_norm": 2.945129074205288, "learning_rate": 5e-05, "loss": 0.0661, "num_input_tokens_seen": 585822472, "step": 6418 }, { "epoch": 26.741666666666667, "loss": 0.04696325957775116, "loss_ce": 3.485221532173455e-05, "loss_iou": 0.337890625, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 585822472, "step": 6418 }, { "epoch": 26.745833333333334, "grad_norm": 2.7674485302683824, "learning_rate": 5e-05, "loss": 0.0568, "num_input_tokens_seen": 585913776, "step": 6419 }, { "epoch": 26.745833333333334, "loss": 0.05188199505209923, "loss_ce": 2.112512902385788e-06, "loss_iou": 0.28125, "loss_num": 0.0103759765625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 585913776, "step": 6419 }, { "epoch": 26.75, "grad_norm": 2.306503631062339, "learning_rate": 5e-05, "loss": 0.0455, "num_input_tokens_seen": 586004924, "step": 6420 }, { "epoch": 26.75, "loss": 0.031033311039209366, "loss_ce": 0.002835067454725504, "loss_iou": 0.173828125, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 586004924, "step": 6420 }, { "epoch": 26.754166666666666, "grad_norm": 2.7235878325854195, "learning_rate": 5e-05, "loss": 0.0315, "num_input_tokens_seen": 586094432, "step": 6421 }, { "epoch": 26.754166666666666, "loss": 0.025530759245157242, "loss_ce": 2.805929398164153e-06, "loss_iou": 0.265625, "loss_num": 0.005096435546875, "loss_xval": 0.0255126953125, "num_input_tokens_seen": 586094432, "step": 6421 }, { "epoch": 26.758333333333333, "grad_norm": 4.879312451738563, "learning_rate": 5e-05, "loss": 0.0679, "num_input_tokens_seen": 586185332, "step": 6422 }, { "epoch": 26.758333333333333, "loss": 0.0576663538813591, "loss_ce": 1.1019059456884861e-05, "loss_iou": 0.2041015625, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 586185332, "step": 6422 }, { "epoch": 26.7625, "grad_norm": 2.0444074012690945, "learning_rate": 5e-05, "loss": 0.0556, "num_input_tokens_seen": 586276412, "step": 6423 }, { "epoch": 26.7625, "loss": 0.049933046102523804, "loss_ce": 6.287096312007634e-06, "loss_iou": 0.06103515625, "loss_num": 0.010009765625, "loss_xval": 0.0498046875, "num_input_tokens_seen": 586276412, "step": 6423 }, { "epoch": 26.766666666666666, "grad_norm": 1.433993243634629, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 586367844, "step": 6424 }, { "epoch": 26.766666666666666, "loss": 0.039614349603652954, "loss_ce": 1.7794123778003268e-05, "loss_iou": 0.2353515625, "loss_num": 0.0079345703125, "loss_xval": 0.03955078125, "num_input_tokens_seen": 586367844, "step": 6424 }, { "epoch": 26.770833333333332, "grad_norm": 2.1161370072800074, "learning_rate": 5e-05, "loss": 0.0551, "num_input_tokens_seen": 586459180, "step": 6425 }, { "epoch": 26.770833333333332, "loss": 0.08504265546798706, "loss_ce": 0.0005547403707168996, "loss_iou": 0.1630859375, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 586459180, "step": 6425 }, { "epoch": 26.775, "grad_norm": 1.494492169237464, "learning_rate": 5e-05, "loss": 0.052, "num_input_tokens_seen": 586550300, "step": 6426 }, { "epoch": 26.775, "loss": 0.04698227345943451, "loss_ce": 5.3866875532548875e-05, "loss_iou": 0.162109375, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 586550300, "step": 6426 }, { "epoch": 26.779166666666665, "grad_norm": 1.446415825474081, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 586641508, "step": 6427 }, { "epoch": 26.779166666666665, "loss": 0.0393327996134758, "loss_ce": 0.0003465966146904975, "loss_iou": 0.28125, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 586641508, "step": 6427 }, { "epoch": 26.783333333333335, "grad_norm": 2.0137081030703836, "learning_rate": 5e-05, "loss": 0.0283, "num_input_tokens_seen": 586733016, "step": 6428 }, { "epoch": 26.783333333333335, "loss": 0.029314683750271797, "loss_ce": 0.0001169908355223015, "loss_iou": 0.1904296875, "loss_num": 0.005828857421875, "loss_xval": 0.0291748046875, "num_input_tokens_seen": 586733016, "step": 6428 }, { "epoch": 26.7875, "grad_norm": 4.094413968281199, "learning_rate": 5e-05, "loss": 0.0259, "num_input_tokens_seen": 586824192, "step": 6429 }, { "epoch": 26.7875, "loss": 0.029474452137947083, "loss_ce": 9.729870726005174e-06, "loss_iou": 0.09619140625, "loss_num": 0.005889892578125, "loss_xval": 0.0294189453125, "num_input_tokens_seen": 586824192, "step": 6429 }, { "epoch": 26.791666666666668, "grad_norm": 15.593829077174021, "learning_rate": 5e-05, "loss": 0.0458, "num_input_tokens_seen": 586915608, "step": 6430 }, { "epoch": 26.791666666666668, "loss": 0.04587894305586815, "loss_ce": 3.3958101539610652e-06, "loss_iou": 0.306640625, "loss_num": 0.00921630859375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 586915608, "step": 6430 }, { "epoch": 26.795833333333334, "grad_norm": 2.352842928826233, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 587006468, "step": 6431 }, { "epoch": 26.795833333333334, "loss": 0.0529550164937973, "loss_ce": 0.0015023784944787621, "loss_iou": 0.1923828125, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 587006468, "step": 6431 }, { "epoch": 26.8, "grad_norm": 3.895723652699884, "learning_rate": 5e-05, "loss": 0.0265, "num_input_tokens_seen": 587098508, "step": 6432 }, { "epoch": 26.8, "loss": 0.028584472835063934, "loss_ce": 3.527989247231744e-05, "loss_iou": 0.283203125, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 587098508, "step": 6432 }, { "epoch": 26.804166666666667, "grad_norm": 3.2476906357415154, "learning_rate": 5e-05, "loss": 0.0523, "num_input_tokens_seen": 587188992, "step": 6433 }, { "epoch": 26.804166666666667, "loss": 0.026140259578824043, "loss_ce": 1.953450009750668e-06, "loss_iou": 0.2236328125, "loss_num": 0.005218505859375, "loss_xval": 0.026123046875, "num_input_tokens_seen": 587188992, "step": 6433 }, { "epoch": 26.808333333333334, "grad_norm": 4.413858954256823, "learning_rate": 5e-05, "loss": 0.0468, "num_input_tokens_seen": 587280204, "step": 6434 }, { "epoch": 26.808333333333334, "loss": 0.0530477836728096, "loss_ce": 0.0002828882134053856, "loss_iou": 0.212890625, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 587280204, "step": 6434 }, { "epoch": 26.8125, "grad_norm": 12.91505568211801, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 587371684, "step": 6435 }, { "epoch": 26.8125, "loss": 0.0907832682132721, "loss_ce": 0.0005962011055089533, "loss_iou": 0.2373046875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 587371684, "step": 6435 }, { "epoch": 26.816666666666666, "grad_norm": 3.991737329495078, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 587463276, "step": 6436 }, { "epoch": 26.816666666666666, "loss": 0.03305169939994812, "loss_ce": 1.2606242307811044e-05, "loss_iou": 0.2275390625, "loss_num": 0.006622314453125, "loss_xval": 0.032958984375, "num_input_tokens_seen": 587463276, "step": 6436 }, { "epoch": 26.820833333333333, "grad_norm": 1.8788612397637245, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 587554432, "step": 6437 }, { "epoch": 26.820833333333333, "loss": 0.02601810172200203, "loss_ce": 1.8665964489628095e-06, "loss_iou": 0.27734375, "loss_num": 0.005218505859375, "loss_xval": 0.0260009765625, "num_input_tokens_seen": 587554432, "step": 6437 }, { "epoch": 26.825, "grad_norm": 1.6852604433469536, "learning_rate": 5e-05, "loss": 0.0589, "num_input_tokens_seen": 587646084, "step": 6438 }, { "epoch": 26.825, "loss": 0.031329937279224396, "loss_ce": 1.8900527720688842e-05, "loss_iou": 0.224609375, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 587646084, "step": 6438 }, { "epoch": 26.829166666666666, "grad_norm": 2.610942859817052, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 587737264, "step": 6439 }, { "epoch": 26.829166666666666, "loss": 0.05527624860405922, "loss_ce": 0.0020078180823475122, "loss_iou": 0.326171875, "loss_num": 0.01068115234375, "loss_xval": 0.05322265625, "num_input_tokens_seen": 587737264, "step": 6439 }, { "epoch": 26.833333333333332, "grad_norm": 3.0473913921892257, "learning_rate": 5e-05, "loss": 0.0711, "num_input_tokens_seen": 587828408, "step": 6440 }, { "epoch": 26.833333333333332, "loss": 0.022648457437753677, "loss_ce": 0.0008932405617088079, "loss_iou": 0.162109375, "loss_num": 0.004364013671875, "loss_xval": 0.021728515625, "num_input_tokens_seen": 587828408, "step": 6440 }, { "epoch": 26.8375, "grad_norm": 3.7468871750695363, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 587920180, "step": 6441 }, { "epoch": 26.8375, "loss": 0.03927876800298691, "loss_ce": 2.6419752430228982e-06, "loss_iou": 0.310546875, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 587920180, "step": 6441 }, { "epoch": 26.841666666666665, "grad_norm": 2.8556493561688745, "learning_rate": 5e-05, "loss": 0.0394, "num_input_tokens_seen": 588011052, "step": 6442 }, { "epoch": 26.841666666666665, "loss": 0.04216247797012329, "loss_ce": 2.4448554540867917e-06, "loss_iou": 0.310546875, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 588011052, "step": 6442 }, { "epoch": 26.845833333333335, "grad_norm": 64.27268256971784, "learning_rate": 5e-05, "loss": 0.0762, "num_input_tokens_seen": 588102664, "step": 6443 }, { "epoch": 26.845833333333335, "loss": 0.08693346381187439, "loss_ce": 1.1767648175009526e-05, "loss_iou": 0.216796875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 588102664, "step": 6443 }, { "epoch": 26.85, "grad_norm": 9.652023728117497, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 588192224, "step": 6444 }, { "epoch": 26.85, "loss": 0.05360259860754013, "loss_ce": 2.8993539672228508e-05, "loss_iou": 0.15234375, "loss_num": 0.0107421875, "loss_xval": 0.053466796875, "num_input_tokens_seen": 588192224, "step": 6444 }, { "epoch": 26.854166666666668, "grad_norm": 2.628580701797132, "learning_rate": 5e-05, "loss": 0.0244, "num_input_tokens_seen": 588283672, "step": 6445 }, { "epoch": 26.854166666666668, "loss": 0.02869114838540554, "loss_ce": 1.9882903870893642e-05, "loss_iou": 0.2138671875, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 588283672, "step": 6445 }, { "epoch": 26.858333333333334, "grad_norm": 3.154952388101508, "learning_rate": 5e-05, "loss": 0.0306, "num_input_tokens_seen": 588374980, "step": 6446 }, { "epoch": 26.858333333333334, "loss": 0.028727829456329346, "loss_ce": 3.1601703085470945e-06, "loss_iou": 0.333984375, "loss_num": 0.0057373046875, "loss_xval": 0.0286865234375, "num_input_tokens_seen": 588374980, "step": 6446 }, { "epoch": 26.8625, "grad_norm": 2.959396044273708, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 588466732, "step": 6447 }, { "epoch": 26.8625, "loss": 0.031750332564115524, "loss_ce": 0.000164640587172471, "loss_iou": 0.2890625, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 588466732, "step": 6447 }, { "epoch": 26.866666666666667, "grad_norm": 2.4942420944744925, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 588558308, "step": 6448 }, { "epoch": 26.866666666666667, "loss": 0.04990419000387192, "loss_ce": 7.953952263051178e-06, "loss_iou": 0.162109375, "loss_num": 0.010009765625, "loss_xval": 0.0498046875, "num_input_tokens_seen": 588558308, "step": 6448 }, { "epoch": 26.870833333333334, "grad_norm": 2.8619748945556425, "learning_rate": 5e-05, "loss": 0.0472, "num_input_tokens_seen": 588649520, "step": 6449 }, { "epoch": 26.870833333333334, "loss": 0.02948564663529396, "loss_ce": 5.666960532835219e-06, "loss_iou": 0.2890625, "loss_num": 0.005889892578125, "loss_xval": 0.029541015625, "num_input_tokens_seen": 588649520, "step": 6449 }, { "epoch": 26.875, "grad_norm": 3.3762100271150777, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 588740472, "step": 6450 }, { "epoch": 26.875, "loss": 0.029242604970932007, "loss_ce": 6.764735189790372e-06, "loss_iou": 0.197265625, "loss_num": 0.005859375, "loss_xval": 0.029296875, "num_input_tokens_seen": 588740472, "step": 6450 }, { "epoch": 26.879166666666666, "grad_norm": 2.7603635479763042, "learning_rate": 5e-05, "loss": 0.047, "num_input_tokens_seen": 588832100, "step": 6451 }, { "epoch": 26.879166666666666, "loss": 0.043041642755270004, "loss_ce": 4.237728353473358e-05, "loss_iou": 0.2451171875, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 588832100, "step": 6451 }, { "epoch": 26.883333333333333, "grad_norm": 3.299961341163053, "learning_rate": 5e-05, "loss": 0.0366, "num_input_tokens_seen": 588923460, "step": 6452 }, { "epoch": 26.883333333333333, "loss": 0.04982820153236389, "loss_ce": 8.258573870989494e-06, "loss_iou": 0.2333984375, "loss_num": 0.00994873046875, "loss_xval": 0.0498046875, "num_input_tokens_seen": 588923460, "step": 6452 }, { "epoch": 26.8875, "grad_norm": 2.48388702600195, "learning_rate": 5e-05, "loss": 0.0346, "num_input_tokens_seen": 589014828, "step": 6453 }, { "epoch": 26.8875, "loss": 0.04472944512963295, "loss_ce": 5.9347225942474324e-06, "loss_iou": 0.2138671875, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 589014828, "step": 6453 }, { "epoch": 26.891666666666666, "grad_norm": 5.689731283328901, "learning_rate": 5e-05, "loss": 0.0333, "num_input_tokens_seen": 589105608, "step": 6454 }, { "epoch": 26.891666666666666, "loss": 0.031533852219581604, "loss_ce": 1.5657816447856021e-06, "loss_iou": 0.2734375, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 589105608, "step": 6454 }, { "epoch": 26.895833333333332, "grad_norm": 3.700058361609715, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 589197044, "step": 6455 }, { "epoch": 26.895833333333332, "loss": 0.04795718565583229, "loss_ce": 7.510394789278507e-05, "loss_iou": 0.205078125, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 589197044, "step": 6455 }, { "epoch": 26.9, "grad_norm": 2.330781239457574, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 589287924, "step": 6456 }, { "epoch": 26.9, "loss": 0.05563540384173393, "loss_ce": 1.86222086995258e-06, "loss_iou": 0.1923828125, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 589287924, "step": 6456 }, { "epoch": 26.904166666666665, "grad_norm": 2.8092294796667807, "learning_rate": 5e-05, "loss": 0.0297, "num_input_tokens_seen": 589379332, "step": 6457 }, { "epoch": 26.904166666666665, "loss": 0.034930381923913956, "loss_ce": 3.013393325090874e-06, "loss_iou": 0.283203125, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 589379332, "step": 6457 }, { "epoch": 26.908333333333335, "grad_norm": 3.501297570900534, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 589470464, "step": 6458 }, { "epoch": 26.908333333333335, "loss": 0.041064560413360596, "loss_ce": 1.842044002842158e-05, "loss_iou": 0.265625, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 589470464, "step": 6458 }, { "epoch": 26.9125, "grad_norm": 2.3132474254536217, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 589561848, "step": 6459 }, { "epoch": 26.9125, "loss": 0.05403226241469383, "loss_ce": 0.0004166927537880838, "loss_iou": 0.0908203125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 589561848, "step": 6459 }, { "epoch": 26.916666666666668, "grad_norm": 1.251692634650389, "learning_rate": 5e-05, "loss": 0.0309, "num_input_tokens_seen": 589653132, "step": 6460 }, { "epoch": 26.916666666666668, "loss": 0.02022678591310978, "loss_ce": 0.00016910732665564865, "loss_iou": 0.2109375, "loss_num": 0.003997802734375, "loss_xval": 0.02001953125, "num_input_tokens_seen": 589653132, "step": 6460 }, { "epoch": 26.920833333333334, "grad_norm": 1.7199158661604974, "learning_rate": 5e-05, "loss": 0.0303, "num_input_tokens_seen": 589744456, "step": 6461 }, { "epoch": 26.920833333333334, "loss": 0.03480079025030136, "loss_ce": 3.1222889447235502e-06, "loss_iou": 0.1982421875, "loss_num": 0.0069580078125, "loss_xval": 0.034912109375, "num_input_tokens_seen": 589744456, "step": 6461 }, { "epoch": 26.925, "grad_norm": 7.6365636958705485, "learning_rate": 5e-05, "loss": 0.0534, "num_input_tokens_seen": 589835476, "step": 6462 }, { "epoch": 26.925, "loss": 0.05388407036662102, "loss_ce": 5.286566192808095e-06, "loss_iou": 0.24609375, "loss_num": 0.0107421875, "loss_xval": 0.053955078125, "num_input_tokens_seen": 589835476, "step": 6462 }, { "epoch": 26.929166666666667, "grad_norm": 3.8661025523979697, "learning_rate": 5e-05, "loss": 0.0469, "num_input_tokens_seen": 589927360, "step": 6463 }, { "epoch": 26.929166666666667, "loss": 0.028550995513796806, "loss_ce": 3.994800863438286e-05, "loss_iou": 0.294921875, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 589927360, "step": 6463 }, { "epoch": 26.933333333333334, "grad_norm": 2.595665132144297, "learning_rate": 5e-05, "loss": 0.0258, "num_input_tokens_seen": 590018304, "step": 6464 }, { "epoch": 26.933333333333334, "loss": 0.029079755768179893, "loss_ce": 4.132675712753553e-06, "loss_iou": 0.25, "loss_num": 0.005828857421875, "loss_xval": 0.029052734375, "num_input_tokens_seen": 590018304, "step": 6464 }, { "epoch": 26.9375, "grad_norm": 4.17170778740505, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 590109448, "step": 6465 }, { "epoch": 26.9375, "loss": 0.065787173807621, "loss_ce": 6.5329086282872595e-06, "loss_iou": 0.275390625, "loss_num": 0.01312255859375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 590109448, "step": 6465 }, { "epoch": 26.941666666666666, "grad_norm": 1.5051146651283749, "learning_rate": 5e-05, "loss": 0.049, "num_input_tokens_seen": 590200868, "step": 6466 }, { "epoch": 26.941666666666666, "loss": 0.05446765571832657, "loss_ce": 3.9554543036501855e-05, "loss_iou": 0.173828125, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 590200868, "step": 6466 }, { "epoch": 26.945833333333333, "grad_norm": 0.8182828257810135, "learning_rate": 5e-05, "loss": 0.026, "num_input_tokens_seen": 590292396, "step": 6467 }, { "epoch": 26.945833333333333, "loss": 0.030023805797100067, "loss_ce": 9.769059033715166e-06, "loss_iou": 0.10009765625, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 590292396, "step": 6467 }, { "epoch": 26.95, "grad_norm": 1.8724105927178325, "learning_rate": 5e-05, "loss": 0.0616, "num_input_tokens_seen": 590384416, "step": 6468 }, { "epoch": 26.95, "loss": 0.07086817920207977, "loss_ce": 6.361617579386802e-06, "loss_iou": 0.1943359375, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 590384416, "step": 6468 }, { "epoch": 26.954166666666666, "grad_norm": 1.5139939648692018, "learning_rate": 5e-05, "loss": 0.0514, "num_input_tokens_seen": 590475848, "step": 6469 }, { "epoch": 26.954166666666666, "loss": 0.026034872978925705, "loss_ce": 0.00015596569573972374, "loss_iou": 0.2041015625, "loss_num": 0.00518798828125, "loss_xval": 0.02587890625, "num_input_tokens_seen": 590475848, "step": 6469 }, { "epoch": 26.958333333333332, "grad_norm": 1.3300940044856553, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 590567184, "step": 6470 }, { "epoch": 26.958333333333332, "loss": 0.023993268609046936, "loss_ce": 0.00021626101806759834, "loss_iou": 0.24609375, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 590567184, "step": 6470 }, { "epoch": 26.9625, "grad_norm": 1.7243529092692613, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 590658468, "step": 6471 }, { "epoch": 26.9625, "loss": 0.03004373051226139, "loss_ce": 1.4433577234740369e-05, "loss_iou": 0.1953125, "loss_num": 0.006011962890625, "loss_xval": 0.030029296875, "num_input_tokens_seen": 590658468, "step": 6471 }, { "epoch": 26.966666666666665, "grad_norm": 3.616339040915319, "learning_rate": 5e-05, "loss": 0.0536, "num_input_tokens_seen": 590749624, "step": 6472 }, { "epoch": 26.966666666666665, "loss": 0.06653188169002533, "loss_ce": 3.5614871194411535e-06, "loss_iou": 0.12890625, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 590749624, "step": 6472 }, { "epoch": 26.970833333333335, "grad_norm": 2.6145017660857124, "learning_rate": 5e-05, "loss": 0.0714, "num_input_tokens_seen": 590840284, "step": 6473 }, { "epoch": 26.970833333333335, "loss": 0.06919960677623749, "loss_ce": 0.00012306452845223248, "loss_iou": 0.1123046875, "loss_num": 0.01385498046875, "loss_xval": 0.06884765625, "num_input_tokens_seen": 590840284, "step": 6473 }, { "epoch": 26.975, "grad_norm": 3.0006287243722394, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 590931200, "step": 6474 }, { "epoch": 26.975, "loss": 0.02546137198805809, "loss_ce": 2.08112146538042e-06, "loss_iou": 0.234375, "loss_num": 0.005096435546875, "loss_xval": 0.0255126953125, "num_input_tokens_seen": 590931200, "step": 6474 }, { "epoch": 26.979166666666668, "grad_norm": 2.998171401254916, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 591023128, "step": 6475 }, { "epoch": 26.979166666666668, "loss": 0.025605838745832443, "loss_ce": 1.5902509176157764e-06, "loss_iou": 0.341796875, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 591023128, "step": 6475 }, { "epoch": 26.983333333333334, "grad_norm": 2.6153748993073815, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 591114924, "step": 6476 }, { "epoch": 26.983333333333334, "loss": 0.03486858308315277, "loss_ce": 0.003710137214511633, "loss_iou": 0.265625, "loss_num": 0.0062255859375, "loss_xval": 0.0311279296875, "num_input_tokens_seen": 591114924, "step": 6476 }, { "epoch": 26.9875, "grad_norm": 5.765636049482318, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 591206700, "step": 6477 }, { "epoch": 26.9875, "loss": 0.026380009949207306, "loss_ce": 1.2822742974094581e-05, "loss_iou": 0.283203125, "loss_num": 0.005279541015625, "loss_xval": 0.0263671875, "num_input_tokens_seen": 591206700, "step": 6477 }, { "epoch": 26.991666666666667, "grad_norm": 1.3548250943193227, "learning_rate": 5e-05, "loss": 0.0638, "num_input_tokens_seen": 591298156, "step": 6478 }, { "epoch": 26.991666666666667, "loss": 0.03639143705368042, "loss_ce": 1.4486518921330571e-05, "loss_iou": 0.265625, "loss_num": 0.00726318359375, "loss_xval": 0.036376953125, "num_input_tokens_seen": 591298156, "step": 6478 }, { "epoch": 26.995833333333334, "grad_norm": 2.1892743373009442, "learning_rate": 5e-05, "loss": 0.0881, "num_input_tokens_seen": 591389368, "step": 6479 }, { "epoch": 26.995833333333334, "loss": 0.07875439524650574, "loss_ce": 0.0001640051050344482, "loss_iou": 0.185546875, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 591389368, "step": 6479 }, { "epoch": 27.0, "grad_norm": 2.6684763478882854, "learning_rate": 5e-05, "loss": 0.0381, "num_input_tokens_seen": 591481344, "step": 6480 }, { "epoch": 27.0, "loss": 0.021271036937832832, "loss_ce": 2.3174412490334362e-05, "loss_iou": 0.1015625, "loss_num": 0.004241943359375, "loss_xval": 0.021240234375, "num_input_tokens_seen": 591481344, "step": 6480 }, { "epoch": 27.004166666666666, "grad_norm": 1.955922810243266, "learning_rate": 5e-05, "loss": 0.0249, "num_input_tokens_seen": 591571916, "step": 6481 }, { "epoch": 27.004166666666666, "loss": 0.028039991855621338, "loss_ce": 1.966868239833275e-06, "loss_iou": 0.2158203125, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 591571916, "step": 6481 }, { "epoch": 27.008333333333333, "grad_norm": 2.962709243450798, "learning_rate": 5e-05, "loss": 0.0319, "num_input_tokens_seen": 591662092, "step": 6482 }, { "epoch": 27.008333333333333, "loss": 0.040201835334300995, "loss_ce": 2.5569020181137603e-06, "loss_iou": 0.2119140625, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 591662092, "step": 6482 }, { "epoch": 27.0125, "grad_norm": 2.4820166684216094, "learning_rate": 5e-05, "loss": 0.0287, "num_input_tokens_seen": 591753724, "step": 6483 }, { "epoch": 27.0125, "loss": 0.031597621738910675, "loss_ce": 4.300906311982544e-06, "loss_iou": 0.0654296875, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 591753724, "step": 6483 }, { "epoch": 27.016666666666666, "grad_norm": 2.258753161775535, "learning_rate": 5e-05, "loss": 0.0282, "num_input_tokens_seen": 591844740, "step": 6484 }, { "epoch": 27.016666666666666, "loss": 0.030123792588710785, "loss_ce": 0.002207839395850897, "loss_iou": 0.1640625, "loss_num": 0.005584716796875, "loss_xval": 0.0279541015625, "num_input_tokens_seen": 591844740, "step": 6484 }, { "epoch": 27.020833333333332, "grad_norm": 3.1742272127851017, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 591936264, "step": 6485 }, { "epoch": 27.020833333333332, "loss": 0.03546886146068573, "loss_ce": 0.0020978914108127356, "loss_iou": 0.275390625, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 591936264, "step": 6485 }, { "epoch": 27.025, "grad_norm": 3.280678281674529, "learning_rate": 5e-05, "loss": 0.0655, "num_input_tokens_seen": 592026920, "step": 6486 }, { "epoch": 27.025, "loss": 0.06747589260339737, "loss_ce": 1.5269779396476224e-06, "loss_iou": 0.32421875, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 592026920, "step": 6486 }, { "epoch": 27.029166666666665, "grad_norm": 5.005440450340139, "learning_rate": 5e-05, "loss": 0.0383, "num_input_tokens_seen": 592118488, "step": 6487 }, { "epoch": 27.029166666666665, "loss": 0.040529705584049225, "loss_ce": 2.362769464525627e-06, "loss_iou": 0.2890625, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 592118488, "step": 6487 }, { "epoch": 27.033333333333335, "grad_norm": 3.3301406471708206, "learning_rate": 5e-05, "loss": 0.0378, "num_input_tokens_seen": 592209508, "step": 6488 }, { "epoch": 27.033333333333335, "loss": 0.025289881974458694, "loss_ce": 6.0693087107210886e-06, "loss_iou": 0.263671875, "loss_num": 0.00506591796875, "loss_xval": 0.0252685546875, "num_input_tokens_seen": 592209508, "step": 6488 }, { "epoch": 27.0375, "grad_norm": 2.9558401362765756, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 592301356, "step": 6489 }, { "epoch": 27.0375, "loss": 0.05254870653152466, "loss_ce": 5.066004177933792e-06, "loss_iou": 0.30078125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 592301356, "step": 6489 }, { "epoch": 27.041666666666668, "grad_norm": 2.2463626000732617, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 592392420, "step": 6490 }, { "epoch": 27.041666666666668, "loss": 0.04210108518600464, "loss_ce": 1.7344702428090386e-05, "loss_iou": 0.2158203125, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 592392420, "step": 6490 }, { "epoch": 27.045833333333334, "grad_norm": 1.7286917187255142, "learning_rate": 5e-05, "loss": 0.0588, "num_input_tokens_seen": 592483924, "step": 6491 }, { "epoch": 27.045833333333334, "loss": 0.026698973029851913, "loss_ce": 3.4238990338053554e-05, "loss_iou": 0.23046875, "loss_num": 0.005340576171875, "loss_xval": 0.026611328125, "num_input_tokens_seen": 592483924, "step": 6491 }, { "epoch": 27.05, "grad_norm": 1.945067902151619, "learning_rate": 5e-05, "loss": 0.0688, "num_input_tokens_seen": 592575252, "step": 6492 }, { "epoch": 27.05, "loss": 0.06354920566082001, "loss_ce": 1.161103409685893e-05, "loss_iou": 0.2021484375, "loss_num": 0.01275634765625, "loss_xval": 0.0634765625, "num_input_tokens_seen": 592575252, "step": 6492 }, { "epoch": 27.054166666666667, "grad_norm": 1.6626659274386684, "learning_rate": 5e-05, "loss": 0.0543, "num_input_tokens_seen": 592666608, "step": 6493 }, { "epoch": 27.054166666666667, "loss": 0.06014417111873627, "loss_ce": 0.0001466095563955605, "loss_iou": 0.22265625, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 592666608, "step": 6493 }, { "epoch": 27.058333333333334, "grad_norm": 1.3952258022061073, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 592758172, "step": 6494 }, { "epoch": 27.058333333333334, "loss": 0.06435603648424149, "loss_ce": 2.091861915687332e-06, "loss_iou": 0.21484375, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 592758172, "step": 6494 }, { "epoch": 27.0625, "grad_norm": 2.322261381202757, "learning_rate": 5e-05, "loss": 0.0387, "num_input_tokens_seen": 592849552, "step": 6495 }, { "epoch": 27.0625, "loss": 0.0410141684114933, "loss_ce": 1.3802233297610655e-05, "loss_iou": 0.1865234375, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 592849552, "step": 6495 }, { "epoch": 27.066666666666666, "grad_norm": 1.9380490050746328, "learning_rate": 5e-05, "loss": 0.0573, "num_input_tokens_seen": 592941172, "step": 6496 }, { "epoch": 27.066666666666666, "loss": 0.07027255743741989, "loss_ce": 0.002462498378008604, "loss_iou": 0.01123046875, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 592941172, "step": 6496 }, { "epoch": 27.070833333333333, "grad_norm": 1.9863267251115089, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 593032284, "step": 6497 }, { "epoch": 27.070833333333333, "loss": 0.035664528608322144, "loss_ce": 0.0006608680123463273, "loss_iou": 0.177734375, "loss_num": 0.00701904296875, "loss_xval": 0.034912109375, "num_input_tokens_seen": 593032284, "step": 6497 }, { "epoch": 27.075, "grad_norm": 2.818201131258505, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 593123252, "step": 6498 }, { "epoch": 27.075, "loss": 0.025768805295228958, "loss_ce": 4.3383024603826925e-06, "loss_iou": 0.263671875, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 593123252, "step": 6498 }, { "epoch": 27.079166666666666, "grad_norm": 2.9105649505028994, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 593214604, "step": 6499 }, { "epoch": 27.079166666666666, "loss": 0.020308678969740868, "loss_ce": 1.4489427485386841e-05, "loss_iou": 0.173828125, "loss_num": 0.004058837890625, "loss_xval": 0.020263671875, "num_input_tokens_seen": 593214604, "step": 6499 }, { "epoch": 27.083333333333332, "grad_norm": 2.359393385537779, "learning_rate": 5e-05, "loss": 0.0329, "num_input_tokens_seen": 593303984, "step": 6500 }, { "epoch": 27.083333333333332, "eval_seeclick_CIoU": 0.1611507646739483, "eval_seeclick_GIoU": 0.12508939485996962, "eval_seeclick_IoU": 0.28630343824625015, "eval_seeclick_MAE_all": 0.12311594188213348, "eval_seeclick_MAE_h": 0.1294156238436699, "eval_seeclick_MAE_w": 0.24873895943164825, "eval_seeclick_MAE_x_boxes": 0.2699016109108925, "eval_seeclick_MAE_y_boxes": 0.12702525407075882, "eval_seeclick_NUM_probability": 0.9999813139438629, "eval_seeclick_inside_bbox": 0.4801136404275894, "eval_seeclick_loss": 0.6404656171798706, "eval_seeclick_loss_ce": 0.11072489991784096, "eval_seeclick_loss_iou": 0.42852783203125, "eval_seeclick_loss_num": 0.0981903076171875, "eval_seeclick_loss_xval": 0.49114990234375, "eval_seeclick_runtime": 78.1395, "eval_seeclick_samples_per_second": 0.55, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 593303984, "step": 6500 }, { "epoch": 27.083333333333332, "eval_icons_CIoU": 0.2290511429309845, "eval_icons_GIoU": 0.2096809484064579, "eval_icons_IoU": 0.3420000970363617, "eval_icons_MAE_all": 0.0876893661916256, "eval_icons_MAE_h": 0.17075396329164505, "eval_icons_MAE_w": 0.1464579701423645, "eval_icons_MAE_x_boxes": 0.1476225182414055, "eval_icons_MAE_y_boxes": 0.17341304197907448, "eval_icons_NUM_probability": 0.9999927282333374, "eval_icons_inside_bbox": 0.4913194477558136, "eval_icons_loss": 0.42048099637031555, "eval_icons_loss_ce": 0.0010824212222360075, "eval_icons_loss_iou": 0.19549560546875, "eval_icons_loss_num": 0.085906982421875, "eval_icons_loss_xval": 0.42987060546875, "eval_icons_runtime": 90.1406, "eval_icons_samples_per_second": 0.555, "eval_icons_steps_per_second": 0.022, "num_input_tokens_seen": 593303984, "step": 6500 }, { "epoch": 27.083333333333332, "eval_screenspot_CIoU": 0.4100871682167053, "eval_screenspot_GIoU": 0.39583032329877216, "eval_screenspot_IoU": 0.48177264134089154, "eval_screenspot_MAE_all": 0.09190142154693604, "eval_screenspot_MAE_h": 0.09084974229335785, "eval_screenspot_MAE_w": 0.18963578095038733, "eval_screenspot_MAE_x_boxes": 0.17057538032531738, "eval_screenspot_MAE_y_boxes": 0.08838931967814763, "eval_screenspot_NUM_probability": 0.9996122717857361, "eval_screenspot_inside_bbox": 0.7279166579246521, "eval_screenspot_loss": 0.46608996391296387, "eval_screenspot_loss_ce": 0.00031701042704905075, "eval_screenspot_loss_iou": 0.3773193359375, "eval_screenspot_loss_num": 0.09621938069661458, "eval_screenspot_loss_xval": 0.481201171875, "eval_screenspot_runtime": 151.372, "eval_screenspot_samples_per_second": 0.588, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 593303984, "step": 6500 }, { "epoch": 27.083333333333332, "eval_compot_CIoU": 0.5083262324333191, "eval_compot_GIoU": 0.5026091039180756, "eval_compot_IoU": 0.5841458737850189, "eval_compot_MAE_all": 0.052470432594418526, "eval_compot_MAE_h": 0.06258269213140011, "eval_compot_MAE_w": 0.13531950861215591, "eval_compot_MAE_x_boxes": 0.1363530457019806, "eval_compot_MAE_y_boxes": 0.06230769865214825, "eval_compot_NUM_probability": 0.9999942779541016, "eval_compot_inside_bbox": 0.7604166567325592, "eval_compot_loss": 0.3140476942062378, "eval_compot_loss_ce": 0.042354028671979904, "eval_compot_loss_iou": 0.31719970703125, "eval_compot_loss_num": 0.05543327331542969, "eval_compot_loss_xval": 0.2773284912109375, "eval_compot_runtime": 87.6305, "eval_compot_samples_per_second": 0.571, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 593303984, "step": 6500 }, { "epoch": 27.083333333333332, "loss": 0.3146435022354126, "loss_ce": 0.043616883456707, "loss_iou": 0.33203125, "loss_num": 0.05419921875, "loss_xval": 0.271484375, "num_input_tokens_seen": 593303984, "step": 6500 }, { "epoch": 27.0875, "grad_norm": 2.1077022206848555, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 593394836, "step": 6501 }, { "epoch": 27.0875, "loss": 0.037172507494688034, "loss_ce": 2.0956435946573038e-06, "loss_iou": 0.330078125, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 593394836, "step": 6501 }, { "epoch": 27.091666666666665, "grad_norm": 1.9656642718284403, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 593485112, "step": 6502 }, { "epoch": 27.091666666666665, "loss": 0.06263962388038635, "loss_ce": 2.301181439179345e-06, "loss_iou": 0.33203125, "loss_num": 0.01251220703125, "loss_xval": 0.0625, "num_input_tokens_seen": 593485112, "step": 6502 }, { "epoch": 27.095833333333335, "grad_norm": 1.9131504146432234, "learning_rate": 5e-05, "loss": 0.0597, "num_input_tokens_seen": 593576028, "step": 6503 }, { "epoch": 27.095833333333335, "loss": 0.0599580779671669, "loss_ce": 6.293254045885988e-06, "loss_iou": 0.1513671875, "loss_num": 0.011962890625, "loss_xval": 0.06005859375, "num_input_tokens_seen": 593576028, "step": 6503 }, { "epoch": 27.1, "grad_norm": 1.9339228752806417, "learning_rate": 5e-05, "loss": 0.0496, "num_input_tokens_seen": 593667172, "step": 6504 }, { "epoch": 27.1, "loss": 0.01530100591480732, "loss_ce": 1.9329276256030425e-05, "loss_iou": 0.013671875, "loss_num": 0.0030517578125, "loss_xval": 0.0152587890625, "num_input_tokens_seen": 593667172, "step": 6504 }, { "epoch": 27.104166666666668, "grad_norm": 1.9726035509891664, "learning_rate": 5e-05, "loss": 0.0729, "num_input_tokens_seen": 593758656, "step": 6505 }, { "epoch": 27.104166666666668, "loss": 0.11320458352565765, "loss_ce": 0.004150021355599165, "loss_iou": 0.203125, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 593758656, "step": 6505 }, { "epoch": 27.108333333333334, "grad_norm": 1.4187761290531202, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 593850620, "step": 6506 }, { "epoch": 27.108333333333334, "loss": 0.023839503526687622, "loss_ce": 5.276498086459469e-06, "loss_iou": 0.216796875, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 593850620, "step": 6506 }, { "epoch": 27.1125, "grad_norm": 3.1787292236573577, "learning_rate": 5e-05, "loss": 0.0343, "num_input_tokens_seen": 593942132, "step": 6507 }, { "epoch": 27.1125, "loss": 0.044819869101047516, "loss_ce": 4.804438958672108e-06, "loss_iou": 0.2421875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 593942132, "step": 6507 }, { "epoch": 27.116666666666667, "grad_norm": 3.089052011599488, "learning_rate": 5e-05, "loss": 0.0488, "num_input_tokens_seen": 594033800, "step": 6508 }, { "epoch": 27.116666666666667, "loss": 0.06599961966276169, "loss_ce": 5.357538157113595e-06, "loss_iou": 0.18359375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 594033800, "step": 6508 }, { "epoch": 27.120833333333334, "grad_norm": 2.2688935031872757, "learning_rate": 5e-05, "loss": 0.0289, "num_input_tokens_seen": 594125320, "step": 6509 }, { "epoch": 27.120833333333334, "loss": 0.03243381530046463, "loss_ce": 0.00011569818889256567, "loss_iou": 0.255859375, "loss_num": 0.0064697265625, "loss_xval": 0.0322265625, "num_input_tokens_seen": 594125320, "step": 6509 }, { "epoch": 27.125, "grad_norm": 2.1392485184909646, "learning_rate": 5e-05, "loss": 0.0431, "num_input_tokens_seen": 594216656, "step": 6510 }, { "epoch": 27.125, "loss": 0.03446205332875252, "loss_ce": 0.0003281440294813365, "loss_iou": 0.15234375, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 594216656, "step": 6510 }, { "epoch": 27.129166666666666, "grad_norm": 1.9749644119750247, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 594306504, "step": 6511 }, { "epoch": 27.129166666666666, "loss": 0.031366996467113495, "loss_ce": 2.5445739083806984e-05, "loss_iou": 0.29296875, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 594306504, "step": 6511 }, { "epoch": 27.133333333333333, "grad_norm": 1.4282145397187538, "learning_rate": 5e-05, "loss": 0.0525, "num_input_tokens_seen": 594396640, "step": 6512 }, { "epoch": 27.133333333333333, "loss": 0.03622637316584587, "loss_ce": 2.0083837171114283e-06, "loss_iou": 0.134765625, "loss_num": 0.00726318359375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 594396640, "step": 6512 }, { "epoch": 27.1375, "grad_norm": 1.3492857134776273, "learning_rate": 5e-05, "loss": 0.0354, "num_input_tokens_seen": 594488128, "step": 6513 }, { "epoch": 27.1375, "loss": 0.04107179492712021, "loss_ce": 2.764478722383501e-06, "loss_iou": 0.19921875, "loss_num": 0.00823974609375, "loss_xval": 0.041015625, "num_input_tokens_seen": 594488128, "step": 6513 }, { "epoch": 27.141666666666666, "grad_norm": 1.769915660912848, "learning_rate": 5e-05, "loss": 0.0177, "num_input_tokens_seen": 594579368, "step": 6514 }, { "epoch": 27.141666666666666, "loss": 0.017422253265976906, "loss_ce": 2.7233463697484694e-05, "loss_iou": 0.09326171875, "loss_num": 0.00347900390625, "loss_xval": 0.017333984375, "num_input_tokens_seen": 594579368, "step": 6514 }, { "epoch": 27.145833333333332, "grad_norm": 0.755243181489823, "learning_rate": 5e-05, "loss": 0.0435, "num_input_tokens_seen": 594670584, "step": 6515 }, { "epoch": 27.145833333333332, "loss": 0.05167026072740555, "loss_ce": 4.001905836048536e-06, "loss_iou": 0.24609375, "loss_num": 0.01031494140625, "loss_xval": 0.0517578125, "num_input_tokens_seen": 594670584, "step": 6515 }, { "epoch": 27.15, "grad_norm": 0.9541012916032766, "learning_rate": 5e-05, "loss": 0.0196, "num_input_tokens_seen": 594761792, "step": 6516 }, { "epoch": 27.15, "loss": 0.01985876075923443, "loss_ce": 7.076235306158196e-06, "loss_iou": 0.2158203125, "loss_num": 0.00396728515625, "loss_xval": 0.0198974609375, "num_input_tokens_seen": 594761792, "step": 6516 }, { "epoch": 27.154166666666665, "grad_norm": 1.6180947072383487, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 594853332, "step": 6517 }, { "epoch": 27.154166666666665, "loss": 0.031560756266117096, "loss_ce": 2.0839312128373422e-05, "loss_iou": 0.09716796875, "loss_num": 0.006317138671875, "loss_xval": 0.031494140625, "num_input_tokens_seen": 594853332, "step": 6517 }, { "epoch": 27.158333333333335, "grad_norm": 1.5742236895851742, "learning_rate": 5e-05, "loss": 0.0507, "num_input_tokens_seen": 594944404, "step": 6518 }, { "epoch": 27.158333333333335, "loss": 0.07269126921892166, "loss_ce": 7.469241973012686e-05, "loss_iou": 0.27734375, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 594944404, "step": 6518 }, { "epoch": 27.1625, "grad_norm": 1.9779687504647347, "learning_rate": 5e-05, "loss": 0.0673, "num_input_tokens_seen": 595034692, "step": 6519 }, { "epoch": 27.1625, "loss": 0.07391928136348724, "loss_ce": 5.7055276556639e-06, "loss_iou": 0.11474609375, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 595034692, "step": 6519 }, { "epoch": 27.166666666666668, "grad_norm": 0.8543401365989585, "learning_rate": 5e-05, "loss": 0.0268, "num_input_tokens_seen": 595126832, "step": 6520 }, { "epoch": 27.166666666666668, "loss": 0.020465940237045288, "loss_ce": 4.58647555205971e-05, "loss_iou": 0.146484375, "loss_num": 0.00408935546875, "loss_xval": 0.0203857421875, "num_input_tokens_seen": 595126832, "step": 6520 }, { "epoch": 27.170833333333334, "grad_norm": 0.8576067334089554, "learning_rate": 5e-05, "loss": 0.0203, "num_input_tokens_seen": 595218192, "step": 6521 }, { "epoch": 27.170833333333334, "loss": 0.021774495020508766, "loss_ce": 3.0721468647243455e-05, "loss_iou": 0.2021484375, "loss_num": 0.00433349609375, "loss_xval": 0.021728515625, "num_input_tokens_seen": 595218192, "step": 6521 }, { "epoch": 27.175, "grad_norm": 1.1798262388390548, "learning_rate": 5e-05, "loss": 0.0453, "num_input_tokens_seen": 595310668, "step": 6522 }, { "epoch": 27.175, "loss": 0.055068276822566986, "loss_ce": 0.002219459041953087, "loss_iou": 0.12890625, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 595310668, "step": 6522 }, { "epoch": 27.179166666666667, "grad_norm": 0.47659015671630084, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 595401872, "step": 6523 }, { "epoch": 27.179166666666667, "loss": 0.02778068743646145, "loss_ce": 2.0612305888789706e-06, "loss_iou": 0.220703125, "loss_num": 0.00555419921875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 595401872, "step": 6523 }, { "epoch": 27.183333333333334, "grad_norm": 0.753248181051116, "learning_rate": 5e-05, "loss": 0.0336, "num_input_tokens_seen": 595492764, "step": 6524 }, { "epoch": 27.183333333333334, "loss": 0.042344845831394196, "loss_ce": 0.0001695503160590306, "loss_iou": 0.0400390625, "loss_num": 0.0084228515625, "loss_xval": 0.042236328125, "num_input_tokens_seen": 595492764, "step": 6524 }, { "epoch": 27.1875, "grad_norm": 1.3000092644957844, "learning_rate": 5e-05, "loss": 0.0227, "num_input_tokens_seen": 595584444, "step": 6525 }, { "epoch": 27.1875, "loss": 0.013810301199555397, "loss_ce": 0.00022997862834017724, "loss_iou": 0.11474609375, "loss_num": 0.002716064453125, "loss_xval": 0.0135498046875, "num_input_tokens_seen": 595584444, "step": 6525 }, { "epoch": 27.191666666666666, "grad_norm": 1.0670653485738448, "learning_rate": 5e-05, "loss": 0.0222, "num_input_tokens_seen": 595675460, "step": 6526 }, { "epoch": 27.191666666666666, "loss": 0.019739195704460144, "loss_ce": 1.9522099137248006e-06, "loss_iou": 0.158203125, "loss_num": 0.003936767578125, "loss_xval": 0.019775390625, "num_input_tokens_seen": 595675460, "step": 6526 }, { "epoch": 27.195833333333333, "grad_norm": 1.4191768284685875, "learning_rate": 5e-05, "loss": 0.0221, "num_input_tokens_seen": 595766744, "step": 6527 }, { "epoch": 27.195833333333333, "loss": 0.014562261290848255, "loss_ce": 5.375677119445754e-06, "loss_iou": 0.1875, "loss_num": 0.0029144287109375, "loss_xval": 0.0145263671875, "num_input_tokens_seen": 595766744, "step": 6527 }, { "epoch": 27.2, "grad_norm": 2.510616324231998, "learning_rate": 5e-05, "loss": 0.0373, "num_input_tokens_seen": 595857988, "step": 6528 }, { "epoch": 27.2, "loss": 0.03148343786597252, "loss_ce": 4.559001354209613e-06, "loss_iou": 0.244140625, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 595857988, "step": 6528 }, { "epoch": 27.204166666666666, "grad_norm": 3.548223310534183, "learning_rate": 5e-05, "loss": 0.0418, "num_input_tokens_seen": 595949040, "step": 6529 }, { "epoch": 27.204166666666666, "loss": 0.04415207728743553, "loss_ce": 8.40070060803555e-06, "loss_iou": 0.2265625, "loss_num": 0.00885009765625, "loss_xval": 0.044189453125, "num_input_tokens_seen": 595949040, "step": 6529 }, { "epoch": 27.208333333333332, "grad_norm": 3.9173736783739335, "learning_rate": 5e-05, "loss": 0.0599, "num_input_tokens_seen": 596040256, "step": 6530 }, { "epoch": 27.208333333333332, "loss": 0.03422192111611366, "loss_ce": 0.00021007962641306221, "loss_iou": 0.3828125, "loss_num": 0.006805419921875, "loss_xval": 0.033935546875, "num_input_tokens_seen": 596040256, "step": 6530 }, { "epoch": 27.2125, "grad_norm": 2.4748804437242664, "learning_rate": 5e-05, "loss": 0.0362, "num_input_tokens_seen": 596131668, "step": 6531 }, { "epoch": 27.2125, "loss": 0.02880486100912094, "loss_ce": 1.152791992353741e-05, "loss_iou": 0.271484375, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 596131668, "step": 6531 }, { "epoch": 27.216666666666665, "grad_norm": 2.1199309385036447, "learning_rate": 5e-05, "loss": 0.0566, "num_input_tokens_seen": 596223196, "step": 6532 }, { "epoch": 27.216666666666665, "loss": 0.07532989978790283, "loss_ce": 0.00040924627683125436, "loss_iou": 0.2001953125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 596223196, "step": 6532 }, { "epoch": 27.220833333333335, "grad_norm": 3.021299338389343, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 596315184, "step": 6533 }, { "epoch": 27.220833333333335, "loss": 0.0591164231300354, "loss_ce": 3.877664312312845e-06, "loss_iou": 0.34375, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 596315184, "step": 6533 }, { "epoch": 27.225, "grad_norm": 4.6485282022338446, "learning_rate": 5e-05, "loss": 0.0733, "num_input_tokens_seen": 596406900, "step": 6534 }, { "epoch": 27.225, "loss": 0.1163218691945076, "loss_ce": 4.120826815778855e-06, "loss_iou": 0.263671875, "loss_num": 0.0233154296875, "loss_xval": 0.1162109375, "num_input_tokens_seen": 596406900, "step": 6534 }, { "epoch": 27.229166666666668, "grad_norm": 2.9641180907333062, "learning_rate": 5e-05, "loss": 0.0283, "num_input_tokens_seen": 596498008, "step": 6535 }, { "epoch": 27.229166666666668, "loss": 0.024163711816072464, "loss_ce": 4.7195982915582135e-05, "loss_iou": 0.30859375, "loss_num": 0.00482177734375, "loss_xval": 0.024169921875, "num_input_tokens_seen": 596498008, "step": 6535 }, { "epoch": 27.233333333333334, "grad_norm": 3.072393946682052, "learning_rate": 5e-05, "loss": 0.0549, "num_input_tokens_seen": 596588428, "step": 6536 }, { "epoch": 27.233333333333334, "loss": 0.07454818487167358, "loss_ce": 9.000251338875387e-06, "loss_iou": 0.259765625, "loss_num": 0.014892578125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 596588428, "step": 6536 }, { "epoch": 27.2375, "grad_norm": 2.4653601053704937, "learning_rate": 5e-05, "loss": 0.031, "num_input_tokens_seen": 596678336, "step": 6537 }, { "epoch": 27.2375, "loss": 0.03270196542143822, "loss_ce": 0.001253600581549108, "loss_iou": 0.244140625, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 596678336, "step": 6537 }, { "epoch": 27.241666666666667, "grad_norm": 2.775849081662359, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 596769908, "step": 6538 }, { "epoch": 27.241666666666667, "loss": 0.04061917960643768, "loss_ce": 6.131500413175672e-05, "loss_iou": 0.2734375, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 596769908, "step": 6538 }, { "epoch": 27.245833333333334, "grad_norm": 3.645464170111079, "learning_rate": 5e-05, "loss": 0.0402, "num_input_tokens_seen": 596859852, "step": 6539 }, { "epoch": 27.245833333333334, "loss": 0.05066291242837906, "loss_ce": 3.7343652365962043e-06, "loss_iou": 0.2236328125, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 596859852, "step": 6539 }, { "epoch": 27.25, "grad_norm": 3.247650890370929, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 596951336, "step": 6540 }, { "epoch": 27.25, "loss": 0.06673350930213928, "loss_ce": 6.826207481935853e-06, "loss_iou": 0.1279296875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 596951336, "step": 6540 }, { "epoch": 27.254166666666666, "grad_norm": 2.689825035863558, "learning_rate": 5e-05, "loss": 0.0295, "num_input_tokens_seen": 597042140, "step": 6541 }, { "epoch": 27.254166666666666, "loss": 0.02926325984299183, "loss_ce": 4.53209349871031e-06, "loss_iou": 0.228515625, "loss_num": 0.005859375, "loss_xval": 0.029296875, "num_input_tokens_seen": 597042140, "step": 6541 }, { "epoch": 27.258333333333333, "grad_norm": 4.583612867604823, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 597132976, "step": 6542 }, { "epoch": 27.258333333333333, "loss": 0.0501759797334671, "loss_ce": 3.559742253855802e-05, "loss_iou": 0.28125, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 597132976, "step": 6542 }, { "epoch": 27.2625, "grad_norm": 3.109616886623326, "learning_rate": 5e-05, "loss": 0.041, "num_input_tokens_seen": 597222868, "step": 6543 }, { "epoch": 27.2625, "loss": 0.040508195757865906, "loss_ce": 2.6628946216078475e-05, "loss_iou": 0.310546875, "loss_num": 0.00811767578125, "loss_xval": 0.04052734375, "num_input_tokens_seen": 597222868, "step": 6543 }, { "epoch": 27.266666666666666, "grad_norm": 2.402863282173942, "learning_rate": 5e-05, "loss": 0.0239, "num_input_tokens_seen": 597313968, "step": 6544 }, { "epoch": 27.266666666666666, "loss": 0.025843966752290726, "loss_ce": 6.424347520805895e-05, "loss_iou": 0.2265625, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 597313968, "step": 6544 }, { "epoch": 27.270833333333332, "grad_norm": 2.2805245659831757, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 597405364, "step": 6545 }, { "epoch": 27.270833333333332, "loss": 0.04524778574705124, "loss_ce": 5.473571491165785e-06, "loss_iou": 0.1875, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 597405364, "step": 6545 }, { "epoch": 27.275, "grad_norm": 2.0017398197524843, "learning_rate": 5e-05, "loss": 0.0261, "num_input_tokens_seen": 597496600, "step": 6546 }, { "epoch": 27.275, "loss": 0.030996788293123245, "loss_ce": 6.188166480569635e-06, "loss_iou": 0.29296875, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 597496600, "step": 6546 }, { "epoch": 27.279166666666665, "grad_norm": 2.6121521722878294, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 597587648, "step": 6547 }, { "epoch": 27.279166666666665, "loss": 0.052928172051906586, "loss_ce": 3.3578489819774404e-05, "loss_iou": 0.1484375, "loss_num": 0.01055908203125, "loss_xval": 0.052978515625, "num_input_tokens_seen": 597587648, "step": 6547 }, { "epoch": 27.283333333333335, "grad_norm": 8.989873845089479, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 597678912, "step": 6548 }, { "epoch": 27.283333333333335, "loss": 0.02183392643928528, "loss_ce": 0.0006089517846703529, "loss_iou": 0.138671875, "loss_num": 0.004241943359375, "loss_xval": 0.021240234375, "num_input_tokens_seen": 597678912, "step": 6548 }, { "epoch": 27.2875, "grad_norm": 4.589243530412472, "learning_rate": 5e-05, "loss": 0.0363, "num_input_tokens_seen": 597770348, "step": 6549 }, { "epoch": 27.2875, "loss": 0.05072301626205444, "loss_ce": 2.801705477395444e-06, "loss_iou": 0.2158203125, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 597770348, "step": 6549 }, { "epoch": 27.291666666666668, "grad_norm": 1.3975896587582461, "learning_rate": 5e-05, "loss": 0.0459, "num_input_tokens_seen": 597861368, "step": 6550 }, { "epoch": 27.291666666666668, "loss": 0.02159380167722702, "loss_ce": 2.615761331981048e-06, "loss_iou": 0.1240234375, "loss_num": 0.00433349609375, "loss_xval": 0.0216064453125, "num_input_tokens_seen": 597861368, "step": 6550 }, { "epoch": 27.295833333333334, "grad_norm": 0.7843153006333098, "learning_rate": 5e-05, "loss": 0.0169, "num_input_tokens_seen": 597952988, "step": 6551 }, { "epoch": 27.295833333333334, "loss": 0.01972576230764389, "loss_ce": 0.004619560670107603, "loss_iou": 0.1474609375, "loss_num": 0.003021240234375, "loss_xval": 0.01513671875, "num_input_tokens_seen": 597952988, "step": 6551 }, { "epoch": 27.3, "grad_norm": 1.377873437741385, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 598044820, "step": 6552 }, { "epoch": 27.3, "loss": 0.028581751510500908, "loss_ce": 1.729914583847858e-05, "loss_iou": 0.2060546875, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 598044820, "step": 6552 }, { "epoch": 27.304166666666667, "grad_norm": 1.13192773336822, "learning_rate": 5e-05, "loss": 0.0259, "num_input_tokens_seen": 598136288, "step": 6553 }, { "epoch": 27.304166666666667, "loss": 0.015871770679950714, "loss_ce": 6.445116923714522e-06, "loss_iou": 0.150390625, "loss_num": 0.003173828125, "loss_xval": 0.015869140625, "num_input_tokens_seen": 598136288, "step": 6553 }, { "epoch": 27.308333333333334, "grad_norm": 1.3050478403503047, "learning_rate": 5e-05, "loss": 0.0465, "num_input_tokens_seen": 598228480, "step": 6554 }, { "epoch": 27.308333333333334, "loss": 0.05149269849061966, "loss_ce": 0.000391012872569263, "loss_iou": 0.181640625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 598228480, "step": 6554 }, { "epoch": 27.3125, "grad_norm": 2.7849902168565297, "learning_rate": 5e-05, "loss": 0.0182, "num_input_tokens_seen": 598319732, "step": 6555 }, { "epoch": 27.3125, "loss": 0.018088556826114655, "loss_ce": 6.891947577969404e-06, "loss_iou": 0.322265625, "loss_num": 0.0036163330078125, "loss_xval": 0.01806640625, "num_input_tokens_seen": 598319732, "step": 6555 }, { "epoch": 27.316666666666666, "grad_norm": 3.9446926543306478, "learning_rate": 5e-05, "loss": 0.0253, "num_input_tokens_seen": 598411192, "step": 6556 }, { "epoch": 27.316666666666666, "loss": 0.03370252996683121, "loss_ce": 7.215633377199993e-05, "loss_iou": 0.1171875, "loss_num": 0.006744384765625, "loss_xval": 0.03369140625, "num_input_tokens_seen": 598411192, "step": 6556 }, { "epoch": 27.320833333333333, "grad_norm": 1.657969730059567, "learning_rate": 5e-05, "loss": 0.0195, "num_input_tokens_seen": 598502060, "step": 6557 }, { "epoch": 27.320833333333333, "loss": 0.01847069337964058, "loss_ce": 7.558067409263458e-06, "loss_iou": 0.166015625, "loss_num": 0.003692626953125, "loss_xval": 0.0184326171875, "num_input_tokens_seen": 598502060, "step": 6557 }, { "epoch": 27.325, "grad_norm": 2.5518112566432496, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 598593176, "step": 6558 }, { "epoch": 27.325, "loss": 0.02984347566962242, "loss_ce": 4.3059942981926724e-05, "loss_iou": 0.1630859375, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 598593176, "step": 6558 }, { "epoch": 27.329166666666666, "grad_norm": 2.505236392254768, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 598684100, "step": 6559 }, { "epoch": 27.329166666666666, "loss": 0.03522047773003578, "loss_ce": 3.191518317180453e-06, "loss_iou": 0.255859375, "loss_num": 0.007049560546875, "loss_xval": 0.03515625, "num_input_tokens_seen": 598684100, "step": 6559 }, { "epoch": 27.333333333333332, "grad_norm": 3.9208746222693662, "learning_rate": 5e-05, "loss": 0.0471, "num_input_tokens_seen": 598775708, "step": 6560 }, { "epoch": 27.333333333333332, "loss": 0.06393549591302872, "loss_ce": 0.00035211897920817137, "loss_iou": 0.1416015625, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 598775708, "step": 6560 }, { "epoch": 27.3375, "grad_norm": 5.306174373965517, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 598867024, "step": 6561 }, { "epoch": 27.3375, "loss": 0.038117777556180954, "loss_ce": 0.00013102231605444103, "loss_iou": 0.302734375, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 598867024, "step": 6561 }, { "epoch": 27.341666666666665, "grad_norm": 3.203872946930661, "learning_rate": 5e-05, "loss": 0.0345, "num_input_tokens_seen": 598958032, "step": 6562 }, { "epoch": 27.341666666666665, "loss": 0.035031914710998535, "loss_ce": 4.351133975433186e-05, "loss_iou": 0.294921875, "loss_num": 0.006988525390625, "loss_xval": 0.034912109375, "num_input_tokens_seen": 598958032, "step": 6562 }, { "epoch": 27.345833333333335, "grad_norm": 2.9163353781723034, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 599047688, "step": 6563 }, { "epoch": 27.345833333333335, "loss": 0.03683867305517197, "loss_ce": 3.95470760850003e-06, "loss_iou": 0.283203125, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 599047688, "step": 6563 }, { "epoch": 27.35, "grad_norm": 2.516729600554519, "learning_rate": 5e-05, "loss": 0.0547, "num_input_tokens_seen": 599139488, "step": 6564 }, { "epoch": 27.35, "loss": 0.0386323407292366, "loss_ce": 1.234715455211699e-05, "loss_iou": 0.2138671875, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 599139488, "step": 6564 }, { "epoch": 27.354166666666668, "grad_norm": 2.2636362822526723, "learning_rate": 5e-05, "loss": 0.0277, "num_input_tokens_seen": 599231412, "step": 6565 }, { "epoch": 27.354166666666668, "loss": 0.023040983825922012, "loss_ce": 4.598782834364101e-05, "loss_iou": 0.181640625, "loss_num": 0.004608154296875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 599231412, "step": 6565 }, { "epoch": 27.358333333333334, "grad_norm": 1.6733517038068515, "learning_rate": 5e-05, "loss": 0.0353, "num_input_tokens_seen": 599322796, "step": 6566 }, { "epoch": 27.358333333333334, "loss": 0.021526511758565903, "loss_ce": 1.1619857104960829e-05, "loss_iou": 0.1103515625, "loss_num": 0.004302978515625, "loss_xval": 0.021484375, "num_input_tokens_seen": 599322796, "step": 6566 }, { "epoch": 27.3625, "grad_norm": 6.377268414193188, "learning_rate": 5e-05, "loss": 0.0251, "num_input_tokens_seen": 599414560, "step": 6567 }, { "epoch": 27.3625, "loss": 0.030199095606803894, "loss_ce": 9.581130143487826e-06, "loss_iou": 0.30859375, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 599414560, "step": 6567 }, { "epoch": 27.366666666666667, "grad_norm": 2.1677706360799585, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 599506228, "step": 6568 }, { "epoch": 27.366666666666667, "loss": 0.03312592953443527, "loss_ce": 1.4354819541040342e-05, "loss_iou": 0.115234375, "loss_num": 0.006622314453125, "loss_xval": 0.033203125, "num_input_tokens_seen": 599506228, "step": 6568 }, { "epoch": 27.370833333333334, "grad_norm": 1.1997122500015587, "learning_rate": 5e-05, "loss": 0.0479, "num_input_tokens_seen": 599597184, "step": 6569 }, { "epoch": 27.370833333333334, "loss": 0.03354697674512863, "loss_ce": 8.157267075148411e-06, "loss_iou": 0.2197265625, "loss_num": 0.0067138671875, "loss_xval": 0.033447265625, "num_input_tokens_seen": 599597184, "step": 6569 }, { "epoch": 27.375, "grad_norm": 3.7223113507625176, "learning_rate": 5e-05, "loss": 0.0256, "num_input_tokens_seen": 599688540, "step": 6570 }, { "epoch": 27.375, "loss": 0.02370530739426613, "loss_ce": 2.3666618290008046e-05, "loss_iou": 0.236328125, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 599688540, "step": 6570 }, { "epoch": 27.379166666666666, "grad_norm": 5.7049482022435525, "learning_rate": 5e-05, "loss": 0.0584, "num_input_tokens_seen": 599779348, "step": 6571 }, { "epoch": 27.379166666666666, "loss": 0.051517054438591, "loss_ce": 3.3838264243968297e-06, "loss_iou": 0.2236328125, "loss_num": 0.01031494140625, "loss_xval": 0.051513671875, "num_input_tokens_seen": 599779348, "step": 6571 }, { "epoch": 27.383333333333333, "grad_norm": 4.808404128333326, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 599871480, "step": 6572 }, { "epoch": 27.383333333333333, "loss": 0.03034200705587864, "loss_ce": 7.535805707448162e-06, "loss_iou": 0.28125, "loss_num": 0.006072998046875, "loss_xval": 0.0302734375, "num_input_tokens_seen": 599871480, "step": 6572 }, { "epoch": 27.3875, "grad_norm": 2.96591911729717, "learning_rate": 5e-05, "loss": 0.0433, "num_input_tokens_seen": 599962540, "step": 6573 }, { "epoch": 27.3875, "loss": 0.06300961971282959, "loss_ce": 0.00025021936744451523, "loss_iou": 0.259765625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 599962540, "step": 6573 }, { "epoch": 27.391666666666666, "grad_norm": 2.7157211325861375, "learning_rate": 5e-05, "loss": 0.0401, "num_input_tokens_seen": 600054692, "step": 6574 }, { "epoch": 27.391666666666666, "loss": 0.037177495658397675, "loss_ce": 7.08368588675512e-06, "loss_iou": 0.2734375, "loss_num": 0.0074462890625, "loss_xval": 0.037109375, "num_input_tokens_seen": 600054692, "step": 6574 }, { "epoch": 27.395833333333332, "grad_norm": 2.550427003119342, "learning_rate": 5e-05, "loss": 0.0235, "num_input_tokens_seen": 600146196, "step": 6575 }, { "epoch": 27.395833333333332, "loss": 0.02404908463358879, "loss_ce": 1.6491594578837976e-05, "loss_iou": 0.12890625, "loss_num": 0.004791259765625, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 600146196, "step": 6575 }, { "epoch": 27.4, "grad_norm": 2.644869533589869, "learning_rate": 5e-05, "loss": 0.0718, "num_input_tokens_seen": 600235640, "step": 6576 }, { "epoch": 27.4, "loss": 0.11950768530368805, "loss_ce": 0.0006264570401981473, "loss_iou": 0.19921875, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 600235640, "step": 6576 }, { "epoch": 27.404166666666665, "grad_norm": 9.303528233594129, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 600326960, "step": 6577 }, { "epoch": 27.404166666666665, "loss": 0.043067824095487595, "loss_ce": 3.8039237551856786e-05, "loss_iou": 0.2333984375, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 600326960, "step": 6577 }, { "epoch": 27.408333333333335, "grad_norm": 2.732674325736436, "learning_rate": 5e-05, "loss": 0.0622, "num_input_tokens_seen": 600418444, "step": 6578 }, { "epoch": 27.408333333333335, "loss": 0.07342066615819931, "loss_ce": 0.0001555857015773654, "loss_iou": 0.2578125, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 600418444, "step": 6578 }, { "epoch": 27.4125, "grad_norm": 1.7043220860281332, "learning_rate": 5e-05, "loss": 0.0267, "num_input_tokens_seen": 600510068, "step": 6579 }, { "epoch": 27.4125, "loss": 0.036038704216480255, "loss_ce": 5.077363312011585e-06, "loss_iou": 0.248046875, "loss_num": 0.0072021484375, "loss_xval": 0.0361328125, "num_input_tokens_seen": 600510068, "step": 6579 }, { "epoch": 27.416666666666668, "grad_norm": 1.2667137607979664, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 600601860, "step": 6580 }, { "epoch": 27.416666666666668, "loss": 0.01787398010492325, "loss_ce": 0.00020430199219845235, "loss_iou": 0.1171875, "loss_num": 0.0035247802734375, "loss_xval": 0.0177001953125, "num_input_tokens_seen": 600601860, "step": 6580 }, { "epoch": 27.420833333333334, "grad_norm": 1.8383693180637422, "learning_rate": 5e-05, "loss": 0.0188, "num_input_tokens_seen": 600692928, "step": 6581 }, { "epoch": 27.420833333333334, "loss": 0.021061724051833153, "loss_ce": 0.0002411069581285119, "loss_iou": 0.19140625, "loss_num": 0.004180908203125, "loss_xval": 0.0208740234375, "num_input_tokens_seen": 600692928, "step": 6581 }, { "epoch": 27.425, "grad_norm": 2.1614473169920543, "learning_rate": 5e-05, "loss": 0.0392, "num_input_tokens_seen": 600783732, "step": 6582 }, { "epoch": 27.425, "loss": 0.046985093504190445, "loss_ce": 3.2839527648320654e-06, "loss_iou": 0.275390625, "loss_num": 0.0093994140625, "loss_xval": 0.046875, "num_input_tokens_seen": 600783732, "step": 6582 }, { "epoch": 27.429166666666667, "grad_norm": 2.2908035124483934, "learning_rate": 5e-05, "loss": 0.0296, "num_input_tokens_seen": 600875184, "step": 6583 }, { "epoch": 27.429166666666667, "loss": 0.022312387824058533, "loss_ce": 4.037513917864999e-06, "loss_iou": 0.158203125, "loss_num": 0.00445556640625, "loss_xval": 0.0223388671875, "num_input_tokens_seen": 600875184, "step": 6583 }, { "epoch": 27.433333333333334, "grad_norm": 2.351502737531473, "learning_rate": 5e-05, "loss": 0.0322, "num_input_tokens_seen": 600967108, "step": 6584 }, { "epoch": 27.433333333333334, "loss": 0.032669175416231155, "loss_ce": 7.739663487882353e-06, "loss_iou": 0.345703125, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 600967108, "step": 6584 }, { "epoch": 27.4375, "grad_norm": 2.510176621938398, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 601057556, "step": 6585 }, { "epoch": 27.4375, "loss": 0.024011608213186264, "loss_ce": 9.533564480079804e-06, "loss_iou": 0.279296875, "loss_num": 0.004791259765625, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 601057556, "step": 6585 }, { "epoch": 27.441666666666666, "grad_norm": 4.978150988584718, "learning_rate": 5e-05, "loss": 0.0364, "num_input_tokens_seen": 601149160, "step": 6586 }, { "epoch": 27.441666666666666, "loss": 0.029743358492851257, "loss_ce": 1.9236387743148953e-05, "loss_iou": 0.283203125, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 601149160, "step": 6586 }, { "epoch": 27.445833333333333, "grad_norm": 2.660998513384476, "learning_rate": 5e-05, "loss": 0.0692, "num_input_tokens_seen": 601240432, "step": 6587 }, { "epoch": 27.445833333333333, "loss": 0.08215197920799255, "loss_ce": 1.3918957847636193e-05, "loss_iou": 0.177734375, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 601240432, "step": 6587 }, { "epoch": 27.45, "grad_norm": 2.9024923055038885, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 601331376, "step": 6588 }, { "epoch": 27.45, "loss": 0.030946815386414528, "loss_ce": 1.990160626519355e-06, "loss_iou": 0.22265625, "loss_num": 0.006195068359375, "loss_xval": 0.031005859375, "num_input_tokens_seen": 601331376, "step": 6588 }, { "epoch": 27.454166666666666, "grad_norm": 3.02695444764913, "learning_rate": 5e-05, "loss": 0.0533, "num_input_tokens_seen": 601421736, "step": 6589 }, { "epoch": 27.454166666666666, "loss": 0.038657695055007935, "loss_ce": 3.770285184145905e-05, "loss_iou": 0.26171875, "loss_num": 0.007720947265625, "loss_xval": 0.03857421875, "num_input_tokens_seen": 601421736, "step": 6589 }, { "epoch": 27.458333333333332, "grad_norm": 2.7917795632046367, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 601513420, "step": 6590 }, { "epoch": 27.458333333333332, "loss": 0.02959408238530159, "loss_ce": 3.78065014956519e-05, "loss_iou": 0.150390625, "loss_num": 0.00592041015625, "loss_xval": 0.029541015625, "num_input_tokens_seen": 601513420, "step": 6590 }, { "epoch": 27.4625, "grad_norm": 1.975953164903365, "learning_rate": 5e-05, "loss": 0.0701, "num_input_tokens_seen": 601605732, "step": 6591 }, { "epoch": 27.4625, "loss": 0.055715471506118774, "loss_ce": 2.0891760868835263e-05, "loss_iou": 0.265625, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 601605732, "step": 6591 }, { "epoch": 27.466666666666665, "grad_norm": 2.1041934779287628, "learning_rate": 5e-05, "loss": 0.0307, "num_input_tokens_seen": 601696996, "step": 6592 }, { "epoch": 27.466666666666665, "loss": 0.020221196115016937, "loss_ce": 3.2998759706970304e-06, "loss_iou": 0.2158203125, "loss_num": 0.004058837890625, "loss_xval": 0.020263671875, "num_input_tokens_seen": 601696996, "step": 6592 }, { "epoch": 27.470833333333335, "grad_norm": 3.003669877138788, "learning_rate": 5e-05, "loss": 0.0461, "num_input_tokens_seen": 601788580, "step": 6593 }, { "epoch": 27.470833333333335, "loss": 0.05574270710349083, "loss_ce": 9.980880349758081e-06, "loss_iou": 0.2314453125, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 601788580, "step": 6593 }, { "epoch": 27.475, "grad_norm": 3.1522051116112224, "learning_rate": 5e-05, "loss": 0.0438, "num_input_tokens_seen": 601880024, "step": 6594 }, { "epoch": 27.475, "loss": 0.03714306652545929, "loss_ce": 4.1322076867800206e-05, "loss_iou": 0.26953125, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 601880024, "step": 6594 }, { "epoch": 27.479166666666668, "grad_norm": 2.147560213566929, "learning_rate": 5e-05, "loss": 0.0221, "num_input_tokens_seen": 601971004, "step": 6595 }, { "epoch": 27.479166666666668, "loss": 0.020969413220882416, "loss_ce": 3.836278210656019e-06, "loss_iou": 0.2041015625, "loss_num": 0.004180908203125, "loss_xval": 0.02099609375, "num_input_tokens_seen": 601971004, "step": 6595 }, { "epoch": 27.483333333333334, "grad_norm": 4.119361896806683, "learning_rate": 5e-05, "loss": 0.0629, "num_input_tokens_seen": 602062960, "step": 6596 }, { "epoch": 27.483333333333334, "loss": 0.09118642657995224, "loss_ce": 1.5162068848439958e-05, "loss_iou": 0.275390625, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 602062960, "step": 6596 }, { "epoch": 27.4875, "grad_norm": 2.342176167246577, "learning_rate": 5e-05, "loss": 0.0377, "num_input_tokens_seen": 602154216, "step": 6597 }, { "epoch": 27.4875, "loss": 0.03796786069869995, "loss_ce": 3.992761776316911e-06, "loss_iou": 0.1533203125, "loss_num": 0.007598876953125, "loss_xval": 0.0380859375, "num_input_tokens_seen": 602154216, "step": 6597 }, { "epoch": 27.491666666666667, "grad_norm": 0.905613639951875, "learning_rate": 5e-05, "loss": 0.0408, "num_input_tokens_seen": 602245320, "step": 6598 }, { "epoch": 27.491666666666667, "loss": 0.057194821536540985, "loss_ce": 0.00029479575459845364, "loss_iou": 0.1962890625, "loss_num": 0.01141357421875, "loss_xval": 0.056884765625, "num_input_tokens_seen": 602245320, "step": 6598 }, { "epoch": 27.495833333333334, "grad_norm": 1.3463095939446643, "learning_rate": 5e-05, "loss": 0.0285, "num_input_tokens_seen": 602336928, "step": 6599 }, { "epoch": 27.495833333333334, "loss": 0.018810540437698364, "loss_ce": 4.083451585756848e-06, "loss_iou": 0.2001953125, "loss_num": 0.0037689208984375, "loss_xval": 0.018798828125, "num_input_tokens_seen": 602336928, "step": 6599 }, { "epoch": 27.5, "grad_norm": 1.2228630031767433, "learning_rate": 5e-05, "loss": 0.035, "num_input_tokens_seen": 602428236, "step": 6600 }, { "epoch": 27.5, "loss": 0.047089651226997375, "loss_ce": 1.6286081518046558e-05, "loss_iou": 0.1875, "loss_num": 0.0093994140625, "loss_xval": 0.047119140625, "num_input_tokens_seen": 602428236, "step": 6600 }, { "epoch": 27.504166666666666, "grad_norm": 1.7518779199013301, "learning_rate": 5e-05, "loss": 0.0209, "num_input_tokens_seen": 602519408, "step": 6601 }, { "epoch": 27.504166666666666, "loss": 0.023205768316984177, "loss_ce": 1.2409835107973777e-05, "loss_iou": 0.220703125, "loss_num": 0.004638671875, "loss_xval": 0.023193359375, "num_input_tokens_seen": 602519408, "step": 6601 }, { "epoch": 27.508333333333333, "grad_norm": 3.188805646269444, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 602611080, "step": 6602 }, { "epoch": 27.508333333333333, "loss": 0.027485787868499756, "loss_ce": 4.707836069428595e-06, "loss_iou": 0.2578125, "loss_num": 0.0054931640625, "loss_xval": 0.0274658203125, "num_input_tokens_seen": 602611080, "step": 6602 }, { "epoch": 27.5125, "grad_norm": 3.558059379057071, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 602702520, "step": 6603 }, { "epoch": 27.5125, "loss": 0.03928153216838837, "loss_ce": 0.00020377383043523878, "loss_iou": 0.353515625, "loss_num": 0.0078125, "loss_xval": 0.0390625, "num_input_tokens_seen": 602702520, "step": 6603 }, { "epoch": 27.516666666666666, "grad_norm": 3.153982145205354, "learning_rate": 5e-05, "loss": 0.0462, "num_input_tokens_seen": 602793520, "step": 6604 }, { "epoch": 27.516666666666666, "loss": 0.03709595650434494, "loss_ce": 1.8420216747472296e-06, "loss_iou": 0.294921875, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 602793520, "step": 6604 }, { "epoch": 27.520833333333332, "grad_norm": 2.53906577654717, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 602884316, "step": 6605 }, { "epoch": 27.520833333333332, "loss": 0.08753709495067596, "loss_ce": 5.0501193982199766e-06, "loss_iou": 0.177734375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 602884316, "step": 6605 }, { "epoch": 27.525, "grad_norm": 2.4297300661582253, "learning_rate": 5e-05, "loss": 0.0298, "num_input_tokens_seen": 602976368, "step": 6606 }, { "epoch": 27.525, "loss": 0.03763948008418083, "loss_ce": 0.007869582623243332, "loss_iou": 0.3046875, "loss_num": 0.005950927734375, "loss_xval": 0.02978515625, "num_input_tokens_seen": 602976368, "step": 6606 }, { "epoch": 27.529166666666665, "grad_norm": 2.7158826587930633, "learning_rate": 5e-05, "loss": 0.0473, "num_input_tokens_seen": 603068292, "step": 6607 }, { "epoch": 27.529166666666665, "loss": 0.06054652854800224, "loss_ce": 4.543113027466461e-05, "loss_iou": 0.23828125, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 603068292, "step": 6607 }, { "epoch": 27.533333333333335, "grad_norm": 2.1029110616488156, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 603159684, "step": 6608 }, { "epoch": 27.533333333333335, "loss": 0.030283518135547638, "loss_ce": 0.00010163510160055012, "loss_iou": 0.197265625, "loss_num": 0.00604248046875, "loss_xval": 0.0301513671875, "num_input_tokens_seen": 603159684, "step": 6608 }, { "epoch": 27.5375, "grad_norm": 3.078600833549705, "learning_rate": 5e-05, "loss": 0.0444, "num_input_tokens_seen": 603250252, "step": 6609 }, { "epoch": 27.5375, "loss": 0.046682506799697876, "loss_ce": 5.869356755283661e-06, "loss_iou": 0.158203125, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 603250252, "step": 6609 }, { "epoch": 27.541666666666668, "grad_norm": 2.6082151175759902, "learning_rate": 5e-05, "loss": 0.0325, "num_input_tokens_seen": 603341720, "step": 6610 }, { "epoch": 27.541666666666668, "loss": 0.034782443195581436, "loss_ce": 5.343872544472106e-05, "loss_iou": 0.30859375, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 603341720, "step": 6610 }, { "epoch": 27.545833333333334, "grad_norm": 2.358940988759403, "learning_rate": 5e-05, "loss": 0.0373, "num_input_tokens_seen": 603433232, "step": 6611 }, { "epoch": 27.545833333333334, "loss": 0.05367887765169144, "loss_ce": 6.086263056204189e-06, "loss_iou": 0.234375, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 603433232, "step": 6611 }, { "epoch": 27.55, "grad_norm": 1.7271143762622954, "learning_rate": 5e-05, "loss": 0.0464, "num_input_tokens_seen": 603524472, "step": 6612 }, { "epoch": 27.55, "loss": 0.02543582022190094, "loss_ce": 3.2346802072424907e-06, "loss_iou": 0.1865234375, "loss_num": 0.005096435546875, "loss_xval": 0.025390625, "num_input_tokens_seen": 603524472, "step": 6612 }, { "epoch": 27.554166666666667, "grad_norm": 2.6463370892329623, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 603615952, "step": 6613 }, { "epoch": 27.554166666666667, "loss": 0.051030345261096954, "loss_ce": 3.546969310264103e-05, "loss_iou": 0.31640625, "loss_num": 0.01019287109375, "loss_xval": 0.051025390625, "num_input_tokens_seen": 603615952, "step": 6613 }, { "epoch": 27.558333333333334, "grad_norm": 2.7903412369422806, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 603706352, "step": 6614 }, { "epoch": 27.558333333333334, "loss": 0.03343695402145386, "loss_ce": 1.2576303561218083e-05, "loss_iou": 0.2578125, "loss_num": 0.006683349609375, "loss_xval": 0.033447265625, "num_input_tokens_seen": 603706352, "step": 6614 }, { "epoch": 27.5625, "grad_norm": 2.762314092578423, "learning_rate": 5e-05, "loss": 0.0266, "num_input_tokens_seen": 603797440, "step": 6615 }, { "epoch": 27.5625, "loss": 0.017136216163635254, "loss_ce": 4.410403107613092e-06, "loss_iou": 0.208984375, "loss_num": 0.0034332275390625, "loss_xval": 0.01708984375, "num_input_tokens_seen": 603797440, "step": 6615 }, { "epoch": 27.566666666666666, "grad_norm": 2.852916686819886, "learning_rate": 5e-05, "loss": 0.0512, "num_input_tokens_seen": 603889088, "step": 6616 }, { "epoch": 27.566666666666666, "loss": 0.061923280358314514, "loss_ce": 3.1138415579334833e-06, "loss_iou": 0.26171875, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 603889088, "step": 6616 }, { "epoch": 27.570833333333333, "grad_norm": 2.4174418189875975, "learning_rate": 5e-05, "loss": 0.0357, "num_input_tokens_seen": 603981204, "step": 6617 }, { "epoch": 27.570833333333333, "loss": 0.044821545481681824, "loss_ce": 0.00014381064102053642, "loss_iou": 0.265625, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 603981204, "step": 6617 }, { "epoch": 27.575, "grad_norm": 2.7924830501984013, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 604072500, "step": 6618 }, { "epoch": 27.575, "loss": 0.028552627190947533, "loss_ce": 3.4320119084441103e-06, "loss_iou": 0.298828125, "loss_num": 0.005706787109375, "loss_xval": 0.028564453125, "num_input_tokens_seen": 604072500, "step": 6618 }, { "epoch": 27.579166666666666, "grad_norm": 2.0640590359943154, "learning_rate": 5e-05, "loss": 0.0351, "num_input_tokens_seen": 604164248, "step": 6619 }, { "epoch": 27.579166666666666, "loss": 0.0477108359336853, "loss_ce": 4.231982529745437e-06, "loss_iou": 0.3515625, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 604164248, "step": 6619 }, { "epoch": 27.583333333333332, "grad_norm": 2.7243145323678717, "learning_rate": 5e-05, "loss": 0.0332, "num_input_tokens_seen": 604255476, "step": 6620 }, { "epoch": 27.583333333333332, "loss": 0.026827622205018997, "loss_ce": 2.6708999030233826e-06, "loss_iou": 0.26171875, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 604255476, "step": 6620 }, { "epoch": 27.5875, "grad_norm": 2.9996038692424625, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 604345568, "step": 6621 }, { "epoch": 27.5875, "loss": 0.05639968067407608, "loss_ce": 3.193629254383268e-06, "loss_iou": 0.322265625, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 604345568, "step": 6621 }, { "epoch": 27.591666666666665, "grad_norm": 2.791497014815601, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 604436948, "step": 6622 }, { "epoch": 27.591666666666665, "loss": 0.04797312244772911, "loss_ce": 6.0525271692313254e-05, "loss_iou": 0.1748046875, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 604436948, "step": 6622 }, { "epoch": 27.595833333333335, "grad_norm": 3.23239804379117, "learning_rate": 5e-05, "loss": 0.0332, "num_input_tokens_seen": 604528500, "step": 6623 }, { "epoch": 27.595833333333335, "loss": 0.03417633846402168, "loss_ce": 4.281735073163873e-06, "loss_iou": 0.283203125, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 604528500, "step": 6623 }, { "epoch": 27.6, "grad_norm": 2.5592091033285413, "learning_rate": 5e-05, "loss": 0.0305, "num_input_tokens_seen": 604620180, "step": 6624 }, { "epoch": 27.6, "loss": 0.03764050453901291, "loss_ce": 4.700970748672262e-06, "loss_iou": 0.2734375, "loss_num": 0.007537841796875, "loss_xval": 0.03759765625, "num_input_tokens_seen": 604620180, "step": 6624 }, { "epoch": 27.604166666666668, "grad_norm": 3.5211528586839744, "learning_rate": 5e-05, "loss": 0.0352, "num_input_tokens_seen": 604711684, "step": 6625 }, { "epoch": 27.604166666666668, "loss": 0.021237660199403763, "loss_ce": 9.660785872256383e-05, "loss_iou": 0.2001953125, "loss_num": 0.00421142578125, "loss_xval": 0.0211181640625, "num_input_tokens_seen": 604711684, "step": 6625 }, { "epoch": 27.608333333333334, "grad_norm": 2.95923657655911, "learning_rate": 5e-05, "loss": 0.0398, "num_input_tokens_seen": 604802744, "step": 6626 }, { "epoch": 27.608333333333334, "loss": 0.04550544172525406, "loss_ce": 3.7324662116589025e-06, "loss_iou": 0.28125, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 604802744, "step": 6626 }, { "epoch": 27.6125, "grad_norm": 2.2750047447413237, "learning_rate": 5e-05, "loss": 0.0259, "num_input_tokens_seen": 604893984, "step": 6627 }, { "epoch": 27.6125, "loss": 0.027261460199952126, "loss_ce": 1.63518939189089e-06, "loss_iou": 0.27734375, "loss_num": 0.005462646484375, "loss_xval": 0.0272216796875, "num_input_tokens_seen": 604893984, "step": 6627 }, { "epoch": 27.616666666666667, "grad_norm": 2.2490624202174647, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 604985744, "step": 6628 }, { "epoch": 27.616666666666667, "loss": 0.03529675304889679, "loss_ce": 3.1767167456564493e-06, "loss_iou": 0.263671875, "loss_num": 0.007049560546875, "loss_xval": 0.035400390625, "num_input_tokens_seen": 604985744, "step": 6628 }, { "epoch": 27.620833333333334, "grad_norm": 1.7491363871611125, "learning_rate": 5e-05, "loss": 0.0509, "num_input_tokens_seen": 605077648, "step": 6629 }, { "epoch": 27.620833333333334, "loss": 0.054396286606788635, "loss_ce": 2.1591577024082653e-05, "loss_iou": 0.23046875, "loss_num": 0.0108642578125, "loss_xval": 0.054443359375, "num_input_tokens_seen": 605077648, "step": 6629 }, { "epoch": 27.625, "grad_norm": 1.8101714745526631, "learning_rate": 5e-05, "loss": 0.0308, "num_input_tokens_seen": 605168448, "step": 6630 }, { "epoch": 27.625, "loss": 0.03479132801294327, "loss_ce": 1.2861009963671677e-06, "loss_iou": 0.197265625, "loss_num": 0.0069580078125, "loss_xval": 0.03466796875, "num_input_tokens_seen": 605168448, "step": 6630 }, { "epoch": 27.629166666666666, "grad_norm": 2.3916940840025216, "learning_rate": 5e-05, "loss": 0.0522, "num_input_tokens_seen": 605259724, "step": 6631 }, { "epoch": 27.629166666666666, "loss": 0.08446063101291656, "loss_ce": 3.2409195682703285e-06, "loss_iou": 0.171875, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 605259724, "step": 6631 }, { "epoch": 27.633333333333333, "grad_norm": 3.048251825415468, "learning_rate": 5e-05, "loss": 0.0256, "num_input_tokens_seen": 605350616, "step": 6632 }, { "epoch": 27.633333333333333, "loss": 0.02368415705859661, "loss_ce": 2.51623964686587e-06, "loss_iou": 0.259765625, "loss_num": 0.004730224609375, "loss_xval": 0.023681640625, "num_input_tokens_seen": 605350616, "step": 6632 }, { "epoch": 27.6375, "grad_norm": 3.634069175070679, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 605441932, "step": 6633 }, { "epoch": 27.6375, "loss": 0.0382399708032608, "loss_ce": 1.4437129038924468e-06, "loss_iou": 0.298828125, "loss_num": 0.00762939453125, "loss_xval": 0.038330078125, "num_input_tokens_seen": 605441932, "step": 6633 }, { "epoch": 27.641666666666666, "grad_norm": 2.9491222861087563, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 605533148, "step": 6634 }, { "epoch": 27.641666666666666, "loss": 0.022725991904735565, "loss_ce": 5.656002485920908e-06, "loss_iou": 0.1904296875, "loss_num": 0.004547119140625, "loss_xval": 0.022705078125, "num_input_tokens_seen": 605533148, "step": 6634 }, { "epoch": 27.645833333333332, "grad_norm": 4.376426521985265, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 605624092, "step": 6635 }, { "epoch": 27.645833333333332, "loss": 0.0188091192394495, "loss_ce": 2.6615209662850248e-06, "loss_iou": 0.177734375, "loss_num": 0.0037689208984375, "loss_xval": 0.018798828125, "num_input_tokens_seen": 605624092, "step": 6635 }, { "epoch": 27.65, "grad_norm": 2.509259588092114, "learning_rate": 5e-05, "loss": 0.0928, "num_input_tokens_seen": 605715632, "step": 6636 }, { "epoch": 27.65, "loss": 0.15086206793785095, "loss_ce": 0.00016627281729597598, "loss_iou": 0.1123046875, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 605715632, "step": 6636 }, { "epoch": 27.654166666666665, "grad_norm": 1.192599307960487, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 605807444, "step": 6637 }, { "epoch": 27.654166666666665, "loss": 0.027369150891900063, "loss_ce": 0.00013221264816820621, "loss_iou": 0.271484375, "loss_num": 0.005462646484375, "loss_xval": 0.0272216796875, "num_input_tokens_seen": 605807444, "step": 6637 }, { "epoch": 27.658333333333335, "grad_norm": 0.8708589826806008, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 605898772, "step": 6638 }, { "epoch": 27.658333333333335, "loss": 0.03177279233932495, "loss_ce": 3.994483449787367e-06, "loss_iou": 0.2099609375, "loss_num": 0.00634765625, "loss_xval": 0.03173828125, "num_input_tokens_seen": 605898772, "step": 6638 }, { "epoch": 27.6625, "grad_norm": 2.7583714638593837, "learning_rate": 5e-05, "loss": 0.0361, "num_input_tokens_seen": 605990100, "step": 6639 }, { "epoch": 27.6625, "loss": 0.028095796704292297, "loss_ce": 4.366681423562113e-06, "loss_iou": 0.29296875, "loss_num": 0.005615234375, "loss_xval": 0.028076171875, "num_input_tokens_seen": 605990100, "step": 6639 }, { "epoch": 27.666666666666668, "grad_norm": 4.235285412737973, "learning_rate": 5e-05, "loss": 0.0326, "num_input_tokens_seen": 606081324, "step": 6640 }, { "epoch": 27.666666666666668, "loss": 0.02892245352268219, "loss_ce": 7.046784048725385e-06, "loss_iou": 0.302734375, "loss_num": 0.00579833984375, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 606081324, "step": 6640 }, { "epoch": 27.670833333333334, "grad_norm": 2.0263615613746264, "learning_rate": 5e-05, "loss": 0.0511, "num_input_tokens_seen": 606173076, "step": 6641 }, { "epoch": 27.670833333333334, "loss": 0.06474164873361588, "loss_ce": 0.00016645470168441534, "loss_iou": 0.2001953125, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 606173076, "step": 6641 }, { "epoch": 27.675, "grad_norm": 2.379405254853854, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 606264580, "step": 6642 }, { "epoch": 27.675, "loss": 0.024179283529520035, "loss_ce": 9.360705917060841e-06, "loss_iou": 0.265625, "loss_num": 0.004852294921875, "loss_xval": 0.024169921875, "num_input_tokens_seen": 606264580, "step": 6642 }, { "epoch": 27.679166666666667, "grad_norm": 2.5738652281852654, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 606355852, "step": 6643 }, { "epoch": 27.679166666666667, "loss": 0.020705537870526314, "loss_ce": 6.990535439399537e-06, "loss_iou": 0.20703125, "loss_num": 0.004150390625, "loss_xval": 0.020751953125, "num_input_tokens_seen": 606355852, "step": 6643 }, { "epoch": 27.683333333333334, "grad_norm": 1.7440057256069197, "learning_rate": 5e-05, "loss": 0.0396, "num_input_tokens_seen": 606447168, "step": 6644 }, { "epoch": 27.683333333333334, "loss": 0.05534441024065018, "loss_ce": 0.0016563633689656854, "loss_iou": 0.14453125, "loss_num": 0.0107421875, "loss_xval": 0.0537109375, "num_input_tokens_seen": 606447168, "step": 6644 }, { "epoch": 27.6875, "grad_norm": 2.037974230861017, "learning_rate": 5e-05, "loss": 0.0593, "num_input_tokens_seen": 606538228, "step": 6645 }, { "epoch": 27.6875, "loss": 0.07503647357225418, "loss_ce": 0.0033430559560656548, "loss_iou": 0.224609375, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 606538228, "step": 6645 }, { "epoch": 27.691666666666666, "grad_norm": 3.0074458940026974, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 606629192, "step": 6646 }, { "epoch": 27.691666666666666, "loss": 0.03610651195049286, "loss_ce": 4.216415163682541e-06, "loss_iou": 0.224609375, "loss_num": 0.007232666015625, "loss_xval": 0.0361328125, "num_input_tokens_seen": 606629192, "step": 6646 }, { "epoch": 27.695833333333333, "grad_norm": 2.5371188704551253, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 606718940, "step": 6647 }, { "epoch": 27.695833333333333, "loss": 0.030656758695840836, "loss_ce": 1.8496901930120657e-06, "loss_iou": 0.240234375, "loss_num": 0.006134033203125, "loss_xval": 0.0306396484375, "num_input_tokens_seen": 606718940, "step": 6647 }, { "epoch": 27.7, "grad_norm": 2.105683239007001, "learning_rate": 5e-05, "loss": 0.0463, "num_input_tokens_seen": 606810252, "step": 6648 }, { "epoch": 27.7, "loss": 0.02906056120991707, "loss_ce": 0.002235610270872712, "loss_iou": 0.333984375, "loss_num": 0.00537109375, "loss_xval": 0.02685546875, "num_input_tokens_seen": 606810252, "step": 6648 }, { "epoch": 27.704166666666666, "grad_norm": 2.023054843563252, "learning_rate": 5e-05, "loss": 0.0299, "num_input_tokens_seen": 606901096, "step": 6649 }, { "epoch": 27.704166666666666, "loss": 0.02129945158958435, "loss_ce": 0.0005322406068444252, "loss_iou": 0.2578125, "loss_num": 0.004150390625, "loss_xval": 0.020751953125, "num_input_tokens_seen": 606901096, "step": 6649 }, { "epoch": 27.708333333333332, "grad_norm": 1.6183348267504132, "learning_rate": 5e-05, "loss": 0.0279, "num_input_tokens_seen": 606992180, "step": 6650 }, { "epoch": 27.708333333333332, "loss": 0.01805499568581581, "loss_ce": 3.84854456569883e-06, "loss_iou": 0.11865234375, "loss_num": 0.0036163330078125, "loss_xval": 0.01806640625, "num_input_tokens_seen": 606992180, "step": 6650 }, { "epoch": 27.7125, "grad_norm": 1.3219883169124516, "learning_rate": 5e-05, "loss": 0.0273, "num_input_tokens_seen": 607083380, "step": 6651 }, { "epoch": 27.7125, "loss": 0.022418688982725143, "loss_ce": 4.9306090659229085e-05, "loss_iou": 0.1630859375, "loss_num": 0.004486083984375, "loss_xval": 0.0223388671875, "num_input_tokens_seen": 607083380, "step": 6651 }, { "epoch": 27.716666666666665, "grad_norm": 0.530552864302545, "learning_rate": 5e-05, "loss": 0.0203, "num_input_tokens_seen": 607174456, "step": 6652 }, { "epoch": 27.716666666666665, "loss": 0.018681395798921585, "loss_ce": 1.608139427844435e-05, "loss_iou": 0.13671875, "loss_num": 0.0037384033203125, "loss_xval": 0.0186767578125, "num_input_tokens_seen": 607174456, "step": 6652 }, { "epoch": 27.720833333333335, "grad_norm": 2.2263865427479312, "learning_rate": 5e-05, "loss": 0.0238, "num_input_tokens_seen": 607265168, "step": 6653 }, { "epoch": 27.720833333333335, "loss": 0.024402815848588943, "loss_ce": 4.011170858575497e-06, "loss_iou": 0.212890625, "loss_num": 0.0048828125, "loss_xval": 0.0244140625, "num_input_tokens_seen": 607265168, "step": 6653 }, { "epoch": 27.725, "grad_norm": 1.7763990809981527, "learning_rate": 5e-05, "loss": 0.0298, "num_input_tokens_seen": 607355936, "step": 6654 }, { "epoch": 27.725, "loss": 0.02579689212143421, "loss_ce": 1.909734010041575e-06, "loss_iou": 0.21875, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 607355936, "step": 6654 }, { "epoch": 27.729166666666668, "grad_norm": 1.6228947218015455, "learning_rate": 5e-05, "loss": 0.0439, "num_input_tokens_seen": 607447924, "step": 6655 }, { "epoch": 27.729166666666668, "loss": 0.04186485707759857, "loss_ce": 1.7629317881073803e-05, "loss_iou": 0.169921875, "loss_num": 0.00836181640625, "loss_xval": 0.041748046875, "num_input_tokens_seen": 607447924, "step": 6655 }, { "epoch": 27.733333333333334, "grad_norm": 1.6086721925953786, "learning_rate": 5e-05, "loss": 0.019, "num_input_tokens_seen": 607539088, "step": 6656 }, { "epoch": 27.733333333333334, "loss": 0.022941526025533676, "loss_ce": 7.5659963840735145e-06, "loss_iou": 0.1826171875, "loss_num": 0.00457763671875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 607539088, "step": 6656 }, { "epoch": 27.7375, "grad_norm": 2.8696915212555485, "learning_rate": 5e-05, "loss": 0.0754, "num_input_tokens_seen": 607630200, "step": 6657 }, { "epoch": 27.7375, "loss": 0.02179615944623947, "loss_ce": 6.6089974097849336e-06, "loss_iou": 0.203125, "loss_num": 0.004364013671875, "loss_xval": 0.021728515625, "num_input_tokens_seen": 607630200, "step": 6657 }, { "epoch": 27.741666666666667, "grad_norm": 4.928794728075151, "learning_rate": 5e-05, "loss": 0.099, "num_input_tokens_seen": 607720344, "step": 6658 }, { "epoch": 27.741666666666667, "loss": 0.07867708057165146, "loss_ce": 1.8020633433479816e-05, "loss_iou": 0.17578125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 607720344, "step": 6658 }, { "epoch": 27.745833333333334, "grad_norm": 0.8958089934352309, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 607811356, "step": 6659 }, { "epoch": 27.745833333333334, "loss": 0.020234694704413414, "loss_ce": 1.5400872825921397e-06, "loss_iou": 0.09716796875, "loss_num": 0.004058837890625, "loss_xval": 0.020263671875, "num_input_tokens_seen": 607811356, "step": 6659 }, { "epoch": 27.75, "grad_norm": 2.0022405141020387, "learning_rate": 5e-05, "loss": 0.0566, "num_input_tokens_seen": 607901144, "step": 6660 }, { "epoch": 27.75, "loss": 0.09240372478961945, "loss_ce": 4.121278834645636e-06, "loss_iou": 0.16015625, "loss_num": 0.0185546875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 607901144, "step": 6660 }, { "epoch": 27.754166666666666, "grad_norm": 3.718947048580481, "learning_rate": 5e-05, "loss": 0.0389, "num_input_tokens_seen": 607992280, "step": 6661 }, { "epoch": 27.754166666666666, "loss": 0.05292084068059921, "loss_ce": 3.360340770086623e-06, "loss_iou": 0.166015625, "loss_num": 0.01055908203125, "loss_xval": 0.052978515625, "num_input_tokens_seen": 607992280, "step": 6661 }, { "epoch": 27.758333333333333, "grad_norm": 2.8810944333714255, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 608083284, "step": 6662 }, { "epoch": 27.758333333333333, "loss": 0.02787664532661438, "loss_ce": 2.653983301570406e-06, "loss_iou": 0.2060546875, "loss_num": 0.005584716796875, "loss_xval": 0.02783203125, "num_input_tokens_seen": 608083284, "step": 6662 }, { "epoch": 27.7625, "grad_norm": 3.25957753965448, "learning_rate": 5e-05, "loss": 0.0543, "num_input_tokens_seen": 608174528, "step": 6663 }, { "epoch": 27.7625, "loss": 0.03937568515539169, "loss_ce": 6.904626206960529e-05, "loss_iou": 0.25, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 608174528, "step": 6663 }, { "epoch": 27.766666666666666, "grad_norm": 3.233480124426848, "learning_rate": 5e-05, "loss": 0.0689, "num_input_tokens_seen": 608266056, "step": 6664 }, { "epoch": 27.766666666666666, "loss": 0.07732213288545609, "loss_ce": 5.852544745721389e-06, "loss_iou": 0.1796875, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 608266056, "step": 6664 }, { "epoch": 27.770833333333332, "grad_norm": 2.6917479858733073, "learning_rate": 5e-05, "loss": 0.0415, "num_input_tokens_seen": 608356956, "step": 6665 }, { "epoch": 27.770833333333332, "loss": 0.02752842754125595, "loss_ce": 1.5733947975604679e-06, "loss_iou": 0.291015625, "loss_num": 0.0054931640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 608356956, "step": 6665 }, { "epoch": 27.775, "grad_norm": 2.1732125363132093, "learning_rate": 5e-05, "loss": 0.0405, "num_input_tokens_seen": 608447756, "step": 6666 }, { "epoch": 27.775, "loss": 0.024806858971714973, "loss_ce": 3.4214372135465965e-05, "loss_iou": 0.1630859375, "loss_num": 0.00494384765625, "loss_xval": 0.0247802734375, "num_input_tokens_seen": 608447756, "step": 6666 }, { "epoch": 27.779166666666665, "grad_norm": 3.061774590524382, "learning_rate": 5e-05, "loss": 0.0491, "num_input_tokens_seen": 608538840, "step": 6667 }, { "epoch": 27.779166666666665, "loss": 0.07025608420372009, "loss_ce": 6.565061630681157e-05, "loss_iou": 0.228515625, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 608538840, "step": 6667 }, { "epoch": 27.783333333333335, "grad_norm": 3.6641274833009785, "learning_rate": 5e-05, "loss": 0.0399, "num_input_tokens_seen": 608630136, "step": 6668 }, { "epoch": 27.783333333333335, "loss": 0.041263651102781296, "loss_ce": 3.4404733014525846e-05, "loss_iou": 0.2119140625, "loss_num": 0.00823974609375, "loss_xval": 0.041259765625, "num_input_tokens_seen": 608630136, "step": 6668 }, { "epoch": 27.7875, "grad_norm": 2.2530095216389774, "learning_rate": 5e-05, "loss": 0.0437, "num_input_tokens_seen": 608721748, "step": 6669 }, { "epoch": 27.7875, "loss": 0.06024031713604927, "loss_ce": 2.430454514978919e-06, "loss_iou": 0.21484375, "loss_num": 0.01202392578125, "loss_xval": 0.060302734375, "num_input_tokens_seen": 608721748, "step": 6669 }, { "epoch": 27.791666666666668, "grad_norm": 1.7668261934744909, "learning_rate": 5e-05, "loss": 0.0324, "num_input_tokens_seen": 608813348, "step": 6670 }, { "epoch": 27.791666666666668, "loss": 0.023870760574936867, "loss_ce": 3.653265594039112e-05, "loss_iou": 0.361328125, "loss_num": 0.0047607421875, "loss_xval": 0.0238037109375, "num_input_tokens_seen": 608813348, "step": 6670 }, { "epoch": 27.795833333333334, "grad_norm": 2.27547960921719, "learning_rate": 5e-05, "loss": 0.0362, "num_input_tokens_seen": 608904652, "step": 6671 }, { "epoch": 27.795833333333334, "loss": 0.023014262318611145, "loss_ce": 4.0084287320496514e-06, "loss_iou": 0.265625, "loss_num": 0.004608154296875, "loss_xval": 0.02294921875, "num_input_tokens_seen": 608904652, "step": 6671 }, { "epoch": 27.8, "grad_norm": 2.7513623023413776, "learning_rate": 5e-05, "loss": 0.0349, "num_input_tokens_seen": 608995660, "step": 6672 }, { "epoch": 27.8, "loss": 0.03639537841081619, "loss_ce": 3.166042688462767e-06, "loss_iou": 0.330078125, "loss_num": 0.007293701171875, "loss_xval": 0.036376953125, "num_input_tokens_seen": 608995660, "step": 6672 }, { "epoch": 27.804166666666667, "grad_norm": 2.6703657851807545, "learning_rate": 5e-05, "loss": 0.0446, "num_input_tokens_seen": 609087460, "step": 6673 }, { "epoch": 27.804166666666667, "loss": 0.03163629025220871, "loss_ce": 2.0082923583686352e-05, "loss_iou": 0.26953125, "loss_num": 0.006317138671875, "loss_xval": 0.03173828125, "num_input_tokens_seen": 609087460, "step": 6673 }, { "epoch": 27.808333333333334, "grad_norm": 1.2789380699694435, "learning_rate": 5e-05, "loss": 0.0259, "num_input_tokens_seen": 609178952, "step": 6674 }, { "epoch": 27.808333333333334, "loss": 0.0326501727104187, "loss_ce": 3.995027327619027e-06, "loss_iou": 0.06005859375, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 609178952, "step": 6674 }, { "epoch": 27.8125, "grad_norm": 1.3440151423332536, "learning_rate": 5e-05, "loss": 0.0209, "num_input_tokens_seen": 609270636, "step": 6675 }, { "epoch": 27.8125, "loss": 0.020898228511214256, "loss_ce": 1.315923327638302e-06, "loss_iou": 0.298828125, "loss_num": 0.004180908203125, "loss_xval": 0.0208740234375, "num_input_tokens_seen": 609270636, "step": 6675 }, { "epoch": 27.816666666666666, "grad_norm": 1.9783284850733671, "learning_rate": 5e-05, "loss": 0.0179, "num_input_tokens_seen": 609362236, "step": 6676 }, { "epoch": 27.816666666666666, "loss": 0.01814625971019268, "loss_ce": 3.5588950595411006e-06, "loss_iou": 0.146484375, "loss_num": 0.003631591796875, "loss_xval": 0.0181884765625, "num_input_tokens_seen": 609362236, "step": 6676 }, { "epoch": 27.820833333333333, "grad_norm": 3.5619040739492087, "learning_rate": 5e-05, "loss": 0.0574, "num_input_tokens_seen": 609453936, "step": 6677 }, { "epoch": 27.820833333333333, "loss": 0.023126963526010513, "loss_ce": 2.2699837245454546e-06, "loss_iou": 0.224609375, "loss_num": 0.004638671875, "loss_xval": 0.0230712890625, "num_input_tokens_seen": 609453936, "step": 6677 }, { "epoch": 27.825, "grad_norm": 1.7534022906086244, "learning_rate": 5e-05, "loss": 0.0567, "num_input_tokens_seen": 609545840, "step": 6678 }, { "epoch": 27.825, "loss": 0.07971315085887909, "loss_ce": 0.00019960546342190355, "loss_iou": 0.25390625, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 609545840, "step": 6678 }, { "epoch": 27.829166666666666, "grad_norm": 1.8491931893484272, "learning_rate": 5e-05, "loss": 0.0362, "num_input_tokens_seen": 609637468, "step": 6679 }, { "epoch": 27.829166666666666, "loss": 0.021543636918067932, "loss_ce": 0.0019894989673048258, "loss_iou": 0.173828125, "loss_num": 0.00390625, "loss_xval": 0.01953125, "num_input_tokens_seen": 609637468, "step": 6679 }, { "epoch": 27.833333333333332, "grad_norm": 2.2468572646660143, "learning_rate": 5e-05, "loss": 0.0474, "num_input_tokens_seen": 609728584, "step": 6680 }, { "epoch": 27.833333333333332, "loss": 0.055621709674596786, "loss_ce": 3.423035423111287e-06, "loss_iou": 0.19140625, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 609728584, "step": 6680 }, { "epoch": 27.8375, "grad_norm": 2.3759722096065943, "learning_rate": 5e-05, "loss": 0.0428, "num_input_tokens_seen": 609818644, "step": 6681 }, { "epoch": 27.8375, "loss": 0.032642923295497894, "loss_ce": 1.2005425560346339e-05, "loss_iou": 0.18359375, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 609818644, "step": 6681 }, { "epoch": 27.841666666666665, "grad_norm": 2.356444501863688, "learning_rate": 5e-05, "loss": 0.0547, "num_input_tokens_seen": 609909732, "step": 6682 }, { "epoch": 27.841666666666665, "loss": 0.05059649795293808, "loss_ce": 5.984739800624084e-06, "loss_iou": 0.1982421875, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 609909732, "step": 6682 }, { "epoch": 27.845833333333335, "grad_norm": 2.9347829583069807, "learning_rate": 5e-05, "loss": 0.0386, "num_input_tokens_seen": 610001188, "step": 6683 }, { "epoch": 27.845833333333335, "loss": 0.030507180839776993, "loss_ce": 4.862364676228026e-06, "loss_iou": 0.28515625, "loss_num": 0.006103515625, "loss_xval": 0.030517578125, "num_input_tokens_seen": 610001188, "step": 6683 }, { "epoch": 27.85, "grad_norm": 2.470245589424793, "learning_rate": 5e-05, "loss": 0.0288, "num_input_tokens_seen": 610092612, "step": 6684 }, { "epoch": 27.85, "loss": 0.02897973544895649, "loss_ce": 4.90726888529025e-05, "loss_iou": 0.2099609375, "loss_num": 0.00579833984375, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 610092612, "step": 6684 }, { "epoch": 27.854166666666668, "grad_norm": 2.6669907034193967, "learning_rate": 5e-05, "loss": 0.0683, "num_input_tokens_seen": 610183948, "step": 6685 }, { "epoch": 27.854166666666668, "loss": 0.0505242794752121, "loss_ce": 1.7687789295450784e-05, "loss_iou": 0.2470703125, "loss_num": 0.0101318359375, "loss_xval": 0.050537109375, "num_input_tokens_seen": 610183948, "step": 6685 }, { "epoch": 27.858333333333334, "grad_norm": 4.943193288243672, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 610274844, "step": 6686 }, { "epoch": 27.858333333333334, "loss": 0.044877685606479645, "loss_ce": 1.5843521623537526e-06, "loss_iou": 0.3671875, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 610274844, "step": 6686 }, { "epoch": 27.8625, "grad_norm": 3.015357849727821, "learning_rate": 5e-05, "loss": 0.0331, "num_input_tokens_seen": 610366324, "step": 6687 }, { "epoch": 27.8625, "loss": 0.032734472304582596, "loss_ce": 4.369842827145476e-06, "loss_iou": 0.2216796875, "loss_num": 0.00653076171875, "loss_xval": 0.03271484375, "num_input_tokens_seen": 610366324, "step": 6687 }, { "epoch": 27.866666666666667, "grad_norm": 2.194911272264501, "learning_rate": 5e-05, "loss": 0.0327, "num_input_tokens_seen": 610458028, "step": 6688 }, { "epoch": 27.866666666666667, "loss": 0.03970428556203842, "loss_ce": 8.5460051195696e-06, "loss_iou": 0.146484375, "loss_num": 0.0079345703125, "loss_xval": 0.039794921875, "num_input_tokens_seen": 610458028, "step": 6688 }, { "epoch": 27.870833333333334, "grad_norm": 2.0279866514973808, "learning_rate": 5e-05, "loss": 0.0425, "num_input_tokens_seen": 610549548, "step": 6689 }, { "epoch": 27.870833333333334, "loss": 0.03714201599359512, "loss_ce": 9.751396646606736e-06, "loss_iou": 0.2216796875, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 610549548, "step": 6689 }, { "epoch": 27.875, "grad_norm": 1.929867615469621, "learning_rate": 5e-05, "loss": 0.0414, "num_input_tokens_seen": 610639092, "step": 6690 }, { "epoch": 27.875, "loss": 0.04645119607448578, "loss_ce": 3.4437287013133755e-06, "loss_iou": 0.201171875, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 610639092, "step": 6690 }, { "epoch": 27.879166666666666, "grad_norm": 2.744225789976649, "learning_rate": 5e-05, "loss": 0.0452, "num_input_tokens_seen": 610730424, "step": 6691 }, { "epoch": 27.879166666666666, "loss": 0.02222595363855362, "loss_ce": 1.5272669315891108e-06, "loss_iou": 0.27734375, "loss_num": 0.00445556640625, "loss_xval": 0.022216796875, "num_input_tokens_seen": 610730424, "step": 6691 }, { "epoch": 27.883333333333333, "grad_norm": 3.1370565803980033, "learning_rate": 5e-05, "loss": 0.0344, "num_input_tokens_seen": 610821192, "step": 6692 }, { "epoch": 27.883333333333333, "loss": 0.021936416625976562, "loss_ce": 1.9063465970248217e-06, "loss_iou": 0.1982421875, "loss_num": 0.00439453125, "loss_xval": 0.02197265625, "num_input_tokens_seen": 610821192, "step": 6692 }, { "epoch": 27.8875, "grad_norm": 2.9068527893783416, "learning_rate": 5e-05, "loss": 0.0244, "num_input_tokens_seen": 610912760, "step": 6693 }, { "epoch": 27.8875, "loss": 0.029053665697574615, "loss_ce": 1.6190129827009514e-05, "loss_iou": 0.296875, "loss_num": 0.00579833984375, "loss_xval": 0.029052734375, "num_input_tokens_seen": 610912760, "step": 6693 }, { "epoch": 27.891666666666666, "grad_norm": 2.527096086377251, "learning_rate": 5e-05, "loss": 0.0322, "num_input_tokens_seen": 611002952, "step": 6694 }, { "epoch": 27.891666666666666, "loss": 0.033598653972148895, "loss_ce": 1.4061615729588084e-05, "loss_iou": 0.283203125, "loss_num": 0.0067138671875, "loss_xval": 0.03369140625, "num_input_tokens_seen": 611002952, "step": 6694 }, { "epoch": 27.895833333333332, "grad_norm": 2.518044426828584, "learning_rate": 5e-05, "loss": 0.0447, "num_input_tokens_seen": 611094256, "step": 6695 }, { "epoch": 27.895833333333332, "loss": 0.04927809536457062, "loss_ce": 7.464379450539127e-06, "loss_iou": 0.30078125, "loss_num": 0.00982666015625, "loss_xval": 0.04931640625, "num_input_tokens_seen": 611094256, "step": 6695 }, { "epoch": 27.9, "grad_norm": 1.5148853802682194, "learning_rate": 5e-05, "loss": 0.0229, "num_input_tokens_seen": 611185464, "step": 6696 }, { "epoch": 27.9, "loss": 0.024848662316799164, "loss_ce": 6.838909030193463e-05, "loss_iou": 0.1318359375, "loss_num": 0.00494384765625, "loss_xval": 0.0247802734375, "num_input_tokens_seen": 611185464, "step": 6696 }, { "epoch": 27.904166666666665, "grad_norm": 1.5103727608247175, "learning_rate": 5e-05, "loss": 0.0356, "num_input_tokens_seen": 611275700, "step": 6697 }, { "epoch": 27.904166666666665, "loss": 0.05257517471909523, "loss_ce": 8.644882655062247e-06, "loss_iou": 0.125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 611275700, "step": 6697 }, { "epoch": 27.908333333333335, "grad_norm": 1.5339462371131791, "learning_rate": 5e-05, "loss": 0.0193, "num_input_tokens_seen": 611367152, "step": 6698 }, { "epoch": 27.908333333333335, "loss": 0.021390093490481377, "loss_ce": 4.900951353192795e-06, "loss_iou": 0.1650390625, "loss_num": 0.0042724609375, "loss_xval": 0.0213623046875, "num_input_tokens_seen": 611367152, "step": 6698 }, { "epoch": 27.9125, "grad_norm": 1.4095863126869015, "learning_rate": 5e-05, "loss": 0.0314, "num_input_tokens_seen": 611457952, "step": 6699 }, { "epoch": 27.9125, "loss": 0.03978870064020157, "loss_ce": 1.409547621733509e-06, "loss_iou": 0.212890625, "loss_num": 0.00799560546875, "loss_xval": 0.039794921875, "num_input_tokens_seen": 611457952, "step": 6699 }, { "epoch": 27.916666666666668, "grad_norm": 1.5880024976095921, "learning_rate": 5e-05, "loss": 0.0209, "num_input_tokens_seen": 611548968, "step": 6700 }, { "epoch": 27.916666666666668, "loss": 0.023582756519317627, "loss_ce": 4.225938391755335e-05, "loss_iou": 0.1533203125, "loss_num": 0.00469970703125, "loss_xval": 0.0235595703125, "num_input_tokens_seen": 611548968, "step": 6700 }, { "epoch": 27.920833333333334, "grad_norm": 1.9742190307650425, "learning_rate": 5e-05, "loss": 0.0365, "num_input_tokens_seen": 611639728, "step": 6701 }, { "epoch": 27.920833333333334, "loss": 0.025729922577738762, "loss_ce": 3.604785206334782e-06, "loss_iou": 0.232421875, "loss_num": 0.005157470703125, "loss_xval": 0.0257568359375, "num_input_tokens_seen": 611639728, "step": 6701 }, { "epoch": 27.925, "grad_norm": 1.992467386622936, "learning_rate": 5e-05, "loss": 0.0323, "num_input_tokens_seen": 611731080, "step": 6702 }, { "epoch": 27.925, "loss": 0.03682165965437889, "loss_ce": 2.2019166863174178e-06, "loss_iou": 0.296875, "loss_num": 0.007354736328125, "loss_xval": 0.036865234375, "num_input_tokens_seen": 611731080, "step": 6702 }, { "epoch": 27.929166666666667, "grad_norm": 2.074614272677992, "learning_rate": 5e-05, "loss": 0.0427, "num_input_tokens_seen": 611822700, "step": 6703 }, { "epoch": 27.929166666666667, "loss": 0.028840631246566772, "loss_ce": 1.677820364420768e-05, "loss_iou": 0.2265625, "loss_num": 0.005767822265625, "loss_xval": 0.02880859375, "num_input_tokens_seen": 611822700, "step": 6703 }, { "epoch": 27.933333333333334, "grad_norm": 2.3413248371276985, "learning_rate": 5e-05, "loss": 0.091, "num_input_tokens_seen": 611914400, "step": 6704 }, { "epoch": 27.933333333333334, "loss": 0.1385079026222229, "loss_ce": 3.8804105315648485e-06, "loss_iou": 0.2265625, "loss_num": 0.0277099609375, "loss_xval": 0.138671875, "num_input_tokens_seen": 611914400, "step": 6704 }, { "epoch": 27.9375, "grad_norm": 2.836668070462786, "learning_rate": 5e-05, "loss": 0.0393, "num_input_tokens_seen": 612005760, "step": 6705 }, { "epoch": 27.9375, "loss": 0.03196059912443161, "loss_ce": 2.3952296032803133e-05, "loss_iou": 0.2265625, "loss_num": 0.006378173828125, "loss_xval": 0.031982421875, "num_input_tokens_seen": 612005760, "step": 6705 }, { "epoch": 27.941666666666666, "grad_norm": 3.2116535409493494, "learning_rate": 5e-05, "loss": 0.0311, "num_input_tokens_seen": 612096928, "step": 6706 }, { "epoch": 27.941666666666666, "loss": 0.028221435844898224, "loss_ce": 7.934193490655161e-06, "loss_iou": 0.27734375, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 612096928, "step": 6706 }, { "epoch": 27.945833333333333, "grad_norm": 3.1307035433075927, "learning_rate": 5e-05, "loss": 0.043, "num_input_tokens_seen": 612188292, "step": 6707 }, { "epoch": 27.945833333333333, "loss": 0.028533106669783592, "loss_ce": 0.0002280521730426699, "loss_iou": 0.265625, "loss_num": 0.00567626953125, "loss_xval": 0.0283203125, "num_input_tokens_seen": 612188292, "step": 6707 }, { "epoch": 27.95, "grad_norm": 1.2504373411970096, "learning_rate": 5e-05, "loss": 0.024, "num_input_tokens_seen": 612280228, "step": 6708 }, { "epoch": 27.95, "loss": 0.0176064595580101, "loss_ce": 5.445829629024956e-06, "loss_iou": 0.244140625, "loss_num": 0.0035247802734375, "loss_xval": 0.017578125, "num_input_tokens_seen": 612280228, "step": 6708 }, { "epoch": 27.954166666666666, "grad_norm": 1.2803907274616162, "learning_rate": 5e-05, "loss": 0.0483, "num_input_tokens_seen": 612370392, "step": 6709 }, { "epoch": 27.954166666666666, "loss": 0.024072404950857162, "loss_ce": 9.293554285250138e-06, "loss_iou": 0.181640625, "loss_num": 0.00482177734375, "loss_xval": 0.0240478515625, "num_input_tokens_seen": 612370392, "step": 6709 }, { "epoch": 27.958333333333332, "grad_norm": 1.9039607328747052, "learning_rate": 5e-05, "loss": 0.022, "num_input_tokens_seen": 612461204, "step": 6710 }, { "epoch": 27.958333333333332, "loss": 0.017778582870960236, "loss_ce": 2.0933200630679494e-06, "loss_iou": 0.2119140625, "loss_num": 0.0035552978515625, "loss_xval": 0.017822265625, "num_input_tokens_seen": 612461204, "step": 6710 }, { "epoch": 27.9625, "grad_norm": 1.077575573157038, "learning_rate": 5e-05, "loss": 0.0626, "num_input_tokens_seen": 612552660, "step": 6711 }, { "epoch": 27.9625, "loss": 0.09176675230264664, "loss_ce": 8.022767360671423e-06, "loss_iou": 0.2412109375, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 612552660, "step": 6711 }, { "epoch": 27.966666666666665, "grad_norm": 1.4654702672188098, "learning_rate": 5e-05, "loss": 0.0219, "num_input_tokens_seen": 612644160, "step": 6712 }, { "epoch": 27.966666666666665, "loss": 0.019673090428113937, "loss_ce": 4.2658390157157555e-05, "loss_iou": 0.1953125, "loss_num": 0.00390625, "loss_xval": 0.0196533203125, "num_input_tokens_seen": 612644160, "step": 6712 }, { "epoch": 27.970833333333335, "grad_norm": 2.7529606130467177, "learning_rate": 5e-05, "loss": 0.0537, "num_input_tokens_seen": 612735848, "step": 6713 }, { "epoch": 27.970833333333335, "loss": 0.033948902040719986, "loss_ce": 0.0007762951427139342, "loss_iou": 0.251953125, "loss_num": 0.00665283203125, "loss_xval": 0.033203125, "num_input_tokens_seen": 612735848, "step": 6713 }, { "epoch": 27.975, "grad_norm": 3.718214513967778, "learning_rate": 5e-05, "loss": 0.0594, "num_input_tokens_seen": 612827740, "step": 6714 }, { "epoch": 27.975, "loss": 0.04464350640773773, "loss_ce": 7.25825666449964e-05, "loss_iou": 0.1953125, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 612827740, "step": 6714 }, { "epoch": 27.979166666666668, "grad_norm": 1.9675331451311393, "learning_rate": 5e-05, "loss": 0.0316, "num_input_tokens_seen": 612918596, "step": 6715 }, { "epoch": 27.979166666666668, "loss": 0.031432561576366425, "loss_ce": 7.085216111590853e-06, "loss_iou": 0.1494140625, "loss_num": 0.00628662109375, "loss_xval": 0.031494140625, "num_input_tokens_seen": 612918596, "step": 6715 }, { "epoch": 27.983333333333334, "grad_norm": 2.744930369139453, "learning_rate": 5e-05, "loss": 0.062, "num_input_tokens_seen": 613009696, "step": 6716 }, { "epoch": 27.983333333333334, "loss": 0.045453984290361404, "loss_ce": 2.8569376809173264e-05, "loss_iou": 0.1796875, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 613009696, "step": 6716 }, { "epoch": 27.9875, "grad_norm": 2.170973130092673, "learning_rate": 5e-05, "loss": 0.0562, "num_input_tokens_seen": 613101436, "step": 6717 }, { "epoch": 27.9875, "loss": 0.01815599389374256, "loss_ce": 5.664732270815875e-06, "loss_iou": 0.2041015625, "loss_num": 0.003631591796875, "loss_xval": 0.0181884765625, "num_input_tokens_seen": 613101436, "step": 6717 }, { "epoch": 27.991666666666667, "grad_norm": 2.912160469589973, "learning_rate": 5e-05, "loss": 0.036, "num_input_tokens_seen": 613193484, "step": 6718 }, { "epoch": 27.991666666666667, "loss": 0.03415211662650108, "loss_ce": 2.9476209419954102e-06, "loss_iou": 0.2333984375, "loss_num": 0.0068359375, "loss_xval": 0.0341796875, "num_input_tokens_seen": 613193484, "step": 6718 }, { "epoch": 27.995833333333334, "grad_norm": 2.2128424458657494, "learning_rate": 5e-05, "loss": 0.0443, "num_input_tokens_seen": 613284420, "step": 6719 }, { "epoch": 27.995833333333334, "loss": 0.06363622099161148, "loss_ce": 7.0679270720575005e-06, "loss_iou": 0.24609375, "loss_num": 0.0126953125, "loss_xval": 0.0634765625, "num_input_tokens_seen": 613284420, "step": 6719 }, { "epoch": 28.0, "grad_norm": 2.350621454205893, "learning_rate": 5e-05, "loss": 0.0312, "num_input_tokens_seen": 613375388, "step": 6720 }, { "epoch": 28.0, "loss": 0.042087651789188385, "loss_ce": 3.914565695595229e-06, "loss_iou": 0.2333984375, "loss_num": 0.0084228515625, "loss_xval": 0.0419921875, "num_input_tokens_seen": 613375388, "step": 6720 }, { "epoch": 28.004166666666666, "grad_norm": 3.2327163007317914, "learning_rate": 5e-05, "loss": 0.0263, "num_input_tokens_seen": 613466476, "step": 6721 }, { "epoch": 28.004166666666666, "loss": 0.02487345226109028, "loss_ce": 1.6264102669083513e-06, "loss_iou": 0.296875, "loss_num": 0.004974365234375, "loss_xval": 0.02490234375, "num_input_tokens_seen": 613466476, "step": 6721 }, { "epoch": 28.008333333333333, "grad_norm": 3.589281169371409, "learning_rate": 5e-05, "loss": 0.1078, "num_input_tokens_seen": 613557576, "step": 6722 }, { "epoch": 28.008333333333333, "loss": 0.1355500966310501, "loss_ce": 8.257750596385449e-05, "loss_iou": 0.345703125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 613557576, "step": 6722 }, { "epoch": 28.0125, "grad_norm": 4.843901347091087, "learning_rate": 5e-05, "loss": 0.0292, "num_input_tokens_seen": 613649712, "step": 6723 }, { "epoch": 28.0125, "loss": 0.025940248742699623, "loss_ce": 0.0003283706319052726, "loss_iou": 0.28515625, "loss_num": 0.005126953125, "loss_xval": 0.025634765625, "num_input_tokens_seen": 613649712, "step": 6723 }, { "epoch": 28.016666666666666, "grad_norm": 2.309924836065568, "learning_rate": 5e-05, "loss": 0.0318, "num_input_tokens_seen": 613740504, "step": 6724 }, { "epoch": 28.016666666666666, "loss": 0.03217145800590515, "loss_ce": 5.931042323936708e-06, "loss_iou": 0.173828125, "loss_num": 0.006439208984375, "loss_xval": 0.0322265625, "num_input_tokens_seen": 613740504, "step": 6724 }, { "epoch": 28.020833333333332, "grad_norm": 2.744417311709559, "learning_rate": 5e-05, "loss": 0.0222, "num_input_tokens_seen": 613831876, "step": 6725 }, { "epoch": 28.020833333333332, "loss": 0.023073479533195496, "loss_ce": 2.190429313486675e-06, "loss_iou": 0.328125, "loss_num": 0.004608154296875, "loss_xval": 0.0230712890625, "num_input_tokens_seen": 613831876, "step": 6725 }, { "epoch": 28.025, "grad_norm": 3.9389467811443013, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 613923236, "step": 6726 }, { "epoch": 28.025, "loss": 0.02820133976638317, "loss_ce": 3.0974363198765786e-06, "loss_iou": 0.234375, "loss_num": 0.005645751953125, "loss_xval": 0.0281982421875, "num_input_tokens_seen": 613923236, "step": 6726 }, { "epoch": 28.029166666666665, "grad_norm": 2.3543451636267423, "learning_rate": 5e-05, "loss": 0.0436, "num_input_tokens_seen": 614014544, "step": 6727 }, { "epoch": 28.029166666666665, "loss": 0.025972997769713402, "loss_ce": 2.539197112128022e-06, "loss_iou": 0.1796875, "loss_num": 0.00518798828125, "loss_xval": 0.0260009765625, "num_input_tokens_seen": 614014544, "step": 6727 }, { "epoch": 28.033333333333335, "grad_norm": 1.8596789984578768, "learning_rate": 5e-05, "loss": 0.0761, "num_input_tokens_seen": 614105536, "step": 6728 }, { "epoch": 28.033333333333335, "loss": 0.039376746863126755, "loss_ce": 0.00011588324559852481, "loss_iou": 0.259765625, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 614105536, "step": 6728 }, { "epoch": 28.0375, "grad_norm": 1.2802451215459856, "learning_rate": 5e-05, "loss": 0.0342, "num_input_tokens_seen": 614196852, "step": 6729 }, { "epoch": 28.0375, "loss": 0.04407196491956711, "loss_ce": 4.585856004268862e-06, "loss_iou": 0.30859375, "loss_num": 0.0087890625, "loss_xval": 0.0439453125, "num_input_tokens_seen": 614196852, "step": 6729 }, { "epoch": 28.041666666666668, "grad_norm": 1.8547256072084315, "learning_rate": 5e-05, "loss": 0.0388, "num_input_tokens_seen": 614288368, "step": 6730 }, { "epoch": 28.041666666666668, "loss": 0.0452733188867569, "loss_ce": 8.120239726849832e-06, "loss_iou": 0.177734375, "loss_num": 0.009033203125, "loss_xval": 0.045166015625, "num_input_tokens_seen": 614288368, "step": 6730 }, { "epoch": 28.045833333333334, "grad_norm": 2.529875475786179, "learning_rate": 5e-05, "loss": 0.0411, "num_input_tokens_seen": 614379672, "step": 6731 }, { "epoch": 28.045833333333334, "loss": 0.04790426790714264, "loss_ce": 1.0746186489996035e-05, "loss_iou": 0.205078125, "loss_num": 0.00958251953125, "loss_xval": 0.0478515625, "num_input_tokens_seen": 614379672, "step": 6731 }, { "epoch": 28.05, "grad_norm": 2.8016778294167586, "learning_rate": 5e-05, "loss": 0.0541, "num_input_tokens_seen": 614471132, "step": 6732 }, { "epoch": 28.05, "loss": 0.05601131543517113, "loss_ce": 2.68183030129876e-05, "loss_iou": 0.1494140625, "loss_num": 0.01123046875, "loss_xval": 0.055908203125, "num_input_tokens_seen": 614471132, "step": 6732 }, { "epoch": 28.054166666666667, "grad_norm": 5.041261792599298, "learning_rate": 5e-05, "loss": 0.0273, "num_input_tokens_seen": 614562476, "step": 6733 }, { "epoch": 28.054166666666667, "loss": 0.01708902418613434, "loss_ce": 1.4439017832046375e-05, "loss_iou": 0.2333984375, "loss_num": 0.00341796875, "loss_xval": 0.01708984375, "num_input_tokens_seen": 614562476, "step": 6733 }, { "epoch": 28.058333333333334, "grad_norm": 21.63092022483019, "learning_rate": 5e-05, "loss": 0.032, "num_input_tokens_seen": 614653496, "step": 6734 }, { "epoch": 28.058333333333334, "loss": 0.04017886146903038, "loss_ce": 2.4707196644158103e-06, "loss_iou": 0.2734375, "loss_num": 0.008056640625, "loss_xval": 0.040283203125, "num_input_tokens_seen": 614653496, "step": 6734 }, { "epoch": 28.0625, "grad_norm": 6.262098948531676, "learning_rate": 5e-05, "loss": 0.033, "num_input_tokens_seen": 614744988, "step": 6735 }, { "epoch": 28.0625, "loss": 0.028947506099939346, "loss_ce": 1.5855760011618258e-06, "loss_iou": 0.2197265625, "loss_num": 0.00579833984375, "loss_xval": 0.0289306640625, "num_input_tokens_seen": 614744988, "step": 6735 }, { "epoch": 28.066666666666666, "grad_norm": 1.9436654058305354, "learning_rate": 5e-05, "loss": 0.046, "num_input_tokens_seen": 614835832, "step": 6736 }, { "epoch": 28.066666666666666, "loss": 0.04625730589032173, "loss_ce": 0.00016050375415943563, "loss_iou": 0.296875, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 614835832, "step": 6736 }, { "epoch": 28.070833333333333, "grad_norm": 2.257151469855315, "learning_rate": 5e-05, "loss": 0.0621, "num_input_tokens_seen": 614927224, "step": 6737 }, { "epoch": 28.070833333333333, "loss": 0.059449754655361176, "loss_ce": 1.6775371477706358e-05, "loss_iou": 0.275390625, "loss_num": 0.01190185546875, "loss_xval": 0.059326171875, "num_input_tokens_seen": 614927224, "step": 6737 }, { "epoch": 28.075, "grad_norm": 2.2855989114564763, "learning_rate": 5e-05, "loss": 0.0298, "num_input_tokens_seen": 615018312, "step": 6738 }, { "epoch": 28.075, "loss": 0.02760922722518444, "loss_ce": 5.185429836274125e-05, "loss_iou": 0.30859375, "loss_num": 0.005523681640625, "loss_xval": 0.027587890625, "num_input_tokens_seen": 615018312, "step": 6738 }, { "epoch": 28.079166666666666, "grad_norm": 2.297345574198754, "learning_rate": 5e-05, "loss": 0.0502, "num_input_tokens_seen": 615109608, "step": 6739 }, { "epoch": 28.079166666666666, "loss": 0.05746372416615486, "loss_ce": 6.779150862712413e-05, "loss_iou": 0.134765625, "loss_num": 0.011474609375, "loss_xval": 0.057373046875, "num_input_tokens_seen": 615109608, "step": 6739 }, { "epoch": 28.083333333333332, "grad_norm": 1.5291320958908552, "learning_rate": 5e-05, "loss": 0.0529, "num_input_tokens_seen": 615200904, "step": 6740 }, { "epoch": 28.083333333333332, "loss": 0.05671272426843643, "loss_ce": 0.00014839068171568215, "loss_iou": 0.0301513671875, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 615200904, "step": 6740 }, { "epoch": 28.0875, "grad_norm": 1.089316633257019, "learning_rate": 5e-05, "loss": 0.0608, "num_input_tokens_seen": 615292096, "step": 6741 }, { "epoch": 28.0875, "loss": 0.09118057787418365, "loss_ce": 2.457026857882738e-05, "loss_iou": 0.2041015625, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 615292096, "step": 6741 }, { "epoch": 28.091666666666665, "grad_norm": 1.018290463227347, "learning_rate": 5e-05, "loss": 0.0467, "num_input_tokens_seen": 615382728, "step": 6742 }, { "epoch": 28.091666666666665, "loss": 0.05897248536348343, "loss_ce": 2.7785092242993414e-05, "loss_iou": 0.20703125, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 615382728, "step": 6742 }, { "epoch": 28.095833333333335, "grad_norm": 1.4832378691014567, "learning_rate": 5e-05, "loss": 0.0337, "num_input_tokens_seen": 615473816, "step": 6743 }, { "epoch": 28.095833333333335, "loss": 0.031228337436914444, "loss_ce": 1.2240070645930246e-06, "loss_iou": 0.251953125, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 615473816, "step": 6743 }, { "epoch": 28.1, "grad_norm": 1.7110579965688832, "learning_rate": 5e-05, "loss": 0.0334, "num_input_tokens_seen": 615565288, "step": 6744 }, { "epoch": 28.1, "loss": 0.03355923295021057, "loss_ce": 5.154472546564648e-06, "loss_iou": 0.2392578125, "loss_num": 0.0067138671875, "loss_xval": 0.033447265625, "num_input_tokens_seen": 615565288, "step": 6744 }, { "epoch": 28.104166666666668, "grad_norm": 2.22850107697282, "learning_rate": 5e-05, "loss": 0.0422, "num_input_tokens_seen": 615656148, "step": 6745 }, { "epoch": 28.104166666666668, "loss": 0.06713393330574036, "loss_ce": 2.895528268709313e-06, "loss_iou": 0.2041015625, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 615656148, "step": 6745 }, { "epoch": 28.108333333333334, "grad_norm": 2.8948865798773413, "learning_rate": 5e-05, "loss": 0.0421, "num_input_tokens_seen": 615747460, "step": 6746 }, { "epoch": 28.108333333333334, "loss": 0.034502334892749786, "loss_ce": 2.2117551452538464e-06, "loss_iou": 0.25390625, "loss_num": 0.00689697265625, "loss_xval": 0.034423828125, "num_input_tokens_seen": 615747460, "step": 6746 }, { "epoch": 28.1125, "grad_norm": 2.809796632839258, "learning_rate": 5e-05, "loss": 0.0585, "num_input_tokens_seen": 615838432, "step": 6747 }, { "epoch": 28.1125, "loss": 0.05419039726257324, "loss_ce": 2.1699428543797694e-05, "loss_iou": 0.17578125, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 615838432, "step": 6747 }, { "epoch": 28.116666666666667, "grad_norm": 3.1390099084089327, "learning_rate": 5e-05, "loss": 0.0335, "num_input_tokens_seen": 615929520, "step": 6748 }, { "epoch": 28.116666666666667, "loss": 0.030735237523913383, "loss_ce": 1.9295372112537734e-05, "loss_iou": 0.29296875, "loss_num": 0.006134033203125, "loss_xval": 0.03076171875, "num_input_tokens_seen": 615929520, "step": 6748 }, { "epoch": 28.120833333333334, "grad_norm": 3.0726683309385914, "learning_rate": 5e-05, "loss": 0.0406, "num_input_tokens_seen": 616020724, "step": 6749 }, { "epoch": 28.120833333333334, "loss": 0.03250548616051674, "loss_ce": 0.0012020806316286325, "loss_iou": 0.228515625, "loss_num": 0.006256103515625, "loss_xval": 0.03125, "num_input_tokens_seen": 616020724, "step": 6749 }, { "epoch": 28.125, "grad_norm": 1.7974385929387977, "learning_rate": 5e-05, "loss": 0.0571, "num_input_tokens_seen": 616111644, "step": 6750 }, { "epoch": 28.125, "eval_seeclick_CIoU": 0.22142113745212555, "eval_seeclick_GIoU": 0.20158283412456512, "eval_seeclick_IoU": 0.31360870599746704, "eval_seeclick_MAE_all": 0.09904173389077187, "eval_seeclick_MAE_h": 0.11917447298765182, "eval_seeclick_MAE_w": 0.16062359511852264, "eval_seeclick_MAE_x_boxes": 0.17907852679491043, "eval_seeclick_MAE_y_boxes": 0.11885930970311165, "eval_seeclick_NUM_probability": 0.9999977946281433, "eval_seeclick_inside_bbox": 0.4943181872367859, "eval_seeclick_loss": 0.5863343477249146, "eval_seeclick_loss_ce": 0.11647450551390648, "eval_seeclick_loss_iou": 0.46881103515625, "eval_seeclick_loss_num": 0.092193603515625, "eval_seeclick_loss_xval": 0.4608154296875, "eval_seeclick_runtime": 77.5535, "eval_seeclick_samples_per_second": 0.554, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 616111644, "step": 6750 }, { "epoch": 28.125, "eval_icons_CIoU": 0.24185068905353546, "eval_icons_GIoU": 0.22074273973703384, "eval_icons_IoU": 0.3384847342967987, "eval_icons_MAE_all": 0.08675602078437805, "eval_icons_MAE_h": 0.17416387051343918, "eval_icons_MAE_w": 0.13255734741687775, "eval_icons_MAE_x_boxes": 0.13261838257312775, "eval_icons_MAE_y_boxes": 0.17597932368516922, "eval_icons_NUM_probability": 0.9999973177909851, "eval_icons_inside_bbox": 0.5190972238779068, "eval_icons_loss": 0.4279177784919739, "eval_icons_loss_ce": 0.000579208746785298, "eval_icons_loss_iou": 0.2396240234375, "eval_icons_loss_num": 0.0855712890625, "eval_icons_loss_xval": 0.42791748046875, "eval_icons_runtime": 86.732, "eval_icons_samples_per_second": 0.576, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 616111644, "step": 6750 }, { "epoch": 28.125, "eval_screenspot_CIoU": 0.39986973504225415, "eval_screenspot_GIoU": 0.38962361713250476, "eval_screenspot_IoU": 0.4628542462984721, "eval_screenspot_MAE_all": 0.08986213803291321, "eval_screenspot_MAE_h": 0.09811956932147343, "eval_screenspot_MAE_w": 0.17064129809538522, "eval_screenspot_MAE_x_boxes": 0.15360971788565317, "eval_screenspot_MAE_y_boxes": 0.09253939737876256, "eval_screenspot_NUM_probability": 0.999987006187439, "eval_screenspot_inside_bbox": 0.7145833373069763, "eval_screenspot_loss": 0.45824649930000305, "eval_screenspot_loss_ce": 4.505477348478356e-05, "eval_screenspot_loss_iou": 0.3572591145833333, "eval_screenspot_loss_num": 0.09360249837239583, "eval_screenspot_loss_xval": 0.4679768880208333, "eval_screenspot_runtime": 152.3895, "eval_screenspot_samples_per_second": 0.584, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 616111644, "step": 6750 }, { "epoch": 28.125, "eval_compot_CIoU": 0.5180985331535339, "eval_compot_GIoU": 0.5265152156352997, "eval_compot_IoU": 0.5750917792320251, "eval_compot_MAE_all": 0.0474148690700531, "eval_compot_MAE_h": 0.05340435355901718, "eval_compot_MAE_w": 0.12024356797337532, "eval_compot_MAE_x_boxes": 0.12448347359895706, "eval_compot_MAE_y_boxes": 0.05318191833794117, "eval_compot_NUM_probability": 0.9999876022338867, "eval_compot_inside_bbox": 0.7447916567325592, "eval_compot_loss": 0.2997451722621918, "eval_compot_loss_ce": 0.0403006412088871, "eval_compot_loss_iou": 0.2772216796875, "eval_compot_loss_num": 0.0525665283203125, "eval_compot_loss_xval": 0.262939453125, "eval_compot_runtime": 87.0473, "eval_compot_samples_per_second": 0.574, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 616111644, "step": 6750 } ], "logging_steps": 1.0, "max_steps": 10000, "num_input_tokens_seen": 616111644, "num_train_epochs": 42, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3818994542903296.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }