diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,123997 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 28.125, + "eval_steps": 250, + "global_step": 6750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.004166666666666667, + "grad_norm": 15.225836429636562, + "learning_rate": 5e-05, + "loss": 0.6348, + "num_input_tokens_seen": 91672, + "step": 1 + }, + { + "epoch": 0.004166666666666667, + "loss": 0.6350572109222412, + "loss_ce": 0.3329332172870636, + "loss_iou": 0.44140625, + "loss_num": 0.060546875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 91672, + "step": 1 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 43.774989330975025, + "learning_rate": 5e-05, + "loss": 0.685, + "num_input_tokens_seen": 182816, + "step": 2 + }, + { + "epoch": 0.008333333333333333, + "loss": 0.5322504043579102, + "loss_ce": 0.18776792287826538, + "loss_iou": 0.451171875, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 182816, + "step": 2 + }, + { + "epoch": 0.0125, + "grad_norm": 33.173891217282254, + "learning_rate": 5e-05, + "loss": 0.8189, + "num_input_tokens_seen": 274016, + "step": 3 + }, + { + "epoch": 0.0125, + "loss": 0.7139671444892883, + "loss_ce": 0.21665266156196594, + "loss_iou": 0.1796875, + "loss_num": 0.099609375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 274016, + "step": 3 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 40.975983462161, + "learning_rate": 5e-05, + "loss": 0.7548, + "num_input_tokens_seen": 365104, + "step": 4 + }, + { + "epoch": 0.016666666666666666, + "loss": 0.7472636699676514, + "loss_ce": 0.21650193631649017, + "loss_iou": 0.34375, + "loss_num": 0.1064453125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 365104, + "step": 4 + }, + { + "epoch": 0.020833333333333332, + "grad_norm": 29.770808911000874, + "learning_rate": 5e-05, + "loss": 0.5785, + "num_input_tokens_seen": 456624, + "step": 5 + }, + { + "epoch": 0.020833333333333332, + "loss": 0.6999983787536621, + "loss_ce": 0.11918780207633972, + "loss_iou": 0.2421875, + "loss_num": 0.1162109375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 456624, + "step": 5 + }, + { + "epoch": 0.025, + "grad_norm": 28.600650860163952, + "learning_rate": 5e-05, + "loss": 0.5233, + "num_input_tokens_seen": 547728, + "step": 6 + }, + { + "epoch": 0.025, + "loss": 0.4359915554523468, + "loss_ce": 0.034868501126766205, + "loss_iou": 0.40625, + "loss_num": 0.080078125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 547728, + "step": 6 + }, + { + "epoch": 0.029166666666666667, + "grad_norm": 71.49824118709361, + "learning_rate": 5e-05, + "loss": 0.5259, + "num_input_tokens_seen": 638328, + "step": 7 + }, + { + "epoch": 0.029166666666666667, + "loss": 0.5717108845710754, + "loss_ce": 0.059625912457704544, + "loss_iou": 0.390625, + "loss_num": 0.10205078125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 638328, + "step": 7 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 21.14414243809237, + "learning_rate": 5e-05, + "loss": 0.3605, + "num_input_tokens_seen": 730144, + "step": 8 + }, + { + "epoch": 0.03333333333333333, + "loss": 0.3930358290672302, + "loss_ce": 0.09115596115589142, + "loss_iou": 0.5546875, + "loss_num": 0.06005859375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 730144, + "step": 8 + }, + { + "epoch": 0.0375, + "grad_norm": 8.929265747178142, + "learning_rate": 5e-05, + "loss": 0.5824, + "num_input_tokens_seen": 820752, + "step": 9 + }, + { + "epoch": 0.0375, + "loss": 0.616942286491394, + "loss_ce": 0.13989152014255524, + "loss_iou": 0.41796875, + "loss_num": 0.09521484375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 820752, + "step": 9 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 23.71291071409022, + "learning_rate": 5e-05, + "loss": 0.4991, + "num_input_tokens_seen": 911332, + "step": 10 + }, + { + "epoch": 0.041666666666666664, + "loss": 0.5156526565551758, + "loss_ce": 0.07937334477901459, + "loss_iou": 0.30859375, + "loss_num": 0.0869140625, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 911332, + "step": 10 + }, + { + "epoch": 0.04583333333333333, + "grad_norm": 12.311647302325172, + "learning_rate": 5e-05, + "loss": 0.4041, + "num_input_tokens_seen": 1002020, + "step": 11 + }, + { + "epoch": 0.04583333333333333, + "loss": 0.4094349145889282, + "loss_ce": 0.09174692630767822, + "loss_iou": 0.35546875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 1002020, + "step": 11 + }, + { + "epoch": 0.05, + "grad_norm": 25.222662070545216, + "learning_rate": 5e-05, + "loss": 0.4487, + "num_input_tokens_seen": 1093716, + "step": 12 + }, + { + "epoch": 0.05, + "loss": 0.4946695566177368, + "loss_ce": 0.052530914545059204, + "loss_iou": 0.0, + "loss_num": 0.08837890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 1093716, + "step": 12 + }, + { + "epoch": 0.05416666666666667, + "grad_norm": 36.595578870386646, + "learning_rate": 5e-05, + "loss": 0.4639, + "num_input_tokens_seen": 1185292, + "step": 13 + }, + { + "epoch": 0.05416666666666667, + "loss": 0.4886914789676666, + "loss_ce": 0.06559577584266663, + "loss_iou": 0.55078125, + "loss_num": 0.083984375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 1185292, + "step": 13 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 16.932254739386053, + "learning_rate": 5e-05, + "loss": 0.4863, + "num_input_tokens_seen": 1276800, + "step": 14 + }, + { + "epoch": 0.058333333333333334, + "loss": 0.5169302821159363, + "loss_ce": 0.04061192646622658, + "loss_iou": 0.4140625, + "loss_num": 0.0947265625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 1276800, + "step": 14 + }, + { + "epoch": 0.0625, + "grad_norm": 20.112262019681005, + "learning_rate": 5e-05, + "loss": 0.4915, + "num_input_tokens_seen": 1368160, + "step": 15 + }, + { + "epoch": 0.0625, + "loss": 0.4668349027633667, + "loss_ce": 0.052039965987205505, + "loss_iou": 0.3515625, + "loss_num": 0.08251953125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 1368160, + "step": 15 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 19.330753258290258, + "learning_rate": 5e-05, + "loss": 0.3854, + "num_input_tokens_seen": 1459500, + "step": 16 + }, + { + "epoch": 0.06666666666666667, + "loss": 0.30341359972953796, + "loss_ce": 0.025642598047852516, + "loss_iou": 0.21484375, + "loss_num": 0.05517578125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 1459500, + "step": 16 + }, + { + "epoch": 0.07083333333333333, + "grad_norm": 15.206029260523568, + "learning_rate": 5e-05, + "loss": 0.3651, + "num_input_tokens_seen": 1549624, + "step": 17 + }, + { + "epoch": 0.07083333333333333, + "loss": 0.31986337900161743, + "loss_ce": 0.00980481505393982, + "loss_iou": 0.404296875, + "loss_num": 0.0615234375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 1549624, + "step": 17 + }, + { + "epoch": 0.075, + "grad_norm": 11.090189016650587, + "learning_rate": 5e-05, + "loss": 0.3437, + "num_input_tokens_seen": 1641188, + "step": 18 + }, + { + "epoch": 0.075, + "loss": 0.3119279146194458, + "loss_ce": 0.006019714288413525, + "loss_iou": 0.298828125, + "loss_num": 0.060791015625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 1641188, + "step": 18 + }, + { + "epoch": 0.07916666666666666, + "grad_norm": 7.815000271195775, + "learning_rate": 5e-05, + "loss": 0.4179, + "num_input_tokens_seen": 1732592, + "step": 19 + }, + { + "epoch": 0.07916666666666666, + "loss": 0.37536707520484924, + "loss_ce": 0.05020228400826454, + "loss_iou": 0.234375, + "loss_num": 0.064453125, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 1732592, + "step": 19 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 17.019565838229987, + "learning_rate": 5e-05, + "loss": 0.3924, + "num_input_tokens_seen": 1824392, + "step": 20 + }, + { + "epoch": 0.08333333333333333, + "loss": 0.4389875829219818, + "loss_ce": 0.053001243621110916, + "loss_iou": 0.197265625, + "loss_num": 0.07666015625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 1824392, + "step": 20 + }, + { + "epoch": 0.0875, + "grad_norm": 27.289690836694703, + "learning_rate": 5e-05, + "loss": 0.3755, + "num_input_tokens_seen": 1916148, + "step": 21 + }, + { + "epoch": 0.0875, + "loss": 0.38594403862953186, + "loss_ce": 0.006244330201297998, + "loss_iou": 0.283203125, + "loss_num": 0.0751953125, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 1916148, + "step": 21 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 9.666250864795536, + "learning_rate": 5e-05, + "loss": 0.3872, + "num_input_tokens_seen": 2007508, + "step": 22 + }, + { + "epoch": 0.09166666666666666, + "loss": 0.42383676767349243, + "loss_ce": 0.02857312560081482, + "loss_iou": 0.15234375, + "loss_num": 0.07861328125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 2007508, + "step": 22 + }, + { + "epoch": 0.09583333333333334, + "grad_norm": 51.40578636615118, + "learning_rate": 5e-05, + "loss": 0.4555, + "num_input_tokens_seen": 2098884, + "step": 23 + }, + { + "epoch": 0.09583333333333334, + "loss": 0.5797292590141296, + "loss_ce": 0.024309329688549042, + "loss_iou": 0.1376953125, + "loss_num": 0.11083984375, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 2098884, + "step": 23 + }, + { + "epoch": 0.1, + "grad_norm": 51.60493277040869, + "learning_rate": 5e-05, + "loss": 0.3568, + "num_input_tokens_seen": 2190524, + "step": 24 + }, + { + "epoch": 0.1, + "loss": 0.3909764885902405, + "loss_ce": 0.031479425728321075, + "loss_iou": 0.2216796875, + "loss_num": 0.0712890625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 2190524, + "step": 24 + }, + { + "epoch": 0.10416666666666667, + "grad_norm": 16.93786629596498, + "learning_rate": 5e-05, + "loss": 0.3903, + "num_input_tokens_seen": 2281704, + "step": 25 + }, + { + "epoch": 0.10416666666666667, + "loss": 0.4263181686401367, + "loss_ce": 0.012866010889410973, + "loss_iou": 0.3984375, + "loss_num": 0.08203125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 2281704, + "step": 25 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 8.204292663190124, + "learning_rate": 5e-05, + "loss": 0.3353, + "num_input_tokens_seen": 2373136, + "step": 26 + }, + { + "epoch": 0.10833333333333334, + "loss": 0.3441917300224304, + "loss_ce": 0.04109114035964012, + "loss_iou": 0.416015625, + "loss_num": 0.059814453125, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 2373136, + "step": 26 + }, + { + "epoch": 0.1125, + "grad_norm": 14.965225605623639, + "learning_rate": 5e-05, + "loss": 0.3129, + "num_input_tokens_seen": 2464864, + "step": 27 + }, + { + "epoch": 0.1125, + "loss": 0.2762772738933563, + "loss_ce": 0.008210879750549793, + "loss_iou": 0.185546875, + "loss_num": 0.05322265625, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 2464864, + "step": 27 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 8.997632193662453, + "learning_rate": 5e-05, + "loss": 0.3726, + "num_input_tokens_seen": 2556148, + "step": 28 + }, + { + "epoch": 0.11666666666666667, + "loss": 0.29735785722732544, + "loss_ce": 0.0031683961860835552, + "loss_iou": 0.39453125, + "loss_num": 0.05810546875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 2556148, + "step": 28 + }, + { + "epoch": 0.12083333333333333, + "grad_norm": 9.340585910836419, + "learning_rate": 5e-05, + "loss": 0.3192, + "num_input_tokens_seen": 2647432, + "step": 29 + }, + { + "epoch": 0.12083333333333333, + "loss": 0.3471378684043884, + "loss_ce": 0.037689611315727234, + "loss_iou": 0.50390625, + "loss_num": 0.060791015625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 2647432, + "step": 29 + }, + { + "epoch": 0.125, + "grad_norm": 6.897516566909319, + "learning_rate": 5e-05, + "loss": 0.4767, + "num_input_tokens_seen": 2738528, + "step": 30 + }, + { + "epoch": 0.125, + "loss": 0.5621503591537476, + "loss_ce": 0.0030683819204568863, + "loss_iou": 0.6015625, + "loss_num": 0.1103515625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 2738528, + "step": 30 + }, + { + "epoch": 0.12916666666666668, + "grad_norm": 6.137522658485071, + "learning_rate": 5e-05, + "loss": 0.3425, + "num_input_tokens_seen": 2829600, + "step": 31 + }, + { + "epoch": 0.12916666666666668, + "loss": 0.31172770261764526, + "loss_ce": 0.00258462643250823, + "loss_iou": 0.408203125, + "loss_num": 0.060791015625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 2829600, + "step": 31 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 11.061040660241414, + "learning_rate": 5e-05, + "loss": 0.3697, + "num_input_tokens_seen": 2920560, + "step": 32 + }, + { + "epoch": 0.13333333333333333, + "loss": 0.455640584230423, + "loss_ce": 0.01374603807926178, + "loss_iou": 0.021728515625, + "loss_num": 0.08837890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 2920560, + "step": 32 + }, + { + "epoch": 0.1375, + "grad_norm": 7.153570971149252, + "learning_rate": 5e-05, + "loss": 0.2913, + "num_input_tokens_seen": 3011632, + "step": 33 + }, + { + "epoch": 0.1375, + "loss": 0.26216161251068115, + "loss_ce": 0.0017856480553746223, + "loss_iou": 0.466796875, + "loss_num": 0.051025390625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 3011632, + "step": 33 + }, + { + "epoch": 0.14166666666666666, + "grad_norm": 9.363662789430165, + "learning_rate": 5e-05, + "loss": 0.3185, + "num_input_tokens_seen": 3103048, + "step": 34 + }, + { + "epoch": 0.14166666666666666, + "loss": 0.23897996544837952, + "loss_ce": 0.0021635466255247593, + "loss_iou": 0.431640625, + "loss_num": 0.046142578125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 3103048, + "step": 34 + }, + { + "epoch": 0.14583333333333334, + "grad_norm": 42.136960884006605, + "learning_rate": 5e-05, + "loss": 0.345, + "num_input_tokens_seen": 3194184, + "step": 35 + }, + { + "epoch": 0.14583333333333334, + "loss": 0.2797856330871582, + "loss_ce": 0.00503586744889617, + "loss_iou": 0.443359375, + "loss_num": 0.0537109375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 3194184, + "step": 35 + }, + { + "epoch": 0.15, + "grad_norm": 19.84075677755947, + "learning_rate": 5e-05, + "loss": 0.3154, + "num_input_tokens_seen": 3286064, + "step": 36 + }, + { + "epoch": 0.15, + "loss": 0.34752780199050903, + "loss_ce": 0.006341293454170227, + "loss_iou": 0.5, + "loss_num": 0.06689453125, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 3286064, + "step": 36 + }, + { + "epoch": 0.15416666666666667, + "grad_norm": 12.230342812593133, + "learning_rate": 5e-05, + "loss": 0.2361, + "num_input_tokens_seen": 3377544, + "step": 37 + }, + { + "epoch": 0.15416666666666667, + "loss": 0.2773832082748413, + "loss_ce": 0.016152730211615562, + "loss_iou": 0.4140625, + "loss_num": 0.051025390625, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 3377544, + "step": 37 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 22.44920054627549, + "learning_rate": 5e-05, + "loss": 0.3588, + "num_input_tokens_seen": 3468760, + "step": 38 + }, + { + "epoch": 0.15833333333333333, + "loss": 0.338986337184906, + "loss_ce": 0.011349605396389961, + "loss_iou": 0.384765625, + "loss_num": 0.064453125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 3468760, + "step": 38 + }, + { + "epoch": 0.1625, + "grad_norm": 45.16270859992362, + "learning_rate": 5e-05, + "loss": 0.3721, + "num_input_tokens_seen": 3560220, + "step": 39 + }, + { + "epoch": 0.1625, + "loss": 0.4003419578075409, + "loss_ce": 0.00947281252592802, + "loss_iou": 0.640625, + "loss_num": 0.076171875, + "loss_xval": 0.390625, + "num_input_tokens_seen": 3560220, + "step": 39 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 8.84677489699275, + "learning_rate": 5e-05, + "loss": 0.2993, + "num_input_tokens_seen": 3652116, + "step": 40 + }, + { + "epoch": 0.16666666666666666, + "loss": 0.3454613983631134, + "loss_ce": 0.00494627607986331, + "loss_iou": 0.447265625, + "loss_num": 0.06689453125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 3652116, + "step": 40 + }, + { + "epoch": 0.17083333333333334, + "grad_norm": 19.296762031984866, + "learning_rate": 5e-05, + "loss": 0.3231, + "num_input_tokens_seen": 3743564, + "step": 41 + }, + { + "epoch": 0.17083333333333334, + "loss": 0.33203789591789246, + "loss_ce": 0.010382615029811859, + "loss_iou": 0.408203125, + "loss_num": 0.06298828125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 3743564, + "step": 41 + }, + { + "epoch": 0.175, + "grad_norm": 9.611147368567874, + "learning_rate": 5e-05, + "loss": 0.4113, + "num_input_tokens_seen": 3833652, + "step": 42 + }, + { + "epoch": 0.175, + "loss": 0.3624129593372345, + "loss_ce": 0.005113149061799049, + "loss_iou": 0.29296875, + "loss_num": 0.0703125, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 3833652, + "step": 42 + }, + { + "epoch": 0.17916666666666667, + "grad_norm": 14.443376364235421, + "learning_rate": 5e-05, + "loss": 0.2944, + "num_input_tokens_seen": 3923920, + "step": 43 + }, + { + "epoch": 0.17916666666666667, + "loss": 0.25886574387550354, + "loss_ce": 0.007675546687096357, + "loss_iou": 0.20703125, + "loss_num": 0.049560546875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 3923920, + "step": 43 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 9.688109757625302, + "learning_rate": 5e-05, + "loss": 0.317, + "num_input_tokens_seen": 4015400, + "step": 44 + }, + { + "epoch": 0.18333333333333332, + "loss": 0.3496725559234619, + "loss_ce": 0.007753598503768444, + "loss_iou": 0.54296875, + "loss_num": 0.06640625, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 4015400, + "step": 44 + }, + { + "epoch": 0.1875, + "grad_norm": 14.737713800416389, + "learning_rate": 5e-05, + "loss": 0.3431, + "num_input_tokens_seen": 4107036, + "step": 45 + }, + { + "epoch": 0.1875, + "loss": 0.28952276706695557, + "loss_ce": 0.010653123259544373, + "loss_iou": 0.484375, + "loss_num": 0.053955078125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 4107036, + "step": 45 + }, + { + "epoch": 0.19166666666666668, + "grad_norm": 12.460730457125793, + "learning_rate": 5e-05, + "loss": 0.3115, + "num_input_tokens_seen": 4198220, + "step": 46 + }, + { + "epoch": 0.19166666666666668, + "loss": 0.35599175095558167, + "loss_ce": 0.007236876059323549, + "loss_iou": 0.2216796875, + "loss_num": 0.06884765625, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 4198220, + "step": 46 + }, + { + "epoch": 0.19583333333333333, + "grad_norm": 8.898392444091169, + "learning_rate": 5e-05, + "loss": 0.2926, + "num_input_tokens_seen": 4289704, + "step": 47 + }, + { + "epoch": 0.19583333333333333, + "loss": 0.318065345287323, + "loss_ce": 0.0007435796433128417, + "loss_iou": 0.462890625, + "loss_num": 0.061767578125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 4289704, + "step": 47 + }, + { + "epoch": 0.2, + "grad_norm": 67.97750606769903, + "learning_rate": 5e-05, + "loss": 0.3416, + "num_input_tokens_seen": 4380952, + "step": 48 + }, + { + "epoch": 0.2, + "loss": 0.3091282844543457, + "loss_ce": 0.014999864622950554, + "loss_iou": 0.38671875, + "loss_num": 0.057373046875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 4380952, + "step": 48 + }, + { + "epoch": 0.20416666666666666, + "grad_norm": 6.486228680019228, + "learning_rate": 5e-05, + "loss": 0.3388, + "num_input_tokens_seen": 4471980, + "step": 49 + }, + { + "epoch": 0.20416666666666666, + "loss": 0.370783269405365, + "loss_ce": 0.0007881773635745049, + "loss_iou": 0.44921875, + "loss_num": 0.072265625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 4471980, + "step": 49 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 8.955989103250289, + "learning_rate": 5e-05, + "loss": 0.2479, + "num_input_tokens_seen": 4563032, + "step": 50 + }, + { + "epoch": 0.20833333333333334, + "loss": 0.2502215504646301, + "loss_ce": 0.024360958486795425, + "loss_iou": 0.23828125, + "loss_num": 0.044189453125, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 4563032, + "step": 50 + }, + { + "epoch": 0.2125, + "grad_norm": 31.650556935603895, + "learning_rate": 5e-05, + "loss": 0.3706, + "num_input_tokens_seen": 4654648, + "step": 51 + }, + { + "epoch": 0.2125, + "loss": 0.33480727672576904, + "loss_ce": 0.018278930336236954, + "loss_iou": 0.1875, + "loss_num": 0.0625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 4654648, + "step": 51 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 7.122796481536201, + "learning_rate": 5e-05, + "loss": 0.2971, + "num_input_tokens_seen": 4746260, + "step": 52 + }, + { + "epoch": 0.21666666666666667, + "loss": 0.2732027769088745, + "loss_ce": 0.012216457165777683, + "loss_iou": 0.47265625, + "loss_num": 0.05029296875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 4746260, + "step": 52 + }, + { + "epoch": 0.22083333333333333, + "grad_norm": 6.577564970207067, + "learning_rate": 5e-05, + "loss": 0.2967, + "num_input_tokens_seen": 4837624, + "step": 53 + }, + { + "epoch": 0.22083333333333333, + "loss": 0.29663506150245667, + "loss_ce": 0.011112616397440434, + "loss_iou": 0.47265625, + "loss_num": 0.05517578125, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 4837624, + "step": 53 + }, + { + "epoch": 0.225, + "grad_norm": 4.981634336121052, + "learning_rate": 5e-05, + "loss": 0.3019, + "num_input_tokens_seen": 4928564, + "step": 54 + }, + { + "epoch": 0.225, + "loss": 0.2648267447948456, + "loss_ce": 0.002497634617611766, + "loss_iou": 0.466796875, + "loss_num": 0.05029296875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 4928564, + "step": 54 + }, + { + "epoch": 0.22916666666666666, + "grad_norm": 13.548163897961462, + "learning_rate": 5e-05, + "loss": 0.3469, + "num_input_tokens_seen": 5018920, + "step": 55 + }, + { + "epoch": 0.22916666666666666, + "loss": 0.37160640954971313, + "loss_ce": 0.031274404376745224, + "loss_iou": 0.384765625, + "loss_num": 0.06640625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 5018920, + "step": 55 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 8.432532350358397, + "learning_rate": 5e-05, + "loss": 0.298, + "num_input_tokens_seen": 5110704, + "step": 56 + }, + { + "epoch": 0.23333333333333334, + "loss": 0.2524632215499878, + "loss_ce": 0.00435529975220561, + "loss_iou": 0.302734375, + "loss_num": 0.04833984375, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 5110704, + "step": 56 + }, + { + "epoch": 0.2375, + "grad_norm": 8.823900463968759, + "learning_rate": 5e-05, + "loss": 0.2824, + "num_input_tokens_seen": 5201996, + "step": 57 + }, + { + "epoch": 0.2375, + "loss": 0.25943028926849365, + "loss_ce": 0.012115844525396824, + "loss_iou": 0.458984375, + "loss_num": 0.04736328125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 5201996, + "step": 57 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 4.634353168268041, + "learning_rate": 5e-05, + "loss": 0.3658, + "num_input_tokens_seen": 5292808, + "step": 58 + }, + { + "epoch": 0.24166666666666667, + "loss": 0.4157818555831909, + "loss_ce": 0.023814085870981216, + "loss_iou": 0.3046875, + "loss_num": 0.0771484375, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 5292808, + "step": 58 + }, + { + "epoch": 0.24583333333333332, + "grad_norm": 5.143584787871478, + "learning_rate": 5e-05, + "loss": 0.2389, + "num_input_tokens_seen": 5384260, + "step": 59 + }, + { + "epoch": 0.24583333333333332, + "loss": 0.2749660611152649, + "loss_ce": 0.005678959656506777, + "loss_iou": 0.5, + "loss_num": 0.051513671875, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 5384260, + "step": 59 + }, + { + "epoch": 0.25, + "grad_norm": 10.50448792162475, + "learning_rate": 5e-05, + "loss": 0.3283, + "num_input_tokens_seen": 5475680, + "step": 60 + }, + { + "epoch": 0.25, + "loss": 0.3407435119152069, + "loss_ce": 0.005263775587081909, + "loss_iou": 0.455078125, + "loss_num": 0.06494140625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 5475680, + "step": 60 + }, + { + "epoch": 0.25416666666666665, + "grad_norm": 11.771930179404578, + "learning_rate": 5e-05, + "loss": 0.2479, + "num_input_tokens_seen": 5567168, + "step": 61 + }, + { + "epoch": 0.25416666666666665, + "loss": 0.3167330026626587, + "loss_ce": 0.010031351819634438, + "loss_iou": 0.376953125, + "loss_num": 0.0595703125, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 5567168, + "step": 61 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 22.159889524888786, + "learning_rate": 5e-05, + "loss": 0.3364, + "num_input_tokens_seen": 5657968, + "step": 62 + }, + { + "epoch": 0.25833333333333336, + "loss": 0.34341248869895935, + "loss_ce": 0.011869520880281925, + "loss_iou": 0.49609375, + "loss_num": 0.06396484375, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 5657968, + "step": 62 + }, + { + "epoch": 0.2625, + "grad_norm": 16.96247501163317, + "learning_rate": 5e-05, + "loss": 0.2797, + "num_input_tokens_seen": 5749784, + "step": 63 + }, + { + "epoch": 0.2625, + "loss": 0.19430118799209595, + "loss_ce": 0.0032611587084829807, + "loss_iou": 0.5546875, + "loss_num": 0.035400390625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 5749784, + "step": 63 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 46.41347542383105, + "learning_rate": 5e-05, + "loss": 0.3089, + "num_input_tokens_seen": 5841148, + "step": 64 + }, + { + "epoch": 0.26666666666666666, + "loss": 0.34790486097335815, + "loss_ce": 0.005558639299124479, + "loss_iou": 0.408203125, + "loss_num": 0.06640625, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 5841148, + "step": 64 + }, + { + "epoch": 0.2708333333333333, + "grad_norm": 6.935369905975971, + "learning_rate": 5e-05, + "loss": 0.237, + "num_input_tokens_seen": 5932988, + "step": 65 + }, + { + "epoch": 0.2708333333333333, + "loss": 0.24167031049728394, + "loss_ce": 0.001985268434509635, + "loss_iou": 0.453125, + "loss_num": 0.045654296875, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 5932988, + "step": 65 + }, + { + "epoch": 0.275, + "grad_norm": 4.4304114101387375, + "learning_rate": 5e-05, + "loss": 0.3343, + "num_input_tokens_seen": 6023916, + "step": 66 + }, + { + "epoch": 0.275, + "loss": 0.3570048213005066, + "loss_ce": 0.003122997935861349, + "loss_iou": 0.53515625, + "loss_num": 0.06787109375, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 6023916, + "step": 66 + }, + { + "epoch": 0.2791666666666667, + "grad_norm": 26.0353832503157, + "learning_rate": 5e-05, + "loss": 0.3269, + "num_input_tokens_seen": 6115132, + "step": 67 + }, + { + "epoch": 0.2791666666666667, + "loss": 0.34636855125427246, + "loss_ce": 0.007257209159433842, + "loss_iou": 0.1767578125, + "loss_num": 0.06689453125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 6115132, + "step": 67 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 16.533984696631137, + "learning_rate": 5e-05, + "loss": 0.2739, + "num_input_tokens_seen": 6206608, + "step": 68 + }, + { + "epoch": 0.2833333333333333, + "loss": 0.2337610274553299, + "loss_ce": 0.018154341727495193, + "loss_iou": 0.46484375, + "loss_num": 0.04052734375, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 6206608, + "step": 68 + }, + { + "epoch": 0.2875, + "grad_norm": 9.690989332225378, + "learning_rate": 5e-05, + "loss": 0.2708, + "num_input_tokens_seen": 6297836, + "step": 69 + }, + { + "epoch": 0.2875, + "loss": 0.26258543133735657, + "loss_ce": 0.0014770347625017166, + "loss_iou": 0.32421875, + "loss_num": 0.05029296875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 6297836, + "step": 69 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 6.900383133357548, + "learning_rate": 5e-05, + "loss": 0.2778, + "num_input_tokens_seen": 6388956, + "step": 70 + }, + { + "epoch": 0.2916666666666667, + "loss": 0.3020828366279602, + "loss_ce": 0.01985626295208931, + "loss_iou": 0.46484375, + "loss_num": 0.053955078125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 6388956, + "step": 70 + }, + { + "epoch": 0.29583333333333334, + "grad_norm": 8.989876897160364, + "learning_rate": 5e-05, + "loss": 0.1958, + "num_input_tokens_seen": 6480796, + "step": 71 + }, + { + "epoch": 0.29583333333333334, + "loss": 0.2226022183895111, + "loss_ce": 0.004340487997978926, + "loss_iou": 0.34765625, + "loss_num": 0.041748046875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 6480796, + "step": 71 + }, + { + "epoch": 0.3, + "grad_norm": 10.580237135943857, + "learning_rate": 5e-05, + "loss": 0.249, + "num_input_tokens_seen": 6571884, + "step": 72 + }, + { + "epoch": 0.3, + "loss": 0.24384717643260956, + "loss_ce": 0.004589363466948271, + "loss_iou": 0.546875, + "loss_num": 0.044921875, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 6571884, + "step": 72 + }, + { + "epoch": 0.30416666666666664, + "grad_norm": 17.721014973302793, + "learning_rate": 5e-05, + "loss": 0.2367, + "num_input_tokens_seen": 6663112, + "step": 73 + }, + { + "epoch": 0.30416666666666664, + "loss": 0.28261661529541016, + "loss_ce": 0.0018548790831118822, + "loss_iou": 0.46875, + "loss_num": 0.053466796875, + "loss_xval": 0.28125, + "num_input_tokens_seen": 6663112, + "step": 73 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 10.30157162097328, + "learning_rate": 5e-05, + "loss": 0.2444, + "num_input_tokens_seen": 6755296, + "step": 74 + }, + { + "epoch": 0.30833333333333335, + "loss": 0.2022906094789505, + "loss_ce": 0.00679500587284565, + "loss_iou": 0.55859375, + "loss_num": 0.035888671875, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 6755296, + "step": 74 + }, + { + "epoch": 0.3125, + "grad_norm": 5.447215047810138, + "learning_rate": 5e-05, + "loss": 0.2813, + "num_input_tokens_seen": 6846452, + "step": 75 + }, + { + "epoch": 0.3125, + "loss": 0.2468206286430359, + "loss_ce": 0.0020086378790438175, + "loss_iou": 0.384765625, + "loss_num": 0.046630859375, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 6846452, + "step": 75 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 9.019464644230485, + "learning_rate": 5e-05, + "loss": 0.3132, + "num_input_tokens_seen": 6938392, + "step": 76 + }, + { + "epoch": 0.31666666666666665, + "loss": 0.2754860520362854, + "loss_ce": 0.006504114717245102, + "loss_iou": 0.384765625, + "loss_num": 0.051513671875, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 6938392, + "step": 76 + }, + { + "epoch": 0.32083333333333336, + "grad_norm": 9.476201368391846, + "learning_rate": 5e-05, + "loss": 0.3307, + "num_input_tokens_seen": 7030100, + "step": 77 + }, + { + "epoch": 0.32083333333333336, + "loss": 0.27541017532348633, + "loss_ce": 0.014820555225014687, + "loss_iou": 0.27734375, + "loss_num": 0.050537109375, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 7030100, + "step": 77 + }, + { + "epoch": 0.325, + "grad_norm": 11.219509778544023, + "learning_rate": 5e-05, + "loss": 0.213, + "num_input_tokens_seen": 7120872, + "step": 78 + }, + { + "epoch": 0.325, + "loss": 0.2377692461013794, + "loss_ce": 0.0004035182937514037, + "loss_iou": 0.267578125, + "loss_num": 0.0458984375, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 7120872, + "step": 78 + }, + { + "epoch": 0.32916666666666666, + "grad_norm": 5.808669094612053, + "learning_rate": 5e-05, + "loss": 0.2224, + "num_input_tokens_seen": 7212292, + "step": 79 + }, + { + "epoch": 0.32916666666666666, + "loss": 0.21997323632240295, + "loss_ce": 0.0014368824195116758, + "loss_iou": 0.265625, + "loss_num": 0.0419921875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 7212292, + "step": 79 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 10.864936156293977, + "learning_rate": 5e-05, + "loss": 0.3307, + "num_input_tokens_seen": 7303588, + "step": 80 + }, + { + "epoch": 0.3333333333333333, + "loss": 0.30561554431915283, + "loss_ce": 0.0013247651513665915, + "loss_iou": 0.28125, + "loss_num": 0.05908203125, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 7303588, + "step": 80 + }, + { + "epoch": 0.3375, + "grad_norm": 5.325260315463099, + "learning_rate": 5e-05, + "loss": 0.3184, + "num_input_tokens_seen": 7394048, + "step": 81 + }, + { + "epoch": 0.3375, + "loss": 0.23910076916217804, + "loss_ce": 0.02486737072467804, + "loss_iou": 0.30859375, + "loss_num": 0.041015625, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 7394048, + "step": 81 + }, + { + "epoch": 0.3416666666666667, + "grad_norm": 7.976433511080435, + "learning_rate": 5e-05, + "loss": 0.2626, + "num_input_tokens_seen": 7485128, + "step": 82 + }, + { + "epoch": 0.3416666666666667, + "loss": 0.2837026119232178, + "loss_ce": 0.0012929437216371298, + "loss_iou": 0.314453125, + "loss_num": 0.054443359375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 7485128, + "step": 82 + }, + { + "epoch": 0.3458333333333333, + "grad_norm": 16.49736325407084, + "learning_rate": 5e-05, + "loss": 0.2772, + "num_input_tokens_seen": 7576344, + "step": 83 + }, + { + "epoch": 0.3458333333333333, + "loss": 0.22794455289840698, + "loss_ce": 0.004677943419665098, + "loss_iou": 0.337890625, + "loss_num": 0.04248046875, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 7576344, + "step": 83 + }, + { + "epoch": 0.35, + "grad_norm": 15.249594869388494, + "learning_rate": 5e-05, + "loss": 0.2613, + "num_input_tokens_seen": 7668016, + "step": 84 + }, + { + "epoch": 0.35, + "loss": 0.37888282537460327, + "loss_ce": 0.004035438410937786, + "loss_iou": 0.30859375, + "loss_num": 0.0732421875, + "loss_xval": 0.375, + "num_input_tokens_seen": 7668016, + "step": 84 + }, + { + "epoch": 0.3541666666666667, + "grad_norm": 7.649439612726913, + "learning_rate": 5e-05, + "loss": 0.2568, + "num_input_tokens_seen": 7758804, + "step": 85 + }, + { + "epoch": 0.3541666666666667, + "loss": 0.2840992212295532, + "loss_ce": 0.002971289912238717, + "loss_iou": 0.44921875, + "loss_num": 0.05322265625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 7758804, + "step": 85 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 16.462632446510703, + "learning_rate": 5e-05, + "loss": 0.3259, + "num_input_tokens_seen": 7850368, + "step": 86 + }, + { + "epoch": 0.35833333333333334, + "loss": 0.30999892950057983, + "loss_ce": 0.007264568004757166, + "loss_iou": 0.328125, + "loss_num": 0.058349609375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 7850368, + "step": 86 + }, + { + "epoch": 0.3625, + "grad_norm": 9.740030239140937, + "learning_rate": 5e-05, + "loss": 0.2168, + "num_input_tokens_seen": 7941692, + "step": 87 + }, + { + "epoch": 0.3625, + "loss": 0.22348150610923767, + "loss_ce": 0.0030530274379998446, + "loss_iou": 0.443359375, + "loss_num": 0.041015625, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 7941692, + "step": 87 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 6.63686532350692, + "learning_rate": 5e-05, + "loss": 0.2207, + "num_input_tokens_seen": 8032592, + "step": 88 + }, + { + "epoch": 0.36666666666666664, + "loss": 0.22159643471240997, + "loss_ce": 0.0024802093394100666, + "loss_iou": 0.365234375, + "loss_num": 0.041259765625, + "loss_xval": 0.21875, + "num_input_tokens_seen": 8032592, + "step": 88 + }, + { + "epoch": 0.37083333333333335, + "grad_norm": 37.485183748386525, + "learning_rate": 5e-05, + "loss": 0.2739, + "num_input_tokens_seen": 8124148, + "step": 89 + }, + { + "epoch": 0.37083333333333335, + "loss": 0.2878793478012085, + "loss_ce": 0.0028451611287891865, + "loss_iou": 0.404296875, + "loss_num": 0.05419921875, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 8124148, + "step": 89 + }, + { + "epoch": 0.375, + "grad_norm": 17.34743136643758, + "learning_rate": 5e-05, + "loss": 0.322, + "num_input_tokens_seen": 8215068, + "step": 90 + }, + { + "epoch": 0.375, + "loss": 0.3442806005477905, + "loss_ce": 0.003948563244193792, + "loss_iou": 0.41796875, + "loss_num": 0.06494140625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 8215068, + "step": 90 + }, + { + "epoch": 0.37916666666666665, + "grad_norm": 9.224857697957768, + "learning_rate": 5e-05, + "loss": 0.475, + "num_input_tokens_seen": 8306212, + "step": 91 + }, + { + "epoch": 0.37916666666666665, + "loss": 0.6492692828178406, + "loss_ce": 0.2845231592655182, + "loss_iou": 0.408203125, + "loss_num": 0.0703125, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 8306212, + "step": 91 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 5.8131095835936035, + "learning_rate": 5e-05, + "loss": 0.2281, + "num_input_tokens_seen": 8397524, + "step": 92 + }, + { + "epoch": 0.38333333333333336, + "loss": 0.21828222274780273, + "loss_ce": 0.0027060469146817923, + "loss_iou": 0.1572265625, + "loss_num": 0.0419921875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 8397524, + "step": 92 + }, + { + "epoch": 0.3875, + "grad_norm": 5.37773321348737, + "learning_rate": 5e-05, + "loss": 0.2631, + "num_input_tokens_seen": 8488852, + "step": 93 + }, + { + "epoch": 0.3875, + "loss": 0.3609490692615509, + "loss_ce": 0.0015740722883492708, + "loss_iou": 0.318359375, + "loss_num": 0.0693359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 8488852, + "step": 93 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 11.54814011667921, + "learning_rate": 5e-05, + "loss": 0.2231, + "num_input_tokens_seen": 8580496, + "step": 94 + }, + { + "epoch": 0.39166666666666666, + "loss": 0.2142496556043625, + "loss_ce": 0.0024576662108302116, + "loss_iou": 0.439453125, + "loss_num": 0.0390625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 8580496, + "step": 94 + }, + { + "epoch": 0.3958333333333333, + "grad_norm": 4.732196078472765, + "learning_rate": 5e-05, + "loss": 0.268, + "num_input_tokens_seen": 8671772, + "step": 95 + }, + { + "epoch": 0.3958333333333333, + "loss": 0.23772427439689636, + "loss_ce": 0.003501851111650467, + "loss_iou": 0.283203125, + "loss_num": 0.044677734375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 8671772, + "step": 95 + }, + { + "epoch": 0.4, + "grad_norm": 11.019497865376769, + "learning_rate": 5e-05, + "loss": 0.1872, + "num_input_tokens_seen": 8763588, + "step": 96 + }, + { + "epoch": 0.4, + "loss": 0.18473270535469055, + "loss_ce": 0.008768357336521149, + "loss_iou": 0.400390625, + "loss_num": 0.0322265625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 8763588, + "step": 96 + }, + { + "epoch": 0.4041666666666667, + "grad_norm": 21.95480025427789, + "learning_rate": 5e-05, + "loss": 0.4284, + "num_input_tokens_seen": 8854600, + "step": 97 + }, + { + "epoch": 0.4041666666666667, + "loss": 0.43076610565185547, + "loss_ce": 0.0046186321415007114, + "loss_iou": 0.244140625, + "loss_num": 0.08349609375, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 8854600, + "step": 97 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 18.314886493997594, + "learning_rate": 5e-05, + "loss": 0.259, + "num_input_tokens_seen": 8945532, + "step": 98 + }, + { + "epoch": 0.4083333333333333, + "loss": 0.28518539667129517, + "loss_ce": 0.001432962715625763, + "loss_iou": 0.37109375, + "loss_num": 0.053955078125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 8945532, + "step": 98 + }, + { + "epoch": 0.4125, + "grad_norm": 11.880511271847071, + "learning_rate": 5e-05, + "loss": 0.2449, + "num_input_tokens_seen": 9037100, + "step": 99 + }, + { + "epoch": 0.4125, + "loss": 0.2502959072589874, + "loss_ce": 0.0009062608005478978, + "loss_iou": 0.388671875, + "loss_num": 0.046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 9037100, + "step": 99 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 8.111545719884933, + "learning_rate": 5e-05, + "loss": 0.2111, + "num_input_tokens_seen": 9128664, + "step": 100 + }, + { + "epoch": 0.4166666666666667, + "loss": 0.1966070681810379, + "loss_ce": 0.004254776053130627, + "loss_iou": 0.38671875, + "loss_num": 0.035400390625, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 9128664, + "step": 100 + }, + { + "epoch": 0.42083333333333334, + "grad_norm": 26.817760455564922, + "learning_rate": 5e-05, + "loss": 0.2578, + "num_input_tokens_seen": 9220284, + "step": 101 + }, + { + "epoch": 0.42083333333333334, + "loss": 0.17894884943962097, + "loss_ce": 0.00060411257436499, + "loss_iou": 0.400390625, + "loss_num": 0.032470703125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 9220284, + "step": 101 + }, + { + "epoch": 0.425, + "grad_norm": 34.603333402240615, + "learning_rate": 5e-05, + "loss": 0.3548, + "num_input_tokens_seen": 9311456, + "step": 102 + }, + { + "epoch": 0.425, + "loss": 0.4787541925907135, + "loss_ce": 0.0017644375329837203, + "loss_iou": 0.40625, + "loss_num": 0.09228515625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 9311456, + "step": 102 + }, + { + "epoch": 0.42916666666666664, + "grad_norm": 14.937731018546275, + "learning_rate": 5e-05, + "loss": 0.3112, + "num_input_tokens_seen": 9402384, + "step": 103 + }, + { + "epoch": 0.42916666666666664, + "loss": 0.3006994128227234, + "loss_ce": 0.002115420065820217, + "loss_iou": 0.322265625, + "loss_num": 0.05712890625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 9402384, + "step": 103 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 43.31661854600968, + "learning_rate": 5e-05, + "loss": 0.2905, + "num_input_tokens_seen": 9491964, + "step": 104 + }, + { + "epoch": 0.43333333333333335, + "loss": 0.26289206743240356, + "loss_ce": 0.002882319502532482, + "loss_iou": 0.58984375, + "loss_num": 0.047119140625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 9491964, + "step": 104 + }, + { + "epoch": 0.4375, + "grad_norm": 11.993910734445342, + "learning_rate": 5e-05, + "loss": 0.2776, + "num_input_tokens_seen": 9583456, + "step": 105 + }, + { + "epoch": 0.4375, + "loss": 0.2879137396812439, + "loss_ce": 0.0011705834185704589, + "loss_iou": 0.43359375, + "loss_num": 0.0537109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 9583456, + "step": 105 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 16.616379534353655, + "learning_rate": 5e-05, + "loss": 0.2202, + "num_input_tokens_seen": 9674304, + "step": 106 + }, + { + "epoch": 0.44166666666666665, + "loss": 0.23460063338279724, + "loss_ce": 0.003094287123531103, + "loss_iou": 0.40625, + "loss_num": 0.04296875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 9674304, + "step": 106 + }, + { + "epoch": 0.44583333333333336, + "grad_norm": 6.232161255971668, + "learning_rate": 5e-05, + "loss": 0.2685, + "num_input_tokens_seen": 9765824, + "step": 107 + }, + { + "epoch": 0.44583333333333336, + "loss": 0.22564728558063507, + "loss_ce": 0.00476106209680438, + "loss_iou": 0.330078125, + "loss_num": 0.04150390625, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 9765824, + "step": 107 + }, + { + "epoch": 0.45, + "grad_norm": 69.52339064652675, + "learning_rate": 5e-05, + "loss": 0.288, + "num_input_tokens_seen": 9857156, + "step": 108 + }, + { + "epoch": 0.45, + "loss": 0.27105996012687683, + "loss_ce": 0.0035123610869050026, + "loss_iou": 0.33203125, + "loss_num": 0.05078125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 9857156, + "step": 108 + }, + { + "epoch": 0.45416666666666666, + "grad_norm": 18.49210837706306, + "learning_rate": 5e-05, + "loss": 0.2633, + "num_input_tokens_seen": 9948644, + "step": 109 + }, + { + "epoch": 0.45416666666666666, + "loss": 0.2351188361644745, + "loss_ce": 0.001476275036111474, + "loss_iou": 0.330078125, + "loss_num": 0.0439453125, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 9948644, + "step": 109 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 17.00180692584306, + "learning_rate": 5e-05, + "loss": 0.2762, + "num_input_tokens_seen": 10040404, + "step": 110 + }, + { + "epoch": 0.4583333333333333, + "loss": 0.2623690068721771, + "loss_ce": 0.004007187206298113, + "loss_iou": 0.453125, + "loss_num": 0.0478515625, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 10040404, + "step": 110 + }, + { + "epoch": 0.4625, + "grad_norm": 7.786667888006078, + "learning_rate": 5e-05, + "loss": 0.2906, + "num_input_tokens_seen": 10131492, + "step": 111 + }, + { + "epoch": 0.4625, + "loss": 0.1874142587184906, + "loss_ce": 0.0004025435191579163, + "loss_iou": 0.51171875, + "loss_num": 0.032958984375, + "loss_xval": 0.1875, + "num_input_tokens_seen": 10131492, + "step": 111 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 10.089216726276307, + "learning_rate": 5e-05, + "loss": 0.3346, + "num_input_tokens_seen": 10222468, + "step": 112 + }, + { + "epoch": 0.4666666666666667, + "loss": 0.37050771713256836, + "loss_ce": 0.009789920412003994, + "loss_iou": 0.5859375, + "loss_num": 0.06689453125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 10222468, + "step": 112 + }, + { + "epoch": 0.4708333333333333, + "grad_norm": 13.538809032933797, + "learning_rate": 5e-05, + "loss": 0.1975, + "num_input_tokens_seen": 10313368, + "step": 113 + }, + { + "epoch": 0.4708333333333333, + "loss": 0.1806737631559372, + "loss_ce": 0.0014745458029210567, + "loss_iou": 0.388671875, + "loss_num": 0.032470703125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 10313368, + "step": 113 + }, + { + "epoch": 0.475, + "grad_norm": 9.844448685947137, + "learning_rate": 5e-05, + "loss": 0.2637, + "num_input_tokens_seen": 10404232, + "step": 114 + }, + { + "epoch": 0.475, + "loss": 0.20554538071155548, + "loss_ce": 0.0026645271573215723, + "loss_iou": 0.474609375, + "loss_num": 0.036376953125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 10404232, + "step": 114 + }, + { + "epoch": 0.4791666666666667, + "grad_norm": 29.180353821250876, + "learning_rate": 5e-05, + "loss": 0.3316, + "num_input_tokens_seen": 10495580, + "step": 115 + }, + { + "epoch": 0.4791666666666667, + "loss": 0.2651398777961731, + "loss_ce": 0.0038788975216448307, + "loss_iou": 0.490234375, + "loss_num": 0.0478515625, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 10495580, + "step": 115 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 9.586548205484243, + "learning_rate": 5e-05, + "loss": 0.3117, + "num_input_tokens_seen": 10586396, + "step": 116 + }, + { + "epoch": 0.48333333333333334, + "loss": 0.34220072627067566, + "loss_ce": 0.01053567323833704, + "loss_iou": 0.26171875, + "loss_num": 0.06396484375, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 10586396, + "step": 116 + }, + { + "epoch": 0.4875, + "grad_norm": 9.509270620685877, + "learning_rate": 5e-05, + "loss": 0.3585, + "num_input_tokens_seen": 10678060, + "step": 117 + }, + { + "epoch": 0.4875, + "loss": 0.3074171543121338, + "loss_ce": 0.020185697823762894, + "loss_iou": 0.357421875, + "loss_num": 0.053955078125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 10678060, + "step": 117 + }, + { + "epoch": 0.49166666666666664, + "grad_norm": 5.073874129263811, + "learning_rate": 5e-05, + "loss": 0.2701, + "num_input_tokens_seen": 10769996, + "step": 118 + }, + { + "epoch": 0.49166666666666664, + "loss": 0.27782976627349854, + "loss_ce": 0.00036397005897015333, + "loss_iou": 0.5625, + "loss_num": 0.050048828125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 10769996, + "step": 118 + }, + { + "epoch": 0.49583333333333335, + "grad_norm": 8.65526030944302, + "learning_rate": 5e-05, + "loss": 0.2471, + "num_input_tokens_seen": 10860812, + "step": 119 + }, + { + "epoch": 0.49583333333333335, + "loss": 0.26075470447540283, + "loss_ce": 0.0018435618840157986, + "loss_iou": 0.5234375, + "loss_num": 0.046875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 10860812, + "step": 119 + }, + { + "epoch": 0.5, + "grad_norm": 18.74547743651069, + "learning_rate": 5e-05, + "loss": 0.3201, + "num_input_tokens_seen": 10952028, + "step": 120 + }, + { + "epoch": 0.5, + "loss": 0.35629406571388245, + "loss_ce": 0.017671015113592148, + "loss_iou": 0.373046875, + "loss_num": 0.064453125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 10952028, + "step": 120 + }, + { + "epoch": 0.5041666666666667, + "grad_norm": 12.242796749612552, + "learning_rate": 5e-05, + "loss": 0.2829, + "num_input_tokens_seen": 11042748, + "step": 121 + }, + { + "epoch": 0.5041666666666667, + "loss": 0.29572194814682007, + "loss_ce": 0.0006780114490538836, + "loss_iou": 0.35546875, + "loss_num": 0.0556640625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 11042748, + "step": 121 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 80.74118621393487, + "learning_rate": 5e-05, + "loss": 0.323, + "num_input_tokens_seen": 11134452, + "step": 122 + }, + { + "epoch": 0.5083333333333333, + "loss": 0.3073047995567322, + "loss_ce": 0.0019458993338048458, + "loss_iou": 0.458984375, + "loss_num": 0.056640625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 11134452, + "step": 122 + }, + { + "epoch": 0.5125, + "grad_norm": 19.969269792064445, + "learning_rate": 5e-05, + "loss": 0.3026, + "num_input_tokens_seen": 11226092, + "step": 123 + }, + { + "epoch": 0.5125, + "loss": 0.37382322549819946, + "loss_ce": 0.0026074047200381756, + "loss_iou": 0.400390625, + "loss_num": 0.0703125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 11226092, + "step": 123 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 7.3966393387659926, + "learning_rate": 5e-05, + "loss": 0.2249, + "num_input_tokens_seen": 11317360, + "step": 124 + }, + { + "epoch": 0.5166666666666667, + "loss": 0.22373417019844055, + "loss_ce": 0.00443485751748085, + "loss_iou": 0.462890625, + "loss_num": 0.039306640625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 11317360, + "step": 124 + }, + { + "epoch": 0.5208333333333334, + "grad_norm": 27.827753563904736, + "learning_rate": 5e-05, + "loss": 0.2454, + "num_input_tokens_seen": 11408524, + "step": 125 + }, + { + "epoch": 0.5208333333333334, + "loss": 0.23156914114952087, + "loss_ce": 0.027467573061585426, + "loss_iou": 0.271484375, + "loss_num": 0.0380859375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 11408524, + "step": 125 + }, + { + "epoch": 0.525, + "grad_norm": 16.137469248679036, + "learning_rate": 5e-05, + "loss": 0.249, + "num_input_tokens_seen": 11499824, + "step": 126 + }, + { + "epoch": 0.525, + "loss": 0.21419081091880798, + "loss_ce": 0.0033753756433725357, + "loss_iou": 0.470703125, + "loss_num": 0.03759765625, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 11499824, + "step": 126 + }, + { + "epoch": 0.5291666666666667, + "grad_norm": 31.700797289386937, + "learning_rate": 5e-05, + "loss": 0.3223, + "num_input_tokens_seen": 11591604, + "step": 127 + }, + { + "epoch": 0.5291666666666667, + "loss": 0.2671835422515869, + "loss_ce": 0.004915494006127119, + "loss_iou": 0.435546875, + "loss_num": 0.048095703125, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 11591604, + "step": 127 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 9.023404502399261, + "learning_rate": 5e-05, + "loss": 0.2629, + "num_input_tokens_seen": 11683324, + "step": 128 + }, + { + "epoch": 0.5333333333333333, + "loss": 0.21468223631381989, + "loss_ce": 0.014853136613965034, + "loss_iou": 0.1669921875, + "loss_num": 0.038330078125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 11683324, + "step": 128 + }, + { + "epoch": 0.5375, + "grad_norm": 25.693519570004373, + "learning_rate": 5e-05, + "loss": 0.3072, + "num_input_tokens_seen": 11774320, + "step": 129 + }, + { + "epoch": 0.5375, + "loss": 0.39914512634277344, + "loss_ce": 0.000707621977198869, + "loss_iou": 0.3984375, + "loss_num": 0.07568359375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 11774320, + "step": 129 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 9.866756200521515, + "learning_rate": 5e-05, + "loss": 0.2577, + "num_input_tokens_seen": 11865392, + "step": 130 + }, + { + "epoch": 0.5416666666666666, + "loss": 0.29488706588745117, + "loss_ce": 0.01870298571884632, + "loss_iou": 0.37109375, + "loss_num": 0.05126953125, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 11865392, + "step": 130 + }, + { + "epoch": 0.5458333333333333, + "grad_norm": 10.436733246035443, + "learning_rate": 5e-05, + "loss": 0.3341, + "num_input_tokens_seen": 11957140, + "step": 131 + }, + { + "epoch": 0.5458333333333333, + "loss": 0.32612884044647217, + "loss_ce": 0.0023983772844076157, + "loss_iou": 0.412109375, + "loss_num": 0.060546875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 11957140, + "step": 131 + }, + { + "epoch": 0.55, + "grad_norm": 10.922744020062677, + "learning_rate": 5e-05, + "loss": 0.2328, + "num_input_tokens_seen": 12048596, + "step": 132 + }, + { + "epoch": 0.55, + "loss": 0.1844669133424759, + "loss_ce": 0.002460076939314604, + "loss_iou": 0.37109375, + "loss_num": 0.032470703125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 12048596, + "step": 132 + }, + { + "epoch": 0.5541666666666667, + "grad_norm": 23.953360994914252, + "learning_rate": 5e-05, + "loss": 0.2737, + "num_input_tokens_seen": 12140068, + "step": 133 + }, + { + "epoch": 0.5541666666666667, + "loss": 0.24452215433120728, + "loss_ce": 0.0019684485159814358, + "loss_iou": 0.3515625, + "loss_num": 0.044677734375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 12140068, + "step": 133 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 4.560719803742324, + "learning_rate": 5e-05, + "loss": 0.2217, + "num_input_tokens_seen": 12231244, + "step": 134 + }, + { + "epoch": 0.5583333333333333, + "loss": 0.2742983102798462, + "loss_ce": 0.0020815201569348574, + "loss_iou": 0.515625, + "loss_num": 0.048828125, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 12231244, + "step": 134 + }, + { + "epoch": 0.5625, + "grad_norm": 7.269681166884061, + "learning_rate": 5e-05, + "loss": 0.2293, + "num_input_tokens_seen": 12322624, + "step": 135 + }, + { + "epoch": 0.5625, + "loss": 0.16657213866710663, + "loss_ce": 0.006843137554824352, + "loss_iou": 0.4140625, + "loss_num": 0.0274658203125, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 12322624, + "step": 135 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 7.655933198469534, + "learning_rate": 5e-05, + "loss": 0.3303, + "num_input_tokens_seen": 12413664, + "step": 136 + }, + { + "epoch": 0.5666666666666667, + "loss": 0.3964250683784485, + "loss_ce": 0.08453544229269028, + "loss_iou": 0.1513671875, + "loss_num": 0.060791015625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 12413664, + "step": 136 + }, + { + "epoch": 0.5708333333333333, + "grad_norm": 7.870782926065455, + "learning_rate": 5e-05, + "loss": 0.3437, + "num_input_tokens_seen": 12505268, + "step": 137 + }, + { + "epoch": 0.5708333333333333, + "loss": 0.3565272092819214, + "loss_ce": 0.0037439956795424223, + "loss_iou": 0.302734375, + "loss_num": 0.0673828125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 12505268, + "step": 137 + }, + { + "epoch": 0.575, + "grad_norm": 4.491010759014802, + "learning_rate": 5e-05, + "loss": 0.1927, + "num_input_tokens_seen": 12596560, + "step": 138 + }, + { + "epoch": 0.575, + "loss": 0.18377013504505157, + "loss_ce": 0.001732780598104, + "loss_iou": 0.36328125, + "loss_num": 0.032470703125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 12596560, + "step": 138 + }, + { + "epoch": 0.5791666666666667, + "grad_norm": 6.588859235712896, + "learning_rate": 5e-05, + "loss": 0.351, + "num_input_tokens_seen": 12687640, + "step": 139 + }, + { + "epoch": 0.5791666666666667, + "loss": 0.3708999454975128, + "loss_ce": 0.012837199494242668, + "loss_iou": 0.416015625, + "loss_num": 0.06689453125, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 12687640, + "step": 139 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 11.319551129339365, + "learning_rate": 5e-05, + "loss": 0.2907, + "num_input_tokens_seen": 12778776, + "step": 140 + }, + { + "epoch": 0.5833333333333334, + "loss": 0.21045735478401184, + "loss_ce": 0.0015340123791247606, + "loss_iou": 0.388671875, + "loss_num": 0.03759765625, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 12778776, + "step": 140 + }, + { + "epoch": 0.5875, + "grad_norm": 9.707420160182311, + "learning_rate": 5e-05, + "loss": 0.3175, + "num_input_tokens_seen": 12869816, + "step": 141 + }, + { + "epoch": 0.5875, + "loss": 0.3253687620162964, + "loss_ce": 0.004812105558812618, + "loss_iou": 0.419921875, + "loss_num": 0.0595703125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 12869816, + "step": 141 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 4.292805483536467, + "learning_rate": 5e-05, + "loss": 0.2064, + "num_input_tokens_seen": 12960888, + "step": 142 + }, + { + "epoch": 0.5916666666666667, + "loss": 0.25991156697273254, + "loss_ce": 0.00014595442917197943, + "loss_iou": 0.404296875, + "loss_num": 0.04736328125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 12960888, + "step": 142 + }, + { + "epoch": 0.5958333333333333, + "grad_norm": 21.379545153312392, + "learning_rate": 5e-05, + "loss": 0.2521, + "num_input_tokens_seen": 13052452, + "step": 143 + }, + { + "epoch": 0.5958333333333333, + "loss": 0.23024097084999084, + "loss_ce": 0.002030514180660248, + "loss_iou": 0.34765625, + "loss_num": 0.041748046875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 13052452, + "step": 143 + }, + { + "epoch": 0.6, + "grad_norm": 8.227914536382473, + "learning_rate": 5e-05, + "loss": 0.248, + "num_input_tokens_seen": 13143192, + "step": 144 + }, + { + "epoch": 0.6, + "loss": 0.23771356046199799, + "loss_ce": 0.0054137492552399635, + "loss_iou": 0.328125, + "loss_num": 0.042724609375, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 13143192, + "step": 144 + }, + { + "epoch": 0.6041666666666666, + "grad_norm": 8.446517974381681, + "learning_rate": 5e-05, + "loss": 0.2465, + "num_input_tokens_seen": 13234824, + "step": 145 + }, + { + "epoch": 0.6041666666666666, + "loss": 0.20083218812942505, + "loss_ce": 0.0016134518664330244, + "loss_iou": 0.2578125, + "loss_num": 0.036865234375, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 13234824, + "step": 145 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 8.30597746700804, + "learning_rate": 5e-05, + "loss": 0.2549, + "num_input_tokens_seen": 13325684, + "step": 146 + }, + { + "epoch": 0.6083333333333333, + "loss": 0.19978776574134827, + "loss_ce": 0.0020948995370417833, + "loss_iou": 0.310546875, + "loss_num": 0.035888671875, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 13325684, + "step": 146 + }, + { + "epoch": 0.6125, + "grad_norm": 5.005823165635506, + "learning_rate": 5e-05, + "loss": 0.2255, + "num_input_tokens_seen": 13417384, + "step": 147 + }, + { + "epoch": 0.6125, + "loss": 0.22988131642341614, + "loss_ce": 0.008873018436133862, + "loss_iou": 0.4609375, + "loss_num": 0.038818359375, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 13417384, + "step": 147 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 7.873135529753248, + "learning_rate": 5e-05, + "loss": 0.2545, + "num_input_tokens_seen": 13508672, + "step": 148 + }, + { + "epoch": 0.6166666666666667, + "loss": 0.2839931845664978, + "loss_ce": 0.0019497520988807082, + "loss_iou": 0.404296875, + "loss_num": 0.0517578125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 13508672, + "step": 148 + }, + { + "epoch": 0.6208333333333333, + "grad_norm": 4.554201262753171, + "learning_rate": 5e-05, + "loss": 0.1965, + "num_input_tokens_seen": 13600320, + "step": 149 + }, + { + "epoch": 0.6208333333333333, + "loss": 0.17919516563415527, + "loss_ce": 0.0008504376164637506, + "loss_iou": 0.34765625, + "loss_num": 0.031494140625, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 13600320, + "step": 149 + }, + { + "epoch": 0.625, + "grad_norm": 9.670541579455165, + "learning_rate": 5e-05, + "loss": 0.3536, + "num_input_tokens_seen": 13689296, + "step": 150 + }, + { + "epoch": 0.625, + "loss": 0.39481455087661743, + "loss_ce": 0.043099481612443924, + "loss_iou": 0.1962890625, + "loss_num": 0.06787109375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 13689296, + "step": 150 + }, + { + "epoch": 0.6291666666666667, + "grad_norm": 19.88548591475147, + "learning_rate": 5e-05, + "loss": 0.2459, + "num_input_tokens_seen": 13780820, + "step": 151 + }, + { + "epoch": 0.6291666666666667, + "loss": 0.22300229966640472, + "loss_ce": 0.0016888338141143322, + "loss_iou": 0.33203125, + "loss_num": 0.040283203125, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 13780820, + "step": 151 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 5.031981077375081, + "learning_rate": 5e-05, + "loss": 0.1604, + "num_input_tokens_seen": 13872392, + "step": 152 + }, + { + "epoch": 0.6333333333333333, + "loss": 0.19695362448692322, + "loss_ce": 0.0008171653607860208, + "loss_iou": 0.353515625, + "loss_num": 0.034912109375, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 13872392, + "step": 152 + }, + { + "epoch": 0.6375, + "grad_norm": 8.194026676883864, + "learning_rate": 5e-05, + "loss": 0.2498, + "num_input_tokens_seen": 13963840, + "step": 153 + }, + { + "epoch": 0.6375, + "loss": 0.1730082929134369, + "loss_ce": 0.009373043663799763, + "loss_iou": 0.2216796875, + "loss_num": 0.030029296875, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 13963840, + "step": 153 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 7.232097422901255, + "learning_rate": 5e-05, + "loss": 0.1843, + "num_input_tokens_seen": 14054860, + "step": 154 + }, + { + "epoch": 0.6416666666666667, + "loss": 0.23983432352542877, + "loss_ce": 0.0012784129939973354, + "loss_iou": 0.29296875, + "loss_num": 0.044189453125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 14054860, + "step": 154 + }, + { + "epoch": 0.6458333333333334, + "grad_norm": 10.002820581844796, + "learning_rate": 5e-05, + "loss": 0.2336, + "num_input_tokens_seen": 14146504, + "step": 155 + }, + { + "epoch": 0.6458333333333334, + "loss": 0.24934682250022888, + "loss_ce": 0.0051146335899829865, + "loss_iou": 0.408203125, + "loss_num": 0.043701171875, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 14146504, + "step": 155 + }, + { + "epoch": 0.65, + "grad_norm": 12.315543047257076, + "learning_rate": 5e-05, + "loss": 0.2067, + "num_input_tokens_seen": 14237576, + "step": 156 + }, + { + "epoch": 0.65, + "loss": 0.21194952726364136, + "loss_ce": 0.003270330373197794, + "loss_iou": 0.283203125, + "loss_num": 0.038330078125, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 14237576, + "step": 156 + }, + { + "epoch": 0.6541666666666667, + "grad_norm": 2.8928526807905732, + "learning_rate": 5e-05, + "loss": 0.2503, + "num_input_tokens_seen": 14329332, + "step": 157 + }, + { + "epoch": 0.6541666666666667, + "loss": 0.32788723707199097, + "loss_ce": 0.012030322104692459, + "loss_iou": 0.25390625, + "loss_num": 0.06005859375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 14329332, + "step": 157 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 5.933111792460177, + "learning_rate": 5e-05, + "loss": 0.3143, + "num_input_tokens_seen": 14420520, + "step": 158 + }, + { + "epoch": 0.6583333333333333, + "loss": 0.3221921920776367, + "loss_ce": 0.002001758897677064, + "loss_iou": 0.185546875, + "loss_num": 0.061767578125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 14420520, + "step": 158 + }, + { + "epoch": 0.6625, + "grad_norm": 9.471262618082035, + "learning_rate": 5e-05, + "loss": 0.233, + "num_input_tokens_seen": 14511276, + "step": 159 + }, + { + "epoch": 0.6625, + "loss": 0.19342216849327087, + "loss_ce": 0.0015886790351942182, + "loss_iou": 0.267578125, + "loss_num": 0.034912109375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 14511276, + "step": 159 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 7.9125575547617535, + "learning_rate": 5e-05, + "loss": 0.346, + "num_input_tokens_seen": 14602924, + "step": 160 + }, + { + "epoch": 0.6666666666666666, + "loss": 0.23464325070381165, + "loss_ce": 0.0049679577350616455, + "loss_iou": 0.33203125, + "loss_num": 0.041748046875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 14602924, + "step": 160 + }, + { + "epoch": 0.6708333333333333, + "grad_norm": 3.765087310549006, + "learning_rate": 5e-05, + "loss": 0.2288, + "num_input_tokens_seen": 14694336, + "step": 161 + }, + { + "epoch": 0.6708333333333333, + "loss": 0.23900958895683289, + "loss_ce": 0.0012776643270626664, + "loss_iou": 0.322265625, + "loss_num": 0.04345703125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 14694336, + "step": 161 + }, + { + "epoch": 0.675, + "grad_norm": 3.2411568645640965, + "learning_rate": 5e-05, + "loss": 0.2227, + "num_input_tokens_seen": 14786132, + "step": 162 + }, + { + "epoch": 0.675, + "loss": 0.13679921627044678, + "loss_ce": 0.002338771941140294, + "loss_iou": 0.298828125, + "loss_num": 0.02294921875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 14786132, + "step": 162 + }, + { + "epoch": 0.6791666666666667, + "grad_norm": 6.949492710171496, + "learning_rate": 5e-05, + "loss": 0.321, + "num_input_tokens_seen": 14877432, + "step": 163 + }, + { + "epoch": 0.6791666666666667, + "loss": 0.2304680496454239, + "loss_ce": 0.0015862042782828212, + "loss_iou": 0.361328125, + "loss_num": 0.041015625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 14877432, + "step": 163 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 8.767037215884402, + "learning_rate": 5e-05, + "loss": 0.2281, + "num_input_tokens_seen": 14968476, + "step": 164 + }, + { + "epoch": 0.6833333333333333, + "loss": 0.24121038615703583, + "loss_ce": 0.000365669431630522, + "loss_iou": 0.490234375, + "loss_num": 0.041748046875, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 14968476, + "step": 164 + }, + { + "epoch": 0.6875, + "grad_norm": 3.3738702456098504, + "learning_rate": 5e-05, + "loss": 0.2412, + "num_input_tokens_seen": 15059764, + "step": 165 + }, + { + "epoch": 0.6875, + "loss": 0.1965644657611847, + "loss_ce": 0.0044258031994104385, + "loss_iou": 0.330078125, + "loss_num": 0.0341796875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 15059764, + "step": 165 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 7.237334795990486, + "learning_rate": 5e-05, + "loss": 0.272, + "num_input_tokens_seen": 15151340, + "step": 166 + }, + { + "epoch": 0.6916666666666667, + "loss": 0.2698882818222046, + "loss_ce": 0.0034087959211319685, + "loss_iou": 0.341796875, + "loss_num": 0.048828125, + "loss_xval": 0.265625, + "num_input_tokens_seen": 15151340, + "step": 166 + }, + { + "epoch": 0.6958333333333333, + "grad_norm": 4.492972449650082, + "learning_rate": 5e-05, + "loss": 0.2617, + "num_input_tokens_seen": 15243280, + "step": 167 + }, + { + "epoch": 0.6958333333333333, + "loss": 0.3138772249221802, + "loss_ce": 0.0039406768046319485, + "loss_iou": 0.2431640625, + "loss_num": 0.05859375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 15243280, + "step": 167 + }, + { + "epoch": 0.7, + "grad_norm": 5.065317522783334, + "learning_rate": 5e-05, + "loss": 0.2395, + "num_input_tokens_seen": 15333408, + "step": 168 + }, + { + "epoch": 0.7, + "loss": 0.24651506543159485, + "loss_ce": 0.0019166837446391582, + "loss_iou": 0.275390625, + "loss_num": 0.04541015625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 15333408, + "step": 168 + }, + { + "epoch": 0.7041666666666667, + "grad_norm": 37.52703552517235, + "learning_rate": 5e-05, + "loss": 0.2407, + "num_input_tokens_seen": 15425096, + "step": 169 + }, + { + "epoch": 0.7041666666666667, + "loss": 0.2536095082759857, + "loss_ce": 0.0022667348384857178, + "loss_iou": 0.33203125, + "loss_num": 0.0458984375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 15425096, + "step": 169 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 11.029881651516087, + "learning_rate": 5e-05, + "loss": 0.2446, + "num_input_tokens_seen": 15516556, + "step": 170 + }, + { + "epoch": 0.7083333333333334, + "loss": 0.2343108355998993, + "loss_ce": 0.013241507112979889, + "loss_iou": 0.412109375, + "loss_num": 0.03857421875, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 15516556, + "step": 170 + }, + { + "epoch": 0.7125, + "grad_norm": 4.589534025223679, + "learning_rate": 5e-05, + "loss": 0.2029, + "num_input_tokens_seen": 15607732, + "step": 171 + }, + { + "epoch": 0.7125, + "loss": 0.16617685556411743, + "loss_ce": 0.0017481537070125341, + "loss_iou": 0.26171875, + "loss_num": 0.029296875, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 15607732, + "step": 171 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 5.615815550066399, + "learning_rate": 5e-05, + "loss": 0.2673, + "num_input_tokens_seen": 15699344, + "step": 172 + }, + { + "epoch": 0.7166666666666667, + "loss": 0.3418896198272705, + "loss_ce": 0.0038158849347382784, + "loss_iou": 0.271484375, + "loss_num": 0.06396484375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 15699344, + "step": 172 + }, + { + "epoch": 0.7208333333333333, + "grad_norm": 25.13652914203415, + "learning_rate": 5e-05, + "loss": 0.276, + "num_input_tokens_seen": 15791220, + "step": 173 + }, + { + "epoch": 0.7208333333333333, + "loss": 0.26045161485671997, + "loss_ce": 0.0012353132478892803, + "loss_iou": 0.267578125, + "loss_num": 0.048095703125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 15791220, + "step": 173 + }, + { + "epoch": 0.725, + "grad_norm": 13.090432791261154, + "learning_rate": 5e-05, + "loss": 0.2685, + "num_input_tokens_seen": 15882808, + "step": 174 + }, + { + "epoch": 0.725, + "loss": 0.19032147526741028, + "loss_ce": 0.004835621453821659, + "loss_iou": 0.462890625, + "loss_num": 0.0306396484375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 15882808, + "step": 174 + }, + { + "epoch": 0.7291666666666666, + "grad_norm": 7.67733189232213, + "learning_rate": 5e-05, + "loss": 0.1935, + "num_input_tokens_seen": 15973956, + "step": 175 + }, + { + "epoch": 0.7291666666666666, + "loss": 0.16638563573360443, + "loss_ce": 0.0017127819592133164, + "loss_iou": 0.388671875, + "loss_num": 0.027587890625, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 15973956, + "step": 175 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 6.305357125357973, + "learning_rate": 5e-05, + "loss": 0.2835, + "num_input_tokens_seen": 16065444, + "step": 176 + }, + { + "epoch": 0.7333333333333333, + "loss": 0.22557082772254944, + "loss_ce": 0.017501965165138245, + "loss_iou": 0.40625, + "loss_num": 0.035888671875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 16065444, + "step": 176 + }, + { + "epoch": 0.7375, + "grad_norm": 8.085000629433312, + "learning_rate": 5e-05, + "loss": 0.2616, + "num_input_tokens_seen": 16156380, + "step": 177 + }, + { + "epoch": 0.7375, + "loss": 0.29290682077407837, + "loss_ce": 0.0012808414176106453, + "loss_iou": 0.396484375, + "loss_num": 0.052734375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 16156380, + "step": 177 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 13.57474615095706, + "learning_rate": 5e-05, + "loss": 0.2052, + "num_input_tokens_seen": 16247840, + "step": 178 + }, + { + "epoch": 0.7416666666666667, + "loss": 0.2054433822631836, + "loss_ce": 0.026518816128373146, + "loss_iou": 0.265625, + "loss_num": 0.031982421875, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 16247840, + "step": 178 + }, + { + "epoch": 0.7458333333333333, + "grad_norm": 14.721342043506576, + "learning_rate": 5e-05, + "loss": 0.2228, + "num_input_tokens_seen": 16338660, + "step": 179 + }, + { + "epoch": 0.7458333333333333, + "loss": 0.21274816989898682, + "loss_ce": 0.009684196673333645, + "loss_iou": 0.25, + "loss_num": 0.037109375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 16338660, + "step": 179 + }, + { + "epoch": 0.75, + "grad_norm": 8.959153814889348, + "learning_rate": 5e-05, + "loss": 0.2944, + "num_input_tokens_seen": 16430176, + "step": 180 + }, + { + "epoch": 0.75, + "loss": 0.3967931270599365, + "loss_ce": 0.0061376336961984634, + "loss_iou": 0.25390625, + "loss_num": 0.07470703125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 16430176, + "step": 180 + }, + { + "epoch": 0.7541666666666667, + "grad_norm": 4.907715439269668, + "learning_rate": 5e-05, + "loss": 0.2148, + "num_input_tokens_seen": 16521752, + "step": 181 + }, + { + "epoch": 0.7541666666666667, + "loss": 0.2014847695827484, + "loss_ce": 0.004097080789506435, + "loss_iou": 0.28125, + "loss_num": 0.035400390625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 16521752, + "step": 181 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 12.251478551523167, + "learning_rate": 5e-05, + "loss": 0.2387, + "num_input_tokens_seen": 16613096, + "step": 182 + }, + { + "epoch": 0.7583333333333333, + "loss": 0.20524071156978607, + "loss_ce": 0.00028466549701988697, + "loss_iou": 0.259765625, + "loss_num": 0.037353515625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 16613096, + "step": 182 + }, + { + "epoch": 0.7625, + "grad_norm": 14.719844192142459, + "learning_rate": 5e-05, + "loss": 0.1968, + "num_input_tokens_seen": 16703780, + "step": 183 + }, + { + "epoch": 0.7625, + "loss": 0.21019019186496735, + "loss_ce": 0.005417247768491507, + "loss_iou": 0.306640625, + "loss_num": 0.03662109375, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 16703780, + "step": 183 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 13.62691494879007, + "learning_rate": 5e-05, + "loss": 0.2891, + "num_input_tokens_seen": 16794776, + "step": 184 + }, + { + "epoch": 0.7666666666666667, + "loss": 0.27239733934402466, + "loss_ce": 0.000912959803827107, + "loss_iou": 0.62890625, + "loss_num": 0.045166015625, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 16794776, + "step": 184 + }, + { + "epoch": 0.7708333333333334, + "grad_norm": 24.418623447127, + "learning_rate": 5e-05, + "loss": 0.2182, + "num_input_tokens_seen": 16886236, + "step": 185 + }, + { + "epoch": 0.7708333333333334, + "loss": 0.22815991938114166, + "loss_ce": 0.022166268900036812, + "loss_iou": 0.404296875, + "loss_num": 0.03515625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 16886236, + "step": 185 + }, + { + "epoch": 0.775, + "grad_norm": 9.956108878177691, + "learning_rate": 5e-05, + "loss": 0.2518, + "num_input_tokens_seen": 16977808, + "step": 186 + }, + { + "epoch": 0.775, + "loss": 0.22952379286289215, + "loss_ce": 0.007844101637601852, + "loss_iou": 0.310546875, + "loss_num": 0.039794921875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 16977808, + "step": 186 + }, + { + "epoch": 0.7791666666666667, + "grad_norm": 10.905812593604734, + "learning_rate": 5e-05, + "loss": 0.19, + "num_input_tokens_seen": 17069052, + "step": 187 + }, + { + "epoch": 0.7791666666666667, + "loss": 0.21624162793159485, + "loss_ce": 0.009637624025344849, + "loss_iou": 0.25, + "loss_num": 0.03759765625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 17069052, + "step": 187 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 7.466897830018128, + "learning_rate": 5e-05, + "loss": 0.3065, + "num_input_tokens_seen": 17158888, + "step": 188 + }, + { + "epoch": 0.7833333333333333, + "loss": 0.3236009478569031, + "loss_ce": 0.0020066953729838133, + "loss_iou": 0.40234375, + "loss_num": 0.05810546875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 17158888, + "step": 188 + }, + { + "epoch": 0.7875, + "grad_norm": 11.43031761199009, + "learning_rate": 5e-05, + "loss": 0.2721, + "num_input_tokens_seen": 17250224, + "step": 189 + }, + { + "epoch": 0.7875, + "loss": 0.27115926146507263, + "loss_ce": 0.002482494106516242, + "loss_iou": 0.419921875, + "loss_num": 0.04736328125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 17250224, + "step": 189 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 6.376115694685188, + "learning_rate": 5e-05, + "loss": 0.3104, + "num_input_tokens_seen": 17341208, + "step": 190 + }, + { + "epoch": 0.7916666666666666, + "loss": 0.3440701961517334, + "loss_ce": 0.0027616131119430065, + "loss_iou": 0.330078125, + "loss_num": 0.0634765625, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 17341208, + "step": 190 + }, + { + "epoch": 0.7958333333333333, + "grad_norm": 7.893330863623168, + "learning_rate": 5e-05, + "loss": 0.2149, + "num_input_tokens_seen": 17432612, + "step": 191 + }, + { + "epoch": 0.7958333333333333, + "loss": 0.23782208561897278, + "loss_ce": 0.0010667047463357449, + "loss_iou": 0.2177734375, + "loss_num": 0.044189453125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 17432612, + "step": 191 + }, + { + "epoch": 0.8, + "grad_norm": 22.18997521206718, + "learning_rate": 5e-05, + "loss": 0.3162, + "num_input_tokens_seen": 17523780, + "step": 192 + }, + { + "epoch": 0.8, + "loss": 0.34398892521858215, + "loss_ce": 0.002802408766001463, + "loss_iou": 0.388671875, + "loss_num": 0.062255859375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 17523780, + "step": 192 + }, + { + "epoch": 0.8041666666666667, + "grad_norm": 7.048614109115301, + "learning_rate": 5e-05, + "loss": 0.2152, + "num_input_tokens_seen": 17615380, + "step": 193 + }, + { + "epoch": 0.8041666666666667, + "loss": 0.21223366260528564, + "loss_ce": 0.0012351283803582191, + "loss_iou": 0.373046875, + "loss_num": 0.03662109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 17615380, + "step": 193 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 7.8711229313668305, + "learning_rate": 5e-05, + "loss": 0.2219, + "num_input_tokens_seen": 17706776, + "step": 194 + }, + { + "epoch": 0.8083333333333333, + "loss": 0.24827273190021515, + "loss_ce": 0.004498313646763563, + "loss_iou": 0.30078125, + "loss_num": 0.044189453125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 17706776, + "step": 194 + }, + { + "epoch": 0.8125, + "grad_norm": 11.585751160130735, + "learning_rate": 5e-05, + "loss": 0.2551, + "num_input_tokens_seen": 17798216, + "step": 195 + }, + { + "epoch": 0.8125, + "loss": 0.2494007796049118, + "loss_ce": 0.0014149582711979747, + "loss_iou": 0.494140625, + "loss_num": 0.0419921875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 17798216, + "step": 195 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 11.481203844670077, + "learning_rate": 5e-05, + "loss": 0.2607, + "num_input_tokens_seen": 17889460, + "step": 196 + }, + { + "epoch": 0.8166666666666667, + "loss": 0.303374707698822, + "loss_ce": 0.003509008791297674, + "loss_iou": 0.392578125, + "loss_num": 0.053955078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 17889460, + "step": 196 + }, + { + "epoch": 0.8208333333333333, + "grad_norm": 12.774468104359062, + "learning_rate": 5e-05, + "loss": 0.2782, + "num_input_tokens_seen": 17981152, + "step": 197 + }, + { + "epoch": 0.8208333333333333, + "loss": 0.27713096141815186, + "loss_ce": 0.001740352949127555, + "loss_iou": 0.302734375, + "loss_num": 0.05029296875, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 17981152, + "step": 197 + }, + { + "epoch": 0.825, + "grad_norm": 6.114818970612895, + "learning_rate": 5e-05, + "loss": 0.285, + "num_input_tokens_seen": 18072828, + "step": 198 + }, + { + "epoch": 0.825, + "loss": 0.22632259130477905, + "loss_ce": 0.0010418322635814548, + "loss_iou": 0.296875, + "loss_num": 0.040283203125, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 18072828, + "step": 198 + }, + { + "epoch": 0.8291666666666667, + "grad_norm": 18.157425733012563, + "learning_rate": 5e-05, + "loss": 0.2555, + "num_input_tokens_seen": 18164472, + "step": 199 + }, + { + "epoch": 0.8291666666666667, + "loss": 0.28996580839157104, + "loss_ce": 0.002185048069804907, + "loss_iou": 0.3359375, + "loss_num": 0.05224609375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 18164472, + "step": 199 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 7.129196337506752, + "learning_rate": 5e-05, + "loss": 0.2419, + "num_input_tokens_seen": 18254864, + "step": 200 + }, + { + "epoch": 0.8333333333333334, + "loss": 0.2802783250808716, + "loss_ce": 0.01379882637411356, + "loss_iou": 0.1826171875, + "loss_num": 0.05029296875, + "loss_xval": 0.265625, + "num_input_tokens_seen": 18254864, + "step": 200 + }, + { + "epoch": 0.8375, + "grad_norm": 12.797975605688038, + "learning_rate": 5e-05, + "loss": 0.2356, + "num_input_tokens_seen": 18346400, + "step": 201 + }, + { + "epoch": 0.8375, + "loss": 0.24703559279441833, + "loss_ce": 0.0029560080729424953, + "loss_iou": 0.43359375, + "loss_num": 0.0419921875, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 18346400, + "step": 201 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 16.933843013817935, + "learning_rate": 5e-05, + "loss": 0.3267, + "num_input_tokens_seen": 18437604, + "step": 202 + }, + { + "epoch": 0.8416666666666667, + "loss": 0.2825471758842468, + "loss_ce": 0.0008394101168960333, + "loss_iou": 0.36328125, + "loss_num": 0.050537109375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 18437604, + "step": 202 + }, + { + "epoch": 0.8458333333333333, + "grad_norm": 5.550749003705444, + "learning_rate": 5e-05, + "loss": 0.2229, + "num_input_tokens_seen": 18528984, + "step": 203 + }, + { + "epoch": 0.8458333333333333, + "loss": 0.22848908603191376, + "loss_ce": 0.0011941741686314344, + "loss_iou": 0.396484375, + "loss_num": 0.0390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 18528984, + "step": 203 + }, + { + "epoch": 0.85, + "grad_norm": 3.87991186767316, + "learning_rate": 5e-05, + "loss": 0.2451, + "num_input_tokens_seen": 18620812, + "step": 204 + }, + { + "epoch": 0.85, + "loss": 0.21404461562633514, + "loss_ce": 0.0022221102844923735, + "loss_iou": 0.2734375, + "loss_num": 0.037841796875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 18620812, + "step": 204 + }, + { + "epoch": 0.8541666666666666, + "grad_norm": 13.040976616283691, + "learning_rate": 5e-05, + "loss": 0.2431, + "num_input_tokens_seen": 18711628, + "step": 205 + }, + { + "epoch": 0.8541666666666666, + "loss": 0.2216428518295288, + "loss_ce": 0.003564231563359499, + "loss_iou": 0.291015625, + "loss_num": 0.038818359375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 18711628, + "step": 205 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 16.947379452066215, + "learning_rate": 5e-05, + "loss": 0.1924, + "num_input_tokens_seen": 18803292, + "step": 206 + }, + { + "epoch": 0.8583333333333333, + "loss": 0.1999918520450592, + "loss_ce": 0.004557272419333458, + "loss_iou": 0.359375, + "loss_num": 0.033203125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 18803292, + "step": 206 + }, + { + "epoch": 0.8625, + "grad_norm": 6.14659998334949, + "learning_rate": 5e-05, + "loss": 0.2451, + "num_input_tokens_seen": 18894160, + "step": 207 + }, + { + "epoch": 0.8625, + "loss": 0.3386986255645752, + "loss_ce": 0.0001366123033221811, + "loss_iou": 0.310546875, + "loss_num": 0.0625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 18894160, + "step": 207 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 11.278150709662043, + "learning_rate": 5e-05, + "loss": 0.2223, + "num_input_tokens_seen": 18985776, + "step": 208 + }, + { + "epoch": 0.8666666666666667, + "loss": 0.22064605355262756, + "loss_ce": 0.0021401969715952873, + "loss_iou": 0.46875, + "loss_num": 0.035888671875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 18985776, + "step": 208 + }, + { + "epoch": 0.8708333333333333, + "grad_norm": 10.346683453020976, + "learning_rate": 5e-05, + "loss": 0.1693, + "num_input_tokens_seen": 19076880, + "step": 209 + }, + { + "epoch": 0.8708333333333333, + "loss": 0.15130558609962463, + "loss_ce": 0.016723062843084335, + "loss_iou": 0.25390625, + "loss_num": 0.022705078125, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 19076880, + "step": 209 + }, + { + "epoch": 0.875, + "grad_norm": 8.402730925321864, + "learning_rate": 5e-05, + "loss": 0.2744, + "num_input_tokens_seen": 19167716, + "step": 210 + }, + { + "epoch": 0.875, + "loss": 0.31717830896377563, + "loss_ce": 0.0007720459252595901, + "loss_iou": 0.353515625, + "loss_num": 0.057373046875, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 19167716, + "step": 210 + }, + { + "epoch": 0.8791666666666667, + "grad_norm": 18.201038700412692, + "learning_rate": 5e-05, + "loss": 0.2495, + "num_input_tokens_seen": 19259032, + "step": 211 + }, + { + "epoch": 0.8791666666666667, + "loss": 0.2656664550304413, + "loss_ce": 0.0011400955263525248, + "loss_iou": 0.3984375, + "loss_num": 0.04638671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 19259032, + "step": 211 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 4.753155681213643, + "learning_rate": 5e-05, + "loss": 0.2433, + "num_input_tokens_seen": 19349292, + "step": 212 + }, + { + "epoch": 0.8833333333333333, + "loss": 0.2522953152656555, + "loss_ce": 0.0007084023673087358, + "loss_iou": 0.41015625, + "loss_num": 0.04345703125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 19349292, + "step": 212 + }, + { + "epoch": 0.8875, + "grad_norm": 24.512003053945374, + "learning_rate": 5e-05, + "loss": 0.2135, + "num_input_tokens_seen": 19441488, + "step": 213 + }, + { + "epoch": 0.8875, + "loss": 0.22530657052993774, + "loss_ce": 0.003199649043381214, + "loss_iou": 0.41796875, + "loss_num": 0.037353515625, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 19441488, + "step": 213 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 17.0384982817077, + "learning_rate": 5e-05, + "loss": 0.2938, + "num_input_tokens_seen": 19532868, + "step": 214 + }, + { + "epoch": 0.8916666666666667, + "loss": 0.37303754687309265, + "loss_ce": 0.0029203486628830433, + "loss_iou": 0.220703125, + "loss_num": 0.0703125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 19532868, + "step": 214 + }, + { + "epoch": 0.8958333333333334, + "grad_norm": 29.921832394338654, + "learning_rate": 5e-05, + "loss": 0.2486, + "num_input_tokens_seen": 19624120, + "step": 215 + }, + { + "epoch": 0.8958333333333334, + "loss": 0.22709499299526215, + "loss_ce": 0.005018561612814665, + "loss_iou": 0.326171875, + "loss_num": 0.038818359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 19624120, + "step": 215 + }, + { + "epoch": 0.9, + "grad_norm": 9.333427646818315, + "learning_rate": 5e-05, + "loss": 0.2147, + "num_input_tokens_seen": 19715496, + "step": 216 + }, + { + "epoch": 0.9, + "loss": 0.2823033928871155, + "loss_ce": 0.003311683889478445, + "loss_iou": 0.314453125, + "loss_num": 0.050537109375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 19715496, + "step": 216 + }, + { + "epoch": 0.9041666666666667, + "grad_norm": 4.019548375654141, + "learning_rate": 5e-05, + "loss": 0.2971, + "num_input_tokens_seen": 19806812, + "step": 217 + }, + { + "epoch": 0.9041666666666667, + "loss": 0.30291223526000977, + "loss_ce": 0.0006051263189874589, + "loss_iou": 0.474609375, + "loss_num": 0.05224609375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 19806812, + "step": 217 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 4.067634067480343, + "learning_rate": 5e-05, + "loss": 0.2725, + "num_input_tokens_seen": 19898440, + "step": 218 + }, + { + "epoch": 0.9083333333333333, + "loss": 0.20470158755779266, + "loss_ce": 0.007558043580502272, + "loss_iou": 0.5, + "loss_num": 0.03076171875, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 19898440, + "step": 218 + }, + { + "epoch": 0.9125, + "grad_norm": 4.292723853828602, + "learning_rate": 5e-05, + "loss": 0.2351, + "num_input_tokens_seen": 19989852, + "step": 219 + }, + { + "epoch": 0.9125, + "loss": 0.29990026354789734, + "loss_ce": 0.00043127487879246473, + "loss_iou": 0.27734375, + "loss_num": 0.05517578125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 19989852, + "step": 219 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 8.376505004890973, + "learning_rate": 5e-05, + "loss": 0.2903, + "num_input_tokens_seen": 20081004, + "step": 220 + }, + { + "epoch": 0.9166666666666666, + "loss": 0.2845813035964966, + "loss_ce": 0.0008288907120004296, + "loss_iou": 0.255859375, + "loss_num": 0.05224609375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 20081004, + "step": 220 + }, + { + "epoch": 0.9208333333333333, + "grad_norm": 15.542808906125297, + "learning_rate": 5e-05, + "loss": 0.2976, + "num_input_tokens_seen": 20172584, + "step": 221 + }, + { + "epoch": 0.9208333333333333, + "loss": 0.33047717809677124, + "loss_ce": 0.003969602286815643, + "loss_iou": 0.275390625, + "loss_num": 0.060546875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 20172584, + "step": 221 + }, + { + "epoch": 0.925, + "grad_norm": 12.923561463971636, + "learning_rate": 5e-05, + "loss": 0.2815, + "num_input_tokens_seen": 20264016, + "step": 222 + }, + { + "epoch": 0.925, + "loss": 0.3422977030277252, + "loss_ce": 0.0036746645346283913, + "loss_iou": 0.421875, + "loss_num": 0.060302734375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 20264016, + "step": 222 + }, + { + "epoch": 0.9291666666666667, + "grad_norm": 6.655322104784028, + "learning_rate": 5e-05, + "loss": 0.2127, + "num_input_tokens_seen": 20355796, + "step": 223 + }, + { + "epoch": 0.9291666666666667, + "loss": 0.19036522507667542, + "loss_ce": 0.0031093659345060587, + "loss_iou": 0.259765625, + "loss_num": 0.032958984375, + "loss_xval": 0.1875, + "num_input_tokens_seen": 20355796, + "step": 223 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 37.85533531919503, + "learning_rate": 5e-05, + "loss": 0.2133, + "num_input_tokens_seen": 20447404, + "step": 224 + }, + { + "epoch": 0.9333333333333333, + "loss": 0.257813036441803, + "loss_ce": 0.004028834868222475, + "loss_iou": 0.373046875, + "loss_num": 0.044189453125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 20447404, + "step": 224 + }, + { + "epoch": 0.9375, + "grad_norm": 30.416837635863704, + "learning_rate": 5e-05, + "loss": 0.2211, + "num_input_tokens_seen": 20538856, + "step": 225 + }, + { + "epoch": 0.9375, + "loss": 0.24632111191749573, + "loss_ce": 0.002546711126342416, + "loss_iou": 0.435546875, + "loss_num": 0.041015625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 20538856, + "step": 225 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 11.051058935071033, + "learning_rate": 5e-05, + "loss": 0.1935, + "num_input_tokens_seen": 20629960, + "step": 226 + }, + { + "epoch": 0.9416666666666667, + "loss": 0.17319779098033905, + "loss_ce": 0.003947307821363211, + "loss_iou": 0.388671875, + "loss_num": 0.02685546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 20629960, + "step": 226 + }, + { + "epoch": 0.9458333333333333, + "grad_norm": 48.233469576133025, + "learning_rate": 5e-05, + "loss": 0.2862, + "num_input_tokens_seen": 20721456, + "step": 227 + }, + { + "epoch": 0.9458333333333333, + "loss": 0.29020804166793823, + "loss_ce": 0.002122091129422188, + "loss_iou": 0.3984375, + "loss_num": 0.05029296875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 20721456, + "step": 227 + }, + { + "epoch": 0.95, + "grad_norm": 8.113741595409435, + "learning_rate": 5e-05, + "loss": 0.2437, + "num_input_tokens_seen": 20812236, + "step": 228 + }, + { + "epoch": 0.95, + "loss": 0.2340792417526245, + "loss_ce": 0.004526016302406788, + "loss_iou": 0.392578125, + "loss_num": 0.038818359375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 20812236, + "step": 228 + }, + { + "epoch": 0.9541666666666667, + "grad_norm": 4.566364595359815, + "learning_rate": 5e-05, + "loss": 0.2271, + "num_input_tokens_seen": 20903888, + "step": 229 + }, + { + "epoch": 0.9541666666666667, + "loss": 0.19538497924804688, + "loss_ce": 0.0030632000416517258, + "loss_iou": 0.34765625, + "loss_num": 0.0322265625, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 20903888, + "step": 229 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 7.209407509206219, + "learning_rate": 5e-05, + "loss": 0.2768, + "num_input_tokens_seen": 20995620, + "step": 230 + }, + { + "epoch": 0.9583333333333334, + "loss": 0.30961310863494873, + "loss_ce": 0.00449835229665041, + "loss_iou": 0.251953125, + "loss_num": 0.056396484375, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 20995620, + "step": 230 + }, + { + "epoch": 0.9625, + "grad_norm": 5.104687256344693, + "learning_rate": 5e-05, + "loss": 0.255, + "num_input_tokens_seen": 21086992, + "step": 231 + }, + { + "epoch": 0.9625, + "loss": 0.252264142036438, + "loss_ce": 0.010686978697776794, + "loss_iou": 0.2421875, + "loss_num": 0.0439453125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 21086992, + "step": 231 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 7.374332772342045, + "learning_rate": 5e-05, + "loss": 0.2872, + "num_input_tokens_seen": 21178204, + "step": 232 + }, + { + "epoch": 0.9666666666666667, + "loss": 0.29594823718070984, + "loss_ce": 0.00017185957403853536, + "loss_iou": 0.1591796875, + "loss_num": 0.05615234375, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 21178204, + "step": 232 + }, + { + "epoch": 0.9708333333333333, + "grad_norm": 18.00709127423236, + "learning_rate": 5e-05, + "loss": 0.2467, + "num_input_tokens_seen": 21269452, + "step": 233 + }, + { + "epoch": 0.9708333333333333, + "loss": 0.25176990032196045, + "loss_ce": 0.005004778038710356, + "loss_iou": 0.4140625, + "loss_num": 0.04150390625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 21269452, + "step": 233 + }, + { + "epoch": 0.975, + "grad_norm": 5.732409812390045, + "learning_rate": 5e-05, + "loss": 0.3674, + "num_input_tokens_seen": 21360916, + "step": 234 + }, + { + "epoch": 0.975, + "loss": 0.4744373559951782, + "loss_ce": 7.211390766315162e-05, + "loss_iou": 0.474609375, + "loss_num": 0.0859375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 21360916, + "step": 234 + }, + { + "epoch": 0.9791666666666666, + "grad_norm": 36.35128373431882, + "learning_rate": 5e-05, + "loss": 0.2559, + "num_input_tokens_seen": 21452632, + "step": 235 + }, + { + "epoch": 0.9791666666666666, + "loss": 0.2777550220489502, + "loss_ce": 0.0019981854129582644, + "loss_iou": 0.423828125, + "loss_num": 0.047119140625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 21452632, + "step": 235 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 11.575343055187652, + "learning_rate": 5e-05, + "loss": 0.2949, + "num_input_tokens_seen": 21544024, + "step": 236 + }, + { + "epoch": 0.9833333333333333, + "loss": 0.2095259130001068, + "loss_ce": 0.003837417345494032, + "loss_iou": 0.1787109375, + "loss_num": 0.037841796875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 21544024, + "step": 236 + }, + { + "epoch": 0.9875, + "grad_norm": 10.772901233226547, + "learning_rate": 5e-05, + "loss": 0.2122, + "num_input_tokens_seen": 21635772, + "step": 237 + }, + { + "epoch": 0.9875, + "loss": 0.25139501690864563, + "loss_ce": 0.0066440412774682045, + "loss_iou": 0.25390625, + "loss_num": 0.044189453125, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 21635772, + "step": 237 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 13.725125356732619, + "learning_rate": 5e-05, + "loss": 0.2273, + "num_input_tokens_seen": 21727184, + "step": 238 + }, + { + "epoch": 0.9916666666666667, + "loss": 0.22478605806827545, + "loss_ce": 0.0029842983931303024, + "loss_iou": 0.314453125, + "loss_num": 0.038330078125, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 21727184, + "step": 238 + }, + { + "epoch": 0.9958333333333333, + "grad_norm": 8.603437946176337, + "learning_rate": 5e-05, + "loss": 0.2233, + "num_input_tokens_seen": 21818536, + "step": 239 + }, + { + "epoch": 0.9958333333333333, + "loss": 0.20272454619407654, + "loss_ce": 0.011501399800181389, + "loss_iou": 0.24609375, + "loss_num": 0.033447265625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 21818536, + "step": 239 + }, + { + "epoch": 1.0, + "grad_norm": 5.559071328596137, + "learning_rate": 5e-05, + "loss": 0.3042, + "num_input_tokens_seen": 21910184, + "step": 240 + }, + { + "epoch": 1.0, + "loss": 0.33313196897506714, + "loss_ce": 0.0036031443160027266, + "loss_iou": 0.271484375, + "loss_num": 0.060546875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 21910184, + "step": 240 + }, + { + "epoch": 1.0041666666666667, + "grad_norm": 16.35994363459648, + "learning_rate": 5e-05, + "loss": 0.2529, + "num_input_tokens_seen": 22001376, + "step": 241 + }, + { + "epoch": 1.0041666666666667, + "loss": 0.2235061228275299, + "loss_ce": 0.0001784780470188707, + "loss_iou": 0.34765625, + "loss_num": 0.0380859375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 22001376, + "step": 241 + }, + { + "epoch": 1.0083333333333333, + "grad_norm": 17.81522470048974, + "learning_rate": 5e-05, + "loss": 0.3148, + "num_input_tokens_seen": 22092840, + "step": 242 + }, + { + "epoch": 1.0083333333333333, + "loss": 0.3183177709579468, + "loss_ce": 0.0004466568643692881, + "loss_iou": 0.44140625, + "loss_num": 0.05517578125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 22092840, + "step": 242 + }, + { + "epoch": 1.0125, + "grad_norm": 23.345640800716485, + "learning_rate": 5e-05, + "loss": 0.3031, + "num_input_tokens_seen": 22184588, + "step": 243 + }, + { + "epoch": 1.0125, + "loss": 0.20814365148544312, + "loss_ce": 0.0030655222944915295, + "loss_iou": 0.36328125, + "loss_num": 0.033935546875, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 22184588, + "step": 243 + }, + { + "epoch": 1.0166666666666666, + "grad_norm": 6.665116879017802, + "learning_rate": 5e-05, + "loss": 0.2888, + "num_input_tokens_seen": 22275524, + "step": 244 + }, + { + "epoch": 1.0166666666666666, + "loss": 0.21397538483142853, + "loss_ce": 0.00033707439433783293, + "loss_iou": 0.337890625, + "loss_num": 0.0361328125, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 22275524, + "step": 244 + }, + { + "epoch": 1.0208333333333333, + "grad_norm": 6.587733942465594, + "learning_rate": 5e-05, + "loss": 0.2162, + "num_input_tokens_seen": 22366976, + "step": 245 + }, + { + "epoch": 1.0208333333333333, + "loss": 0.2064395546913147, + "loss_ce": 0.0006289951270446181, + "loss_iou": 0.32421875, + "loss_num": 0.034912109375, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 22366976, + "step": 245 + }, + { + "epoch": 1.025, + "grad_norm": 3.399496317951607, + "learning_rate": 5e-05, + "loss": 0.1985, + "num_input_tokens_seen": 22458628, + "step": 246 + }, + { + "epoch": 1.025, + "loss": 0.18037378787994385, + "loss_ce": 0.001357677741907537, + "loss_iou": 0.01239013671875, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 22458628, + "step": 246 + }, + { + "epoch": 1.0291666666666666, + "grad_norm": 7.5768226569415775, + "learning_rate": 5e-05, + "loss": 0.2429, + "num_input_tokens_seen": 22549536, + "step": 247 + }, + { + "epoch": 1.0291666666666666, + "loss": 0.22366707026958466, + "loss_ce": 0.0014991069911047816, + "loss_iou": 0.3984375, + "loss_num": 0.03662109375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 22549536, + "step": 247 + }, + { + "epoch": 1.0333333333333334, + "grad_norm": 6.386220702582793, + "learning_rate": 5e-05, + "loss": 0.2185, + "num_input_tokens_seen": 22641084, + "step": 248 + }, + { + "epoch": 1.0333333333333334, + "loss": 0.2533068358898163, + "loss_ce": 0.002147157210856676, + "loss_iou": 0.37109375, + "loss_num": 0.04296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 22641084, + "step": 248 + }, + { + "epoch": 1.0375, + "grad_norm": 10.756252270916605, + "learning_rate": 5e-05, + "loss": 0.2297, + "num_input_tokens_seen": 22731980, + "step": 249 + }, + { + "epoch": 1.0375, + "loss": 0.20865212380886078, + "loss_ce": 0.001681906171143055, + "loss_iou": 0.3046875, + "loss_num": 0.035400390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 22731980, + "step": 249 + }, + { + "epoch": 1.0416666666666667, + "grad_norm": 11.368459723856441, + "learning_rate": 5e-05, + "loss": 0.2095, + "num_input_tokens_seen": 22823056, + "step": 250 + }, + { + "epoch": 1.0416666666666667, + "eval_seeclick_CIoU": 0.21376945078372955, + "eval_seeclick_GIoU": 0.1773088276386261, + "eval_seeclick_IoU": 0.3156193494796753, + "eval_seeclick_MAE_all": 0.09457048028707504, + "eval_seeclick_MAE_h": 0.05827994458377361, + "eval_seeclick_MAE_w": 0.1870652362704277, + "eval_seeclick_MAE_x_boxes": 0.21573207527399063, + "eval_seeclick_MAE_y_boxes": 0.059349725022912025, + "eval_seeclick_NUM_probability": 0.9999997615814209, + "eval_seeclick_inside_bbox": 0.5994318127632141, + "eval_seeclick_loss": 0.5629642605781555, + "eval_seeclick_loss_ce": 0.06879188492894173, + "eval_seeclick_loss_iou": 0.4404296875, + "eval_seeclick_loss_num": 0.0899200439453125, + "eval_seeclick_loss_xval": 0.4935302734375, + "eval_seeclick_runtime": 76.372, + "eval_seeclick_samples_per_second": 0.563, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 22823056, + "step": 250 + }, + { + "epoch": 1.0416666666666667, + "eval_icons_CIoU": 0.30915556102991104, + "eval_icons_GIoU": 0.32306139916181564, + "eval_icons_IoU": 0.39342615008354187, + "eval_icons_MAE_all": 0.0679849199950695, + "eval_icons_MAE_h": 0.1593536138534546, + "eval_icons_MAE_w": 0.08073913678526878, + "eval_icons_MAE_x_boxes": 0.08451684936881065, + "eval_icons_MAE_y_boxes": 0.1595774106681347, + "eval_icons_NUM_probability": 1.0, + "eval_icons_inside_bbox": 0.5104166716337204, + "eval_icons_loss": 0.34727242588996887, + "eval_icons_loss_ce": 0.00019893267926818226, + "eval_icons_loss_iou": 0.283203125, + "eval_icons_loss_num": 0.0634307861328125, + "eval_icons_loss_xval": 0.3458251953125, + "eval_icons_runtime": 95.8043, + "eval_icons_samples_per_second": 0.522, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 22823056, + "step": 250 + }, + { + "epoch": 1.0416666666666667, + "eval_screenspot_CIoU": 0.29506024221579236, + "eval_screenspot_GIoU": 0.2745039314031601, + "eval_screenspot_IoU": 0.37262112895647687, + "eval_screenspot_MAE_all": 0.10671550035476685, + "eval_screenspot_MAE_h": 0.11584235727787018, + "eval_screenspot_MAE_w": 0.21633888532718024, + "eval_screenspot_MAE_x_boxes": 0.18261671562989554, + "eval_screenspot_MAE_y_boxes": 0.10873916993538539, + "eval_screenspot_NUM_probability": 0.9998256166776022, + "eval_screenspot_inside_bbox": 0.6329166690508524, + "eval_screenspot_loss": 0.566607654094696, + "eval_screenspot_loss_ce": 6.591878506393793e-05, + "eval_screenspot_loss_iou": 0.3468831380208333, + "eval_screenspot_loss_num": 0.10788981119791667, + "eval_screenspot_loss_xval": 0.5740559895833334, + "eval_screenspot_runtime": 158.8771, + "eval_screenspot_samples_per_second": 0.56, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 22823056, + "step": 250 + }, + { + "epoch": 1.0416666666666667, + "eval_compot_CIoU": 0.3294719457626343, + "eval_compot_GIoU": 0.31741394102573395, + "eval_compot_IoU": 0.42581257224082947, + "eval_compot_MAE_all": 0.07543003186583519, + "eval_compot_MAE_h": 0.12245305627584457, + "eval_compot_MAE_w": 0.1439114511013031, + "eval_compot_MAE_x_boxes": 0.14288334921002388, + "eval_compot_MAE_y_boxes": 0.12545089423656464, + "eval_compot_NUM_probability": 0.9999739527702332, + "eval_compot_inside_bbox": 0.5277777910232544, + "eval_compot_loss": 0.42674627900123596, + "eval_compot_loss_ce": 0.016201181337237358, + "eval_compot_loss_iou": 0.3455810546875, + "eval_compot_loss_num": 0.0708465576171875, + "eval_compot_loss_xval": 0.388427734375, + "eval_compot_runtime": 88.5242, + "eval_compot_samples_per_second": 0.565, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 22823056, + "step": 250 + }, + { + "epoch": 1.0416666666666667, + "loss": 0.3685532510280609, + "loss_ce": 0.016990739852190018, + "loss_iou": 0.380859375, + "loss_num": 0.0625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 22823056, + "step": 250 + }, + { + "epoch": 1.0458333333333334, + "grad_norm": 13.750598628368111, + "learning_rate": 5e-05, + "loss": 0.2017, + "num_input_tokens_seen": 22914132, + "step": 251 + }, + { + "epoch": 1.0458333333333334, + "loss": 0.18371494114398956, + "loss_ce": 0.0029288064688444138, + "loss_iou": 0.36328125, + "loss_num": 0.0289306640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 22914132, + "step": 251 + }, + { + "epoch": 1.05, + "grad_norm": 7.245198716742816, + "learning_rate": 5e-05, + "loss": 0.2539, + "num_input_tokens_seen": 23004908, + "step": 252 + }, + { + "epoch": 1.05, + "loss": 0.2888379693031311, + "loss_ce": 0.0006299512460827827, + "loss_iou": 0.34375, + "loss_num": 0.05078125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 23004908, + "step": 252 + }, + { + "epoch": 1.0541666666666667, + "grad_norm": 10.747899233785548, + "learning_rate": 5e-05, + "loss": 0.2191, + "num_input_tokens_seen": 23096088, + "step": 253 + }, + { + "epoch": 1.0541666666666667, + "loss": 0.22767138481140137, + "loss_ce": 7.128823199309409e-05, + "loss_iou": 0.44921875, + "loss_num": 0.03662109375, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 23096088, + "step": 253 + }, + { + "epoch": 1.0583333333333333, + "grad_norm": 12.232746097702254, + "learning_rate": 5e-05, + "loss": 0.2383, + "num_input_tokens_seen": 23187160, + "step": 254 + }, + { + "epoch": 1.0583333333333333, + "loss": 0.2036757469177246, + "loss_ce": 0.0005507570458576083, + "loss_iou": 0.330078125, + "loss_num": 0.033935546875, + "loss_xval": 0.203125, + "num_input_tokens_seen": 23187160, + "step": 254 + }, + { + "epoch": 1.0625, + "grad_norm": 7.885073097843605, + "learning_rate": 5e-05, + "loss": 0.1814, + "num_input_tokens_seen": 23278440, + "step": 255 + }, + { + "epoch": 1.0625, + "loss": 0.20344702899456024, + "loss_ce": 0.0006882417947053909, + "loss_iou": 0.369140625, + "loss_num": 0.032958984375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 23278440, + "step": 255 + }, + { + "epoch": 1.0666666666666667, + "grad_norm": 16.40812586396314, + "learning_rate": 5e-05, + "loss": 0.236, + "num_input_tokens_seen": 23370636, + "step": 256 + }, + { + "epoch": 1.0666666666666667, + "loss": 0.26500198245048523, + "loss_ce": 0.0009028694476000965, + "loss_iou": 0.2890625, + "loss_num": 0.047119140625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 23370636, + "step": 256 + }, + { + "epoch": 1.0708333333333333, + "grad_norm": 11.215786680034467, + "learning_rate": 5e-05, + "loss": 0.2144, + "num_input_tokens_seen": 23462460, + "step": 257 + }, + { + "epoch": 1.0708333333333333, + "loss": 0.19554069638252258, + "loss_ce": 0.001387865049764514, + "loss_iou": 0.44921875, + "loss_num": 0.029541015625, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 23462460, + "step": 257 + }, + { + "epoch": 1.075, + "grad_norm": 12.60165243155499, + "learning_rate": 5e-05, + "loss": 0.2056, + "num_input_tokens_seen": 23553788, + "step": 258 + }, + { + "epoch": 1.075, + "loss": 0.17294445633888245, + "loss_ce": 0.0011457615764811635, + "loss_iou": 0.158203125, + "loss_num": 0.0311279296875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 23553788, + "step": 258 + }, + { + "epoch": 1.0791666666666666, + "grad_norm": 28.365998182995593, + "learning_rate": 5e-05, + "loss": 0.2485, + "num_input_tokens_seen": 23645260, + "step": 259 + }, + { + "epoch": 1.0791666666666666, + "loss": 0.22533473372459412, + "loss_ce": 0.00267848395742476, + "loss_iou": 0.224609375, + "loss_num": 0.0400390625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 23645260, + "step": 259 + }, + { + "epoch": 1.0833333333333333, + "grad_norm": 3.6123675014465024, + "learning_rate": 5e-05, + "loss": 0.2137, + "num_input_tokens_seen": 23736488, + "step": 260 + }, + { + "epoch": 1.0833333333333333, + "loss": 0.2302786111831665, + "loss_ce": 0.00432644784450531, + "loss_iou": 0.27734375, + "loss_num": 0.03955078125, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 23736488, + "step": 260 + }, + { + "epoch": 1.0875, + "grad_norm": 6.263666981437489, + "learning_rate": 5e-05, + "loss": 0.2006, + "num_input_tokens_seen": 23827712, + "step": 261 + }, + { + "epoch": 1.0875, + "loss": 0.1470654010772705, + "loss_ce": 0.0011608521454036236, + "loss_iou": 0.2734375, + "loss_num": 0.0234375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 23827712, + "step": 261 + }, + { + "epoch": 1.0916666666666666, + "grad_norm": 17.564574153957086, + "learning_rate": 5e-05, + "loss": 0.2446, + "num_input_tokens_seen": 23918900, + "step": 262 + }, + { + "epoch": 1.0916666666666666, + "loss": 0.23478296399116516, + "loss_ce": 0.0009572738781571388, + "loss_iou": 0.34375, + "loss_num": 0.03955078125, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 23918900, + "step": 262 + }, + { + "epoch": 1.0958333333333334, + "grad_norm": 6.90548226071317, + "learning_rate": 5e-05, + "loss": 0.1946, + "num_input_tokens_seen": 24010364, + "step": 263 + }, + { + "epoch": 1.0958333333333334, + "loss": 0.23993369936943054, + "loss_ce": 0.0012862500734627247, + "loss_iou": 0.240234375, + "loss_num": 0.042724609375, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 24010364, + "step": 263 + }, + { + "epoch": 1.1, + "grad_norm": 10.53808713151041, + "learning_rate": 5e-05, + "loss": 0.1977, + "num_input_tokens_seen": 24102292, + "step": 264 + }, + { + "epoch": 1.1, + "loss": 0.14225786924362183, + "loss_ce": 0.0004732094530481845, + "loss_iou": 0.353515625, + "loss_num": 0.02099609375, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 24102292, + "step": 264 + }, + { + "epoch": 1.1041666666666667, + "grad_norm": 7.929983801483684, + "learning_rate": 5e-05, + "loss": 0.2208, + "num_input_tokens_seen": 24193944, + "step": 265 + }, + { + "epoch": 1.1041666666666667, + "loss": 0.22911550104618073, + "loss_ce": 0.0102434316650033, + "loss_iou": 0.51171875, + "loss_num": 0.032958984375, + "loss_xval": 0.21875, + "num_input_tokens_seen": 24193944, + "step": 265 + }, + { + "epoch": 1.1083333333333334, + "grad_norm": 15.646854153164862, + "learning_rate": 5e-05, + "loss": 0.2539, + "num_input_tokens_seen": 24283588, + "step": 266 + }, + { + "epoch": 1.1083333333333334, + "loss": 0.24303734302520752, + "loss_ce": 5.6382563343504444e-05, + "loss_iou": 0.51953125, + "loss_num": 0.03759765625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 24283588, + "step": 266 + }, + { + "epoch": 1.1125, + "grad_norm": 18.931589220962483, + "learning_rate": 5e-05, + "loss": 0.2833, + "num_input_tokens_seen": 24374832, + "step": 267 + }, + { + "epoch": 1.1125, + "loss": 0.33671608567237854, + "loss_ce": 0.0007785820635035634, + "loss_iou": 0.263671875, + "loss_num": 0.0615234375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 24374832, + "step": 267 + }, + { + "epoch": 1.1166666666666667, + "grad_norm": 4.215656324565556, + "learning_rate": 5e-05, + "loss": 0.2252, + "num_input_tokens_seen": 24466228, + "step": 268 + }, + { + "epoch": 1.1166666666666667, + "loss": 0.1903807669878006, + "loss_ce": 0.0022093746811151505, + "loss_iou": 0.30078125, + "loss_num": 0.03125, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 24466228, + "step": 268 + }, + { + "epoch": 1.1208333333333333, + "grad_norm": 8.300731681675398, + "learning_rate": 5e-05, + "loss": 0.2449, + "num_input_tokens_seen": 24557400, + "step": 269 + }, + { + "epoch": 1.1208333333333333, + "loss": 0.21225546300411224, + "loss_ce": 0.0002803659299388528, + "loss_iou": 0.357421875, + "loss_num": 0.03466796875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 24557400, + "step": 269 + }, + { + "epoch": 1.125, + "grad_norm": 5.119351160147199, + "learning_rate": 5e-05, + "loss": 0.2128, + "num_input_tokens_seen": 24648884, + "step": 270 + }, + { + "epoch": 1.125, + "loss": 0.21190392971038818, + "loss_ce": 0.007558222860097885, + "loss_iou": 0.34375, + "loss_num": 0.033447265625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 24648884, + "step": 270 + }, + { + "epoch": 1.1291666666666667, + "grad_norm": 4.529424949583247, + "learning_rate": 5e-05, + "loss": 0.1556, + "num_input_tokens_seen": 24740556, + "step": 271 + }, + { + "epoch": 1.1291666666666667, + "loss": 0.13350823521614075, + "loss_ce": 0.0006652114097960293, + "loss_iou": 0.2490234375, + "loss_num": 0.021240234375, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 24740556, + "step": 271 + }, + { + "epoch": 1.1333333333333333, + "grad_norm": 6.937498607854174, + "learning_rate": 5e-05, + "loss": 0.2454, + "num_input_tokens_seen": 24831980, + "step": 272 + }, + { + "epoch": 1.1333333333333333, + "loss": 0.32967180013656616, + "loss_ce": 0.0028895826544612646, + "loss_iou": 0.380859375, + "loss_num": 0.05712890625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 24831980, + "step": 272 + }, + { + "epoch": 1.1375, + "grad_norm": 2.920099778327708, + "learning_rate": 5e-05, + "loss": 0.1839, + "num_input_tokens_seen": 24923548, + "step": 273 + }, + { + "epoch": 1.1375, + "loss": 0.2199542224407196, + "loss_ce": 0.001448356662876904, + "loss_iou": 0.115234375, + "loss_num": 0.041259765625, + "loss_xval": 0.21875, + "num_input_tokens_seen": 24923548, + "step": 273 + }, + { + "epoch": 1.1416666666666666, + "grad_norm": 13.780157485919663, + "learning_rate": 5e-05, + "loss": 0.2412, + "num_input_tokens_seen": 25014488, + "step": 274 + }, + { + "epoch": 1.1416666666666666, + "loss": 0.32479098439216614, + "loss_ce": 0.00014497421216219664, + "loss_iou": 0.369140625, + "loss_num": 0.056884765625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 25014488, + "step": 274 + }, + { + "epoch": 1.1458333333333333, + "grad_norm": 9.083108705402712, + "learning_rate": 5e-05, + "loss": 0.2343, + "num_input_tokens_seen": 25106400, + "step": 275 + }, + { + "epoch": 1.1458333333333333, + "loss": 0.2582840025424957, + "loss_ce": 0.0035232664085924625, + "loss_iou": 0.255859375, + "loss_num": 0.04541015625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 25106400, + "step": 275 + }, + { + "epoch": 1.15, + "grad_norm": 12.211370049752565, + "learning_rate": 5e-05, + "loss": 0.196, + "num_input_tokens_seen": 25197264, + "step": 276 + }, + { + "epoch": 1.15, + "loss": 0.18650269508361816, + "loss_ce": 0.0001013166838674806, + "loss_iou": 0.26171875, + "loss_num": 0.031494140625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 25197264, + "step": 276 + }, + { + "epoch": 1.1541666666666668, + "grad_norm": 9.56433072906577, + "learning_rate": 5e-05, + "loss": 0.1665, + "num_input_tokens_seen": 25288508, + "step": 277 + }, + { + "epoch": 1.1541666666666668, + "loss": 0.20603136718273163, + "loss_ce": 0.00034288677852600813, + "loss_iou": 0.439453125, + "loss_num": 0.031494140625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 25288508, + "step": 277 + }, + { + "epoch": 1.1583333333333332, + "grad_norm": 6.93267755197553, + "learning_rate": 5e-05, + "loss": 0.2313, + "num_input_tokens_seen": 25379688, + "step": 278 + }, + { + "epoch": 1.1583333333333332, + "loss": 0.2501569390296936, + "loss_ce": 0.00015694127068854868, + "loss_iou": 0.40625, + "loss_num": 0.041015625, + "loss_xval": 0.25, + "num_input_tokens_seen": 25379688, + "step": 278 + }, + { + "epoch": 1.1625, + "grad_norm": 6.723997876952268, + "learning_rate": 5e-05, + "loss": 0.1984, + "num_input_tokens_seen": 25471164, + "step": 279 + }, + { + "epoch": 1.1625, + "loss": 0.25789207220077515, + "loss_ce": 7.95528685557656e-05, + "loss_iou": 0.34375, + "loss_num": 0.0439453125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 25471164, + "step": 279 + }, + { + "epoch": 1.1666666666666667, + "grad_norm": 4.1449049890656795, + "learning_rate": 5e-05, + "loss": 0.2235, + "num_input_tokens_seen": 25562844, + "step": 280 + }, + { + "epoch": 1.1666666666666667, + "loss": 0.2036113440990448, + "loss_ce": 0.002988772466778755, + "loss_iou": 0.3671875, + "loss_num": 0.031982421875, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 25562844, + "step": 280 + }, + { + "epoch": 1.1708333333333334, + "grad_norm": 12.380592979305964, + "learning_rate": 5e-05, + "loss": 0.2817, + "num_input_tokens_seen": 25653916, + "step": 281 + }, + { + "epoch": 1.1708333333333334, + "loss": 0.3497365713119507, + "loss_ce": 0.041875243186950684, + "loss_iou": 0.1904296875, + "loss_num": 0.057373046875, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 25653916, + "step": 281 + }, + { + "epoch": 1.175, + "grad_norm": 13.97477290146763, + "learning_rate": 5e-05, + "loss": 0.2029, + "num_input_tokens_seen": 25745660, + "step": 282 + }, + { + "epoch": 1.175, + "loss": 0.16901439428329468, + "loss_ce": 0.002144268713891506, + "loss_iou": 0.296875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 25745660, + "step": 282 + }, + { + "epoch": 1.1791666666666667, + "grad_norm": 13.599248755250013, + "learning_rate": 5e-05, + "loss": 0.2368, + "num_input_tokens_seen": 25837200, + "step": 283 + }, + { + "epoch": 1.1791666666666667, + "loss": 0.2460094392299652, + "loss_ce": 0.001746740541420877, + "loss_iou": 0.380859375, + "loss_num": 0.040283203125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 25837200, + "step": 283 + }, + { + "epoch": 1.1833333333333333, + "grad_norm": 9.55427246297203, + "learning_rate": 5e-05, + "loss": 0.1973, + "num_input_tokens_seen": 25928152, + "step": 284 + }, + { + "epoch": 1.1833333333333333, + "loss": 0.2068982720375061, + "loss_ce": 0.0030408508609980345, + "loss_iou": 0.259765625, + "loss_num": 0.034912109375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 25928152, + "step": 284 + }, + { + "epoch": 1.1875, + "grad_norm": 28.158141134403348, + "learning_rate": 5e-05, + "loss": 0.2553, + "num_input_tokens_seen": 26019552, + "step": 285 + }, + { + "epoch": 1.1875, + "loss": 0.25668060779571533, + "loss_ce": 0.003934011794626713, + "loss_iou": 0.376953125, + "loss_num": 0.0419921875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 26019552, + "step": 285 + }, + { + "epoch": 1.1916666666666667, + "grad_norm": 28.306902581620154, + "learning_rate": 5e-05, + "loss": 0.2502, + "num_input_tokens_seen": 26109036, + "step": 286 + }, + { + "epoch": 1.1916666666666667, + "loss": 0.27036812901496887, + "loss_ce": 0.0004096394404768944, + "loss_iou": 0.408203125, + "loss_num": 0.044677734375, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 26109036, + "step": 286 + }, + { + "epoch": 1.1958333333333333, + "grad_norm": 4.381139671677197, + "learning_rate": 5e-05, + "loss": 0.3646, + "num_input_tokens_seen": 26200192, + "step": 287 + }, + { + "epoch": 1.1958333333333333, + "loss": 0.33801934123039246, + "loss_ce": 0.0015935404226183891, + "loss_iou": 0.32421875, + "loss_num": 0.06005859375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 26200192, + "step": 287 + }, + { + "epoch": 1.2, + "grad_norm": 8.970869508879645, + "learning_rate": 5e-05, + "loss": 0.2122, + "num_input_tokens_seen": 26291316, + "step": 288 + }, + { + "epoch": 1.2, + "loss": 0.17160077393054962, + "loss_ce": 0.0007023363141342998, + "loss_iou": 0.33984375, + "loss_num": 0.0263671875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 26291316, + "step": 288 + }, + { + "epoch": 1.2041666666666666, + "grad_norm": 5.9025805154361795, + "learning_rate": 5e-05, + "loss": 0.2123, + "num_input_tokens_seen": 26382640, + "step": 289 + }, + { + "epoch": 1.2041666666666666, + "loss": 0.1856706142425537, + "loss_ce": 0.0011613458627834916, + "loss_iou": 0.17578125, + "loss_num": 0.032958984375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 26382640, + "step": 289 + }, + { + "epoch": 1.2083333333333333, + "grad_norm": 3.034340467333528, + "learning_rate": 5e-05, + "loss": 0.1987, + "num_input_tokens_seen": 26473560, + "step": 290 + }, + { + "epoch": 1.2083333333333333, + "loss": 0.20361942052841187, + "loss_ce": 0.0009216800681315362, + "loss_iou": 0.310546875, + "loss_num": 0.033447265625, + "loss_xval": 0.203125, + "num_input_tokens_seen": 26473560, + "step": 290 + }, + { + "epoch": 1.2125, + "grad_norm": 5.07195649570692, + "learning_rate": 5e-05, + "loss": 0.2551, + "num_input_tokens_seen": 26564448, + "step": 291 + }, + { + "epoch": 1.2125, + "loss": 0.3689958453178406, + "loss_ce": 0.000587630202062428, + "loss_iou": 0.46484375, + "loss_num": 0.06298828125, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 26564448, + "step": 291 + }, + { + "epoch": 1.2166666666666668, + "grad_norm": 6.2466441370427, + "learning_rate": 5e-05, + "loss": 0.2091, + "num_input_tokens_seen": 26655164, + "step": 292 + }, + { + "epoch": 1.2166666666666668, + "loss": 0.2707892656326294, + "loss_ce": 0.0007697429973632097, + "loss_iou": 0.44921875, + "loss_num": 0.04345703125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 26655164, + "step": 292 + }, + { + "epoch": 1.2208333333333332, + "grad_norm": 8.151889617477465, + "learning_rate": 5e-05, + "loss": 0.2068, + "num_input_tokens_seen": 26746876, + "step": 293 + }, + { + "epoch": 1.2208333333333332, + "loss": 0.22343799471855164, + "loss_ce": 0.0002934715012088418, + "loss_iou": 0.376953125, + "loss_num": 0.035888671875, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 26746876, + "step": 293 + }, + { + "epoch": 1.225, + "grad_norm": 4.124519965046716, + "learning_rate": 5e-05, + "loss": 0.2132, + "num_input_tokens_seen": 26838780, + "step": 294 + }, + { + "epoch": 1.225, + "loss": 0.16030102968215942, + "loss_ce": 0.0009382428834214807, + "loss_iou": 0.330078125, + "loss_num": 0.024169921875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 26838780, + "step": 294 + }, + { + "epoch": 1.2291666666666667, + "grad_norm": 6.533995886459005, + "learning_rate": 5e-05, + "loss": 0.2646, + "num_input_tokens_seen": 26929904, + "step": 295 + }, + { + "epoch": 1.2291666666666667, + "loss": 0.2608182430267334, + "loss_ce": 0.0008084540022537112, + "loss_iou": 0.431640625, + "loss_num": 0.0419921875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 26929904, + "step": 295 + }, + { + "epoch": 1.2333333333333334, + "grad_norm": 18.092046203643505, + "learning_rate": 5e-05, + "loss": 0.2806, + "num_input_tokens_seen": 27021036, + "step": 296 + }, + { + "epoch": 1.2333333333333334, + "loss": 0.2797492742538452, + "loss_ce": 0.0020393121521919966, + "loss_iou": 0.30859375, + "loss_num": 0.04833984375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 27021036, + "step": 296 + }, + { + "epoch": 1.2375, + "grad_norm": 7.343572686469682, + "learning_rate": 5e-05, + "loss": 0.3192, + "num_input_tokens_seen": 27112136, + "step": 297 + }, + { + "epoch": 1.2375, + "loss": 0.2837493419647217, + "loss_ce": 0.0007903836667537689, + "loss_iou": 0.5078125, + "loss_num": 0.04443359375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 27112136, + "step": 297 + }, + { + "epoch": 1.2416666666666667, + "grad_norm": 7.332784386468288, + "learning_rate": 5e-05, + "loss": 0.2304, + "num_input_tokens_seen": 27203900, + "step": 298 + }, + { + "epoch": 1.2416666666666667, + "loss": 0.2656678259372711, + "loss_ce": 0.005841171368956566, + "loss_iou": 0.427734375, + "loss_num": 0.041748046875, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 27203900, + "step": 298 + }, + { + "epoch": 1.2458333333333333, + "grad_norm": 7.39725694818393, + "learning_rate": 5e-05, + "loss": 0.2118, + "num_input_tokens_seen": 27293496, + "step": 299 + }, + { + "epoch": 1.2458333333333333, + "loss": 0.20304223895072937, + "loss_ce": 0.00028345605824142694, + "loss_iou": 0.349609375, + "loss_num": 0.0322265625, + "loss_xval": 0.203125, + "num_input_tokens_seen": 27293496, + "step": 299 + }, + { + "epoch": 1.25, + "grad_norm": 10.010729098569774, + "learning_rate": 5e-05, + "loss": 0.1894, + "num_input_tokens_seen": 27384992, + "step": 300 + }, + { + "epoch": 1.25, + "loss": 0.13992911577224731, + "loss_ce": 0.003088284283876419, + "loss_iou": 0.318359375, + "loss_num": 0.019775390625, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 27384992, + "step": 300 + }, + { + "epoch": 1.2541666666666667, + "grad_norm": 7.37177077354589, + "learning_rate": 5e-05, + "loss": 0.2112, + "num_input_tokens_seen": 27476184, + "step": 301 + }, + { + "epoch": 1.2541666666666667, + "loss": 0.230925515294075, + "loss_ce": 0.002226787619292736, + "loss_iou": 0.439453125, + "loss_num": 0.03515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 27476184, + "step": 301 + }, + { + "epoch": 1.2583333333333333, + "grad_norm": 6.352901220947042, + "learning_rate": 5e-05, + "loss": 0.2913, + "num_input_tokens_seen": 27567100, + "step": 302 + }, + { + "epoch": 1.2583333333333333, + "loss": 0.2581457197666168, + "loss_ce": 0.0015539309242740273, + "loss_iou": 0.279296875, + "loss_num": 0.044677734375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 27567100, + "step": 302 + }, + { + "epoch": 1.2625, + "grad_norm": 8.923033195819553, + "learning_rate": 5e-05, + "loss": 0.1702, + "num_input_tokens_seen": 27658556, + "step": 303 + }, + { + "epoch": 1.2625, + "loss": 0.1497621238231659, + "loss_ce": 0.00034806010080501437, + "loss_iou": 0.2421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 27658556, + "step": 303 + }, + { + "epoch": 1.2666666666666666, + "grad_norm": 25.031495809517857, + "learning_rate": 5e-05, + "loss": 0.2271, + "num_input_tokens_seen": 27749608, + "step": 304 + }, + { + "epoch": 1.2666666666666666, + "loss": 0.18870463967323303, + "loss_ce": 0.00028909966931678355, + "loss_iou": 0.3515625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 27749608, + "step": 304 + }, + { + "epoch": 1.2708333333333333, + "grad_norm": 12.884676627528473, + "learning_rate": 5e-05, + "loss": 0.2223, + "num_input_tokens_seen": 27840920, + "step": 305 + }, + { + "epoch": 1.2708333333333333, + "loss": 0.22538474202156067, + "loss_ce": 0.02140524610877037, + "loss_iou": 0.232421875, + "loss_num": 0.03515625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 27840920, + "step": 305 + }, + { + "epoch": 1.275, + "grad_norm": 3.0296539839659946, + "learning_rate": 5e-05, + "loss": 0.1609, + "num_input_tokens_seen": 27932776, + "step": 306 + }, + { + "epoch": 1.275, + "loss": 0.13345500826835632, + "loss_ce": 0.0054032509215176105, + "loss_iou": 0.3125, + "loss_num": 0.0179443359375, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 27932776, + "step": 306 + }, + { + "epoch": 1.2791666666666668, + "grad_norm": 9.317356311054386, + "learning_rate": 5e-05, + "loss": 0.2375, + "num_input_tokens_seen": 28023864, + "step": 307 + }, + { + "epoch": 1.2791666666666668, + "loss": 0.18880173563957214, + "loss_ce": 0.004658671095967293, + "loss_iou": 0.35546875, + "loss_num": 0.028076171875, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 28023864, + "step": 307 + }, + { + "epoch": 1.2833333333333332, + "grad_norm": 10.675024473336514, + "learning_rate": 5e-05, + "loss": 0.2169, + "num_input_tokens_seen": 28114932, + "step": 308 + }, + { + "epoch": 1.2833333333333332, + "loss": 0.24450092017650604, + "loss_ce": 0.00011613914102781564, + "loss_iou": 0.470703125, + "loss_num": 0.037353515625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 28114932, + "step": 308 + }, + { + "epoch": 1.2875, + "grad_norm": 51.09535546619375, + "learning_rate": 5e-05, + "loss": 0.2699, + "num_input_tokens_seen": 28205920, + "step": 309 + }, + { + "epoch": 1.2875, + "loss": 0.19558678567409515, + "loss_ce": 0.005096061155200005, + "loss_iou": 0.369140625, + "loss_num": 0.029052734375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 28205920, + "step": 309 + }, + { + "epoch": 1.2916666666666667, + "grad_norm": 17.307347393595503, + "learning_rate": 5e-05, + "loss": 0.2243, + "num_input_tokens_seen": 28297280, + "step": 310 + }, + { + "epoch": 1.2916666666666667, + "loss": 0.23106957972049713, + "loss_ce": 5.150996003067121e-05, + "loss_iou": 0.396484375, + "loss_num": 0.036376953125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 28297280, + "step": 310 + }, + { + "epoch": 1.2958333333333334, + "grad_norm": 5.621660380808616, + "learning_rate": 5e-05, + "loss": 0.2527, + "num_input_tokens_seen": 28388768, + "step": 311 + }, + { + "epoch": 1.2958333333333334, + "loss": 0.23239217698574066, + "loss_ce": 0.00253378227353096, + "loss_iou": 0.291015625, + "loss_num": 0.038818359375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 28388768, + "step": 311 + }, + { + "epoch": 1.3, + "grad_norm": 3.7105957761308814, + "learning_rate": 5e-05, + "loss": 0.1835, + "num_input_tokens_seen": 28479672, + "step": 312 + }, + { + "epoch": 1.3, + "loss": 0.15457478165626526, + "loss_ce": 0.0021394838113337755, + "loss_iou": 0.35546875, + "loss_num": 0.0216064453125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 28479672, + "step": 312 + }, + { + "epoch": 1.3041666666666667, + "grad_norm": 9.313963201622089, + "learning_rate": 5e-05, + "loss": 0.2508, + "num_input_tokens_seen": 28571164, + "step": 313 + }, + { + "epoch": 1.3041666666666667, + "loss": 0.2276507318019867, + "loss_ce": 0.0029192741494625807, + "loss_iou": 0.365234375, + "loss_num": 0.03564453125, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 28571164, + "step": 313 + }, + { + "epoch": 1.3083333333333333, + "grad_norm": 7.631332001679039, + "learning_rate": 5e-05, + "loss": 0.2597, + "num_input_tokens_seen": 28661232, + "step": 314 + }, + { + "epoch": 1.3083333333333333, + "loss": 0.20918793976306915, + "loss_ce": 0.0005087353638373315, + "loss_iou": 0.296875, + "loss_num": 0.0341796875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 28661232, + "step": 314 + }, + { + "epoch": 1.3125, + "grad_norm": 8.850758042334006, + "learning_rate": 5e-05, + "loss": 0.2298, + "num_input_tokens_seen": 28753364, + "step": 315 + }, + { + "epoch": 1.3125, + "loss": 0.2625318169593811, + "loss_ce": 0.0017285854555666447, + "loss_iou": 0.3046875, + "loss_num": 0.04443359375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 28753364, + "step": 315 + }, + { + "epoch": 1.3166666666666667, + "grad_norm": 6.859930907832721, + "learning_rate": 5e-05, + "loss": 0.2106, + "num_input_tokens_seen": 28844244, + "step": 316 + }, + { + "epoch": 1.3166666666666667, + "loss": 0.1807193160057068, + "loss_ce": 0.0006350984331220388, + "loss_iou": 0.115234375, + "loss_num": 0.033203125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 28844244, + "step": 316 + }, + { + "epoch": 1.3208333333333333, + "grad_norm": 12.939751788005317, + "learning_rate": 5e-05, + "loss": 0.2817, + "num_input_tokens_seen": 28935828, + "step": 317 + }, + { + "epoch": 1.3208333333333333, + "loss": 0.2727729082107544, + "loss_ce": 0.00202095415443182, + "loss_iou": 0.365234375, + "loss_num": 0.044921875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 28935828, + "step": 317 + }, + { + "epoch": 1.325, + "grad_norm": 6.5877122224806115, + "learning_rate": 5e-05, + "loss": 0.2098, + "num_input_tokens_seen": 29027540, + "step": 318 + }, + { + "epoch": 1.325, + "loss": 0.18838170170783997, + "loss_ce": 0.003750366624444723, + "loss_iou": 0.322265625, + "loss_num": 0.02880859375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 29027540, + "step": 318 + }, + { + "epoch": 1.3291666666666666, + "grad_norm": 5.688954810244233, + "learning_rate": 5e-05, + "loss": 0.1954, + "num_input_tokens_seen": 29118992, + "step": 319 + }, + { + "epoch": 1.3291666666666666, + "loss": 0.19413119554519653, + "loss_ce": 0.00016146828420460224, + "loss_iou": 0.35546875, + "loss_num": 0.02978515625, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 29118992, + "step": 319 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 26.71855142634084, + "learning_rate": 5e-05, + "loss": 0.199, + "num_input_tokens_seen": 29210064, + "step": 320 + }, + { + "epoch": 1.3333333333333333, + "loss": 0.17129886150360107, + "loss_ce": 0.001392227946780622, + "loss_iou": 0.31640625, + "loss_num": 0.02587890625, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 29210064, + "step": 320 + }, + { + "epoch": 1.3375, + "grad_norm": 9.486748776798056, + "learning_rate": 5e-05, + "loss": 0.1816, + "num_input_tokens_seen": 29301120, + "step": 321 + }, + { + "epoch": 1.3375, + "loss": 0.21057261526584625, + "loss_ce": 0.002961541526019573, + "loss_iou": 0.26953125, + "loss_num": 0.03466796875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 29301120, + "step": 321 + }, + { + "epoch": 1.3416666666666668, + "grad_norm": 3.174802850022777, + "learning_rate": 5e-05, + "loss": 0.2765, + "num_input_tokens_seen": 29391332, + "step": 322 + }, + { + "epoch": 1.3416666666666668, + "loss": 0.3159889280796051, + "loss_ce": 0.0038856619503349066, + "loss_iou": 0.24609375, + "loss_num": 0.05615234375, + "loss_xval": 0.3125, + "num_input_tokens_seen": 29391332, + "step": 322 + }, + { + "epoch": 1.3458333333333332, + "grad_norm": 8.782382534885203, + "learning_rate": 5e-05, + "loss": 0.2103, + "num_input_tokens_seen": 29483488, + "step": 323 + }, + { + "epoch": 1.3458333333333332, + "loss": 0.21020320057868958, + "loss_ce": 0.0057964809238910675, + "loss_iou": 0.408203125, + "loss_num": 0.0303955078125, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 29483488, + "step": 323 + }, + { + "epoch": 1.35, + "grad_norm": 11.461747008609803, + "learning_rate": 5e-05, + "loss": 0.2401, + "num_input_tokens_seen": 29575216, + "step": 324 + }, + { + "epoch": 1.35, + "loss": 0.22908198833465576, + "loss_ce": 0.004472623113542795, + "loss_iou": 0.2021484375, + "loss_num": 0.039794921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 29575216, + "step": 324 + }, + { + "epoch": 1.3541666666666667, + "grad_norm": 6.9226387606348885, + "learning_rate": 5e-05, + "loss": 0.1771, + "num_input_tokens_seen": 29666440, + "step": 325 + }, + { + "epoch": 1.3541666666666667, + "loss": 0.1849173903465271, + "loss_ce": 0.0015983112389221787, + "loss_iou": 0.1943359375, + "loss_num": 0.03173828125, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 29666440, + "step": 325 + }, + { + "epoch": 1.3583333333333334, + "grad_norm": 7.252189463288932, + "learning_rate": 5e-05, + "loss": 0.2452, + "num_input_tokens_seen": 29757144, + "step": 326 + }, + { + "epoch": 1.3583333333333334, + "loss": 0.2724003493785858, + "loss_ce": 0.0028080574702471495, + "loss_iou": 0.416015625, + "loss_num": 0.04296875, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 29757144, + "step": 326 + }, + { + "epoch": 1.3625, + "grad_norm": 30.468996079722707, + "learning_rate": 5e-05, + "loss": 0.2274, + "num_input_tokens_seen": 29848332, + "step": 327 + }, + { + "epoch": 1.3625, + "loss": 0.2479363977909088, + "loss_ce": 0.0006219286005944014, + "loss_iou": 0.55859375, + "loss_num": 0.034912109375, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 29848332, + "step": 327 + }, + { + "epoch": 1.3666666666666667, + "grad_norm": 13.758287459279725, + "learning_rate": 5e-05, + "loss": 0.2675, + "num_input_tokens_seen": 29939644, + "step": 328 + }, + { + "epoch": 1.3666666666666667, + "loss": 0.29800552129745483, + "loss_ce": 0.00955338403582573, + "loss_iou": 0.240234375, + "loss_num": 0.051513671875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 29939644, + "step": 328 + }, + { + "epoch": 1.3708333333333333, + "grad_norm": 4.239781836095859, + "learning_rate": 5e-05, + "loss": 0.2141, + "num_input_tokens_seen": 30031300, + "step": 329 + }, + { + "epoch": 1.3708333333333333, + "loss": 0.230190247297287, + "loss_ce": 0.004787414334714413, + "loss_iou": 0.3125, + "loss_num": 0.036865234375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 30031300, + "step": 329 + }, + { + "epoch": 1.375, + "grad_norm": 10.217216745472696, + "learning_rate": 5e-05, + "loss": 0.2542, + "num_input_tokens_seen": 30123104, + "step": 330 + }, + { + "epoch": 1.375, + "loss": 0.2735680043697357, + "loss_ce": 0.0028770905919373035, + "loss_iou": 0.248046875, + "loss_num": 0.047607421875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 30123104, + "step": 330 + }, + { + "epoch": 1.3791666666666667, + "grad_norm": 8.133878191487339, + "learning_rate": 5e-05, + "loss": 0.2429, + "num_input_tokens_seen": 30214872, + "step": 331 + }, + { + "epoch": 1.3791666666666667, + "loss": 0.2163221538066864, + "loss_ce": 0.0047743008472025394, + "loss_iou": 0.34765625, + "loss_num": 0.033203125, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 30214872, + "step": 331 + }, + { + "epoch": 1.3833333333333333, + "grad_norm": 12.464149511832298, + "learning_rate": 5e-05, + "loss": 0.2662, + "num_input_tokens_seen": 30305752, + "step": 332 + }, + { + "epoch": 1.3833333333333333, + "loss": 0.19596882164478302, + "loss_ce": 0.00016803990001790226, + "loss_iou": 0.291015625, + "loss_num": 0.031494140625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 30305752, + "step": 332 + }, + { + "epoch": 1.3875, + "grad_norm": 7.075216555936379, + "learning_rate": 5e-05, + "loss": 0.2613, + "num_input_tokens_seen": 30397216, + "step": 333 + }, + { + "epoch": 1.3875, + "loss": 0.3156003952026367, + "loss_ce": 0.004443188663572073, + "loss_iou": 0.373046875, + "loss_num": 0.05224609375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 30397216, + "step": 333 + }, + { + "epoch": 1.3916666666666666, + "grad_norm": 13.164973320959898, + "learning_rate": 5e-05, + "loss": 0.1839, + "num_input_tokens_seen": 30488728, + "step": 334 + }, + { + "epoch": 1.3916666666666666, + "loss": 0.1610163450241089, + "loss_ce": 0.00028026686049997807, + "loss_iou": 0.349609375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 30488728, + "step": 334 + }, + { + "epoch": 1.3958333333333333, + "grad_norm": 16.539594407020676, + "learning_rate": 5e-05, + "loss": 0.3598, + "num_input_tokens_seen": 30579524, + "step": 335 + }, + { + "epoch": 1.3958333333333333, + "loss": 0.28894931077957153, + "loss_ce": 0.0024503041058778763, + "loss_iou": 0.4375, + "loss_num": 0.045654296875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 30579524, + "step": 335 + }, + { + "epoch": 1.4, + "grad_norm": 4.990172501920475, + "learning_rate": 5e-05, + "loss": 0.2102, + "num_input_tokens_seen": 30668872, + "step": 336 + }, + { + "epoch": 1.4, + "loss": 0.15133045613765717, + "loss_ce": 0.00014637643471360207, + "loss_iou": 0.2216796875, + "loss_num": 0.0242919921875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 30668872, + "step": 336 + }, + { + "epoch": 1.4041666666666668, + "grad_norm": 5.22741505793893, + "learning_rate": 5e-05, + "loss": 0.2785, + "num_input_tokens_seen": 30760380, + "step": 337 + }, + { + "epoch": 1.4041666666666668, + "loss": 0.23793606460094452, + "loss_ce": 0.0017300141043961048, + "loss_iou": 0.359375, + "loss_num": 0.03759765625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 30760380, + "step": 337 + }, + { + "epoch": 1.4083333333333332, + "grad_norm": 12.380202022294139, + "learning_rate": 5e-05, + "loss": 0.216, + "num_input_tokens_seen": 30851520, + "step": 338 + }, + { + "epoch": 1.4083333333333332, + "loss": 0.22748208045959473, + "loss_ce": 0.0022013087291270494, + "loss_iou": 0.435546875, + "loss_num": 0.033203125, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 30851520, + "step": 338 + }, + { + "epoch": 1.4125, + "grad_norm": 13.422377024212908, + "learning_rate": 5e-05, + "loss": 0.2825, + "num_input_tokens_seen": 30942864, + "step": 339 + }, + { + "epoch": 1.4125, + "loss": 0.31169962882995605, + "loss_ce": 0.0024955125991255045, + "loss_iou": 0.34765625, + "loss_num": 0.052490234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 30942864, + "step": 339 + }, + { + "epoch": 1.4166666666666667, + "grad_norm": 3.790921840710961, + "learning_rate": 5e-05, + "loss": 0.2376, + "num_input_tokens_seen": 31033868, + "step": 340 + }, + { + "epoch": 1.4166666666666667, + "loss": 0.15136732161045074, + "loss_ce": 0.0005799724021926522, + "loss_iou": 0.259765625, + "loss_num": 0.0230712890625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 31033868, + "step": 340 + }, + { + "epoch": 1.4208333333333334, + "grad_norm": 7.052725036547459, + "learning_rate": 5e-05, + "loss": 0.2162, + "num_input_tokens_seen": 31125672, + "step": 341 + }, + { + "epoch": 1.4208333333333334, + "loss": 0.2005731761455536, + "loss_ce": 0.0010492515284568071, + "loss_iou": 0.28125, + "loss_num": 0.0322265625, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 31125672, + "step": 341 + }, + { + "epoch": 1.425, + "grad_norm": 10.400646348384424, + "learning_rate": 5e-05, + "loss": 0.1949, + "num_input_tokens_seen": 31217040, + "step": 342 + }, + { + "epoch": 1.425, + "loss": 0.15420687198638916, + "loss_ce": 0.0019852002151310444, + "loss_iou": 0.298828125, + "loss_num": 0.022216796875, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 31217040, + "step": 342 + }, + { + "epoch": 1.4291666666666667, + "grad_norm": 17.131715673792243, + "learning_rate": 5e-05, + "loss": 0.2112, + "num_input_tokens_seen": 31308424, + "step": 343 + }, + { + "epoch": 1.4291666666666667, + "loss": 0.20999836921691895, + "loss_ce": 0.0018684857059270144, + "loss_iou": 0.34765625, + "loss_num": 0.0322265625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 31308424, + "step": 343 + }, + { + "epoch": 1.4333333333333333, + "grad_norm": 12.977675520043995, + "learning_rate": 5e-05, + "loss": 0.2513, + "num_input_tokens_seen": 31400176, + "step": 344 + }, + { + "epoch": 1.4333333333333333, + "loss": 0.2407962679862976, + "loss_ce": 0.0017520927358418703, + "loss_iou": 0.369140625, + "loss_num": 0.03759765625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 31400176, + "step": 344 + }, + { + "epoch": 1.4375, + "grad_norm": 9.949923126654774, + "learning_rate": 5e-05, + "loss": 0.2511, + "num_input_tokens_seen": 31491676, + "step": 345 + }, + { + "epoch": 1.4375, + "loss": 0.2417999505996704, + "loss_ce": 0.0037628610152751207, + "loss_iou": 0.3125, + "loss_num": 0.0390625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 31491676, + "step": 345 + }, + { + "epoch": 1.4416666666666667, + "grad_norm": 11.53433640909963, + "learning_rate": 5e-05, + "loss": 0.1946, + "num_input_tokens_seen": 31582808, + "step": 346 + }, + { + "epoch": 1.4416666666666667, + "loss": 0.20713722705841064, + "loss_ce": 0.0035239539574831724, + "loss_iou": 0.314453125, + "loss_num": 0.031982421875, + "loss_xval": 0.203125, + "num_input_tokens_seen": 31582808, + "step": 346 + }, + { + "epoch": 1.4458333333333333, + "grad_norm": 4.357137716774205, + "learning_rate": 5e-05, + "loss": 0.2112, + "num_input_tokens_seen": 31673744, + "step": 347 + }, + { + "epoch": 1.4458333333333333, + "loss": 0.2205427587032318, + "loss_ce": 0.011802521534264088, + "loss_iou": 0.3671875, + "loss_num": 0.031494140625, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 31673744, + "step": 347 + }, + { + "epoch": 1.45, + "grad_norm": 7.492972058482306, + "learning_rate": 5e-05, + "loss": 0.1947, + "num_input_tokens_seen": 31765152, + "step": 348 + }, + { + "epoch": 1.45, + "loss": 0.27522969245910645, + "loss_ce": 0.00069356121821329, + "loss_iou": 0.3984375, + "loss_num": 0.0439453125, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 31765152, + "step": 348 + }, + { + "epoch": 1.4541666666666666, + "grad_norm": 15.91631131469505, + "learning_rate": 5e-05, + "loss": 0.236, + "num_input_tokens_seen": 31856836, + "step": 349 + }, + { + "epoch": 1.4541666666666666, + "loss": 0.1890929788351059, + "loss_ce": 0.0017150461208075285, + "loss_iou": 0.3671875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 31856836, + "step": 349 + }, + { + "epoch": 1.4583333333333333, + "grad_norm": 7.869322709371291, + "learning_rate": 5e-05, + "loss": 0.2321, + "num_input_tokens_seen": 31948068, + "step": 350 + }, + { + "epoch": 1.4583333333333333, + "loss": 0.21434611082077026, + "loss_ce": 0.0029813670553267, + "loss_iou": 0.2734375, + "loss_num": 0.03466796875, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 31948068, + "step": 350 + }, + { + "epoch": 1.4625, + "grad_norm": 4.358120861649633, + "learning_rate": 5e-05, + "loss": 0.2187, + "num_input_tokens_seen": 32039364, + "step": 351 + }, + { + "epoch": 1.4625, + "loss": 0.18133623898029327, + "loss_ce": 0.000550109485629946, + "loss_iou": 0.361328125, + "loss_num": 0.026123046875, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 32039364, + "step": 351 + }, + { + "epoch": 1.4666666666666668, + "grad_norm": 14.366288785351877, + "learning_rate": 5e-05, + "loss": 0.2674, + "num_input_tokens_seen": 32131012, + "step": 352 + }, + { + "epoch": 1.4666666666666668, + "loss": 0.19408850371837616, + "loss_ce": 0.00524573540315032, + "loss_iou": 0.41015625, + "loss_num": 0.0262451171875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 32131012, + "step": 352 + }, + { + "epoch": 1.4708333333333332, + "grad_norm": 7.794105388849188, + "learning_rate": 5e-05, + "loss": 0.1846, + "num_input_tokens_seen": 32222392, + "step": 353 + }, + { + "epoch": 1.4708333333333332, + "loss": 0.1904703974723816, + "loss_ce": 0.0001322595780948177, + "loss_iou": 0.23828125, + "loss_num": 0.031494140625, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 32222392, + "step": 353 + }, + { + "epoch": 1.475, + "grad_norm": 8.569126832794671, + "learning_rate": 5e-05, + "loss": 0.2441, + "num_input_tokens_seen": 32313028, + "step": 354 + }, + { + "epoch": 1.475, + "loss": 0.19572490453720093, + "loss_ce": 0.003525196108967066, + "loss_iou": 0.328125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 32313028, + "step": 354 + }, + { + "epoch": 1.4791666666666667, + "grad_norm": 9.900711758184192, + "learning_rate": 5e-05, + "loss": 0.3003, + "num_input_tokens_seen": 32405116, + "step": 355 + }, + { + "epoch": 1.4791666666666667, + "loss": 0.2578551173210144, + "loss_ce": 0.0013548820279538631, + "loss_iou": 0.369140625, + "loss_num": 0.040771484375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 32405116, + "step": 355 + }, + { + "epoch": 1.4833333333333334, + "grad_norm": 12.792461653462786, + "learning_rate": 5e-05, + "loss": 0.2204, + "num_input_tokens_seen": 32497380, + "step": 356 + }, + { + "epoch": 1.4833333333333334, + "loss": 0.1747611165046692, + "loss_ce": 0.003984738141298294, + "loss_iou": 0.365234375, + "loss_num": 0.0238037109375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 32497380, + "step": 356 + }, + { + "epoch": 1.4875, + "grad_norm": 22.40811815246497, + "learning_rate": 5e-05, + "loss": 0.2485, + "num_input_tokens_seen": 32588164, + "step": 357 + }, + { + "epoch": 1.4875, + "loss": 0.20770668983459473, + "loss_ce": 6.508764636237174e-05, + "loss_iou": 0.44921875, + "loss_num": 0.0286865234375, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 32588164, + "step": 357 + }, + { + "epoch": 1.4916666666666667, + "grad_norm": 22.32086680673658, + "learning_rate": 5e-05, + "loss": 0.248, + "num_input_tokens_seen": 32679184, + "step": 358 + }, + { + "epoch": 1.4916666666666667, + "loss": 0.2456224262714386, + "loss_ce": 1.6946230971370824e-05, + "loss_iou": 0.49609375, + "loss_num": 0.034912109375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 32679184, + "step": 358 + }, + { + "epoch": 1.4958333333333333, + "grad_norm": 6.409542662760085, + "learning_rate": 5e-05, + "loss": 0.3606, + "num_input_tokens_seen": 32770212, + "step": 359 + }, + { + "epoch": 1.4958333333333333, + "loss": 0.38848453760147095, + "loss_ce": 5.6810757087077945e-05, + "loss_iou": 0.404296875, + "loss_num": 0.06591796875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 32770212, + "step": 359 + }, + { + "epoch": 1.5, + "grad_norm": 46.11948684374341, + "learning_rate": 5e-05, + "loss": 0.266, + "num_input_tokens_seen": 32861928, + "step": 360 + }, + { + "epoch": 1.5, + "loss": 0.1256561428308487, + "loss_ce": 0.0014190769288688898, + "loss_iou": 0.3359375, + "loss_num": 0.01519775390625, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 32861928, + "step": 360 + }, + { + "epoch": 1.5041666666666667, + "grad_norm": 3.2817747199724736, + "learning_rate": 5e-05, + "loss": 0.1962, + "num_input_tokens_seen": 32953268, + "step": 361 + }, + { + "epoch": 1.5041666666666667, + "loss": 0.2756691873073578, + "loss_ce": 0.0021706530824303627, + "loss_iou": 0.26171875, + "loss_num": 0.047119140625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 32953268, + "step": 361 + }, + { + "epoch": 1.5083333333333333, + "grad_norm": 13.928454356141433, + "learning_rate": 5e-05, + "loss": 0.2496, + "num_input_tokens_seen": 33044524, + "step": 362 + }, + { + "epoch": 1.5083333333333333, + "loss": 0.2404671013355255, + "loss_ce": 0.0009651454747654498, + "loss_iou": 0.1708984375, + "loss_num": 0.04296875, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 33044524, + "step": 362 + }, + { + "epoch": 1.5125, + "grad_norm": 30.534739055714443, + "learning_rate": 5e-05, + "loss": 0.1796, + "num_input_tokens_seen": 33135836, + "step": 363 + }, + { + "epoch": 1.5125, + "loss": 0.15238480269908905, + "loss_ce": 0.0007124289986677468, + "loss_iou": 0.4140625, + "loss_num": 0.018310546875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 33135836, + "step": 363 + }, + { + "epoch": 1.5166666666666666, + "grad_norm": 9.815702000404304, + "learning_rate": 5e-05, + "loss": 0.3049, + "num_input_tokens_seen": 33227504, + "step": 364 + }, + { + "epoch": 1.5166666666666666, + "loss": 0.37344616651535034, + "loss_ce": 0.0002772384032141417, + "loss_iou": 0.1416015625, + "loss_num": 0.07080078125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 33227504, + "step": 364 + }, + { + "epoch": 1.5208333333333335, + "grad_norm": 6.0881351836137565, + "learning_rate": 5e-05, + "loss": 0.1999, + "num_input_tokens_seen": 33319136, + "step": 365 + }, + { + "epoch": 1.5208333333333335, + "loss": 0.21963077783584595, + "loss_ce": 0.0015826758462935686, + "loss_iou": 0.322265625, + "loss_num": 0.0341796875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 33319136, + "step": 365 + }, + { + "epoch": 1.525, + "grad_norm": 45.387457295986835, + "learning_rate": 5e-05, + "loss": 0.294, + "num_input_tokens_seen": 33409984, + "step": 366 + }, + { + "epoch": 1.525, + "loss": 0.33772012591362, + "loss_ce": 0.0009281392558477819, + "loss_iou": 0.3125, + "loss_num": 0.05810546875, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 33409984, + "step": 366 + }, + { + "epoch": 1.5291666666666668, + "grad_norm": 6.155663902094734, + "learning_rate": 5e-05, + "loss": 0.2236, + "num_input_tokens_seen": 33501112, + "step": 367 + }, + { + "epoch": 1.5291666666666668, + "loss": 0.22664915025234222, + "loss_ce": 0.0006970040267333388, + "loss_iou": 0.1630859375, + "loss_num": 0.04052734375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 33501112, + "step": 367 + }, + { + "epoch": 1.5333333333333332, + "grad_norm": 4.6118762946783205, + "learning_rate": 5e-05, + "loss": 0.1876, + "num_input_tokens_seen": 33592508, + "step": 368 + }, + { + "epoch": 1.5333333333333332, + "loss": 0.1918344497680664, + "loss_ce": 0.0008249252568930387, + "loss_iou": 0.275390625, + "loss_num": 0.0301513671875, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 33592508, + "step": 368 + }, + { + "epoch": 1.5375, + "grad_norm": 5.626777974326843, + "learning_rate": 5e-05, + "loss": 0.2292, + "num_input_tokens_seen": 33682856, + "step": 369 + }, + { + "epoch": 1.5375, + "loss": 0.24069613218307495, + "loss_ce": 0.0019876514561474323, + "loss_iou": 0.279296875, + "loss_num": 0.03955078125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 33682856, + "step": 369 + }, + { + "epoch": 1.5416666666666665, + "grad_norm": 9.044519890425953, + "learning_rate": 5e-05, + "loss": 0.1977, + "num_input_tokens_seen": 33774808, + "step": 370 + }, + { + "epoch": 1.5416666666666665, + "loss": 0.21389149129390717, + "loss_ce": 0.006127822212874889, + "loss_iou": 0.37109375, + "loss_num": 0.030517578125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 33774808, + "step": 370 + }, + { + "epoch": 1.5458333333333334, + "grad_norm": 21.294508979140122, + "learning_rate": 5e-05, + "loss": 0.1979, + "num_input_tokens_seen": 33866240, + "step": 371 + }, + { + "epoch": 1.5458333333333334, + "loss": 0.16506054997444153, + "loss_ce": 0.0013032348360866308, + "loss_iou": 0.380859375, + "loss_num": 0.021484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 33866240, + "step": 371 + }, + { + "epoch": 1.55, + "grad_norm": 23.744916776588756, + "learning_rate": 5e-05, + "loss": 0.2869, + "num_input_tokens_seen": 33957488, + "step": 372 + }, + { + "epoch": 1.55, + "loss": 0.3346654772758484, + "loss_ce": 0.0010778360301628709, + "loss_iou": 0.41015625, + "loss_num": 0.0546875, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 33957488, + "step": 372 + }, + { + "epoch": 1.5541666666666667, + "grad_norm": 5.0626894260783954, + "learning_rate": 5e-05, + "loss": 0.269, + "num_input_tokens_seen": 34049076, + "step": 373 + }, + { + "epoch": 1.5541666666666667, + "loss": 0.2526951730251312, + "loss_ce": 0.0027562116738408804, + "loss_iou": 0.26953125, + "loss_num": 0.0419921875, + "loss_xval": 0.25, + "num_input_tokens_seen": 34049076, + "step": 373 + }, + { + "epoch": 1.5583333333333333, + "grad_norm": 8.974395840489713, + "learning_rate": 5e-05, + "loss": 0.2534, + "num_input_tokens_seen": 34140908, + "step": 374 + }, + { + "epoch": 1.5583333333333333, + "loss": 0.22455793619155884, + "loss_ce": 0.002328932285308838, + "loss_iou": 0.28125, + "loss_num": 0.0361328125, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 34140908, + "step": 374 + }, + { + "epoch": 1.5625, + "grad_norm": 7.5526802682001986, + "learning_rate": 5e-05, + "loss": 0.1763, + "num_input_tokens_seen": 34231624, + "step": 375 + }, + { + "epoch": 1.5625, + "loss": 0.17288470268249512, + "loss_ce": 0.003206963185220957, + "loss_iou": 0.1787109375, + "loss_num": 0.028564453125, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 34231624, + "step": 375 + }, + { + "epoch": 1.5666666666666667, + "grad_norm": 15.121092207403379, + "learning_rate": 5e-05, + "loss": 0.1969, + "num_input_tokens_seen": 34321652, + "step": 376 + }, + { + "epoch": 1.5666666666666667, + "loss": 0.17863944172859192, + "loss_ce": 0.0010271335486322641, + "loss_iou": 0.328125, + "loss_num": 0.0257568359375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 34321652, + "step": 376 + }, + { + "epoch": 1.5708333333333333, + "grad_norm": 6.962086400891019, + "learning_rate": 5e-05, + "loss": 0.2597, + "num_input_tokens_seen": 34411720, + "step": 377 + }, + { + "epoch": 1.5708333333333333, + "loss": 0.29189687967300415, + "loss_ce": 2.67762388830306e-05, + "loss_iou": 0.50390625, + "loss_num": 0.043212890625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 34411720, + "step": 377 + }, + { + "epoch": 1.575, + "grad_norm": 13.594155653472589, + "learning_rate": 5e-05, + "loss": 0.1871, + "num_input_tokens_seen": 34503112, + "step": 378 + }, + { + "epoch": 1.575, + "loss": 0.2062322199344635, + "loss_ce": 0.00036064465530216694, + "loss_iou": 0.306640625, + "loss_num": 0.03173828125, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 34503112, + "step": 378 + }, + { + "epoch": 1.5791666666666666, + "grad_norm": 11.618376014557729, + "learning_rate": 5e-05, + "loss": 0.2319, + "num_input_tokens_seen": 34594328, + "step": 379 + }, + { + "epoch": 1.5791666666666666, + "loss": 0.2239169478416443, + "loss_ce": 0.00046722288243472576, + "loss_iou": 0.37890625, + "loss_num": 0.033203125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 34594328, + "step": 379 + }, + { + "epoch": 1.5833333333333335, + "grad_norm": 4.160227162012771, + "learning_rate": 5e-05, + "loss": 0.2201, + "num_input_tokens_seen": 34685332, + "step": 380 + }, + { + "epoch": 1.5833333333333335, + "loss": 0.2167702317237854, + "loss_ce": 0.000522678135894239, + "loss_iou": 0.30859375, + "loss_num": 0.033935546875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 34685332, + "step": 380 + }, + { + "epoch": 1.5875, + "grad_norm": 26.170465463725673, + "learning_rate": 5e-05, + "loss": 0.2244, + "num_input_tokens_seen": 34776532, + "step": 381 + }, + { + "epoch": 1.5875, + "loss": 0.2976117730140686, + "loss_ce": 0.0012555646244436502, + "loss_iou": 0.259765625, + "loss_num": 0.051513671875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 34776532, + "step": 381 + }, + { + "epoch": 1.5916666666666668, + "grad_norm": 11.570362583321861, + "learning_rate": 5e-05, + "loss": 0.186, + "num_input_tokens_seen": 34866236, + "step": 382 + }, + { + "epoch": 1.5916666666666668, + "loss": 0.18814677000045776, + "loss_ce": 0.00137919036205858, + "loss_iou": 0.41015625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 34866236, + "step": 382 + }, + { + "epoch": 1.5958333333333332, + "grad_norm": 20.703551173153976, + "learning_rate": 5e-05, + "loss": 0.293, + "num_input_tokens_seen": 34957448, + "step": 383 + }, + { + "epoch": 1.5958333333333332, + "loss": 0.364020973443985, + "loss_ce": 0.00879635289311409, + "loss_iou": 0.259765625, + "loss_num": 0.06298828125, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 34957448, + "step": 383 + }, + { + "epoch": 1.6, + "grad_norm": 7.683924593021991, + "learning_rate": 5e-05, + "loss": 0.2245, + "num_input_tokens_seen": 35048296, + "step": 384 + }, + { + "epoch": 1.6, + "loss": 0.2429579645395279, + "loss_ce": 0.0018080619629472494, + "loss_iou": 0.40625, + "loss_num": 0.03564453125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 35048296, + "step": 384 + }, + { + "epoch": 1.6041666666666665, + "grad_norm": 7.103846328800123, + "learning_rate": 5e-05, + "loss": 0.2665, + "num_input_tokens_seen": 35139692, + "step": 385 + }, + { + "epoch": 1.6041666666666665, + "loss": 0.2960255742073059, + "loss_ce": 0.002873738296329975, + "loss_iou": 0.3515625, + "loss_num": 0.0478515625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 35139692, + "step": 385 + }, + { + "epoch": 1.6083333333333334, + "grad_norm": 18.805427329976087, + "learning_rate": 5e-05, + "loss": 0.2069, + "num_input_tokens_seen": 35230592, + "step": 386 + }, + { + "epoch": 1.6083333333333334, + "loss": 0.19052082300186157, + "loss_ce": 0.001006664359010756, + "loss_iou": 0.36328125, + "loss_num": 0.0267333984375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 35230592, + "step": 386 + }, + { + "epoch": 1.6125, + "grad_norm": 10.10429707392757, + "learning_rate": 5e-05, + "loss": 0.2302, + "num_input_tokens_seen": 35322044, + "step": 387 + }, + { + "epoch": 1.6125, + "loss": 0.24689523875713348, + "loss_ce": 0.0006794063956476748, + "loss_iou": 0.37109375, + "loss_num": 0.037841796875, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 35322044, + "step": 387 + }, + { + "epoch": 1.6166666666666667, + "grad_norm": 4.736744820547368, + "learning_rate": 5e-05, + "loss": 0.3142, + "num_input_tokens_seen": 35413536, + "step": 388 + }, + { + "epoch": 1.6166666666666667, + "loss": 0.4286704957485199, + "loss_ce": 0.0010886834934353828, + "loss_iou": 0.27734375, + "loss_num": 0.0771484375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 35413536, + "step": 388 + }, + { + "epoch": 1.6208333333333333, + "grad_norm": 15.744695283257744, + "learning_rate": 5e-05, + "loss": 0.1926, + "num_input_tokens_seen": 35505484, + "step": 389 + }, + { + "epoch": 1.6208333333333333, + "loss": 0.19779208302497864, + "loss_ce": 0.0017471597529947758, + "loss_iou": 0.345703125, + "loss_num": 0.028564453125, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 35505484, + "step": 389 + }, + { + "epoch": 1.625, + "grad_norm": 7.9490484769934175, + "learning_rate": 5e-05, + "loss": 0.2718, + "num_input_tokens_seen": 35596612, + "step": 390 + }, + { + "epoch": 1.625, + "loss": 0.2874143719673157, + "loss_ce": 0.0004270472563803196, + "loss_iou": 0.3125, + "loss_num": 0.047607421875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 35596612, + "step": 390 + }, + { + "epoch": 1.6291666666666667, + "grad_norm": 7.330820229165241, + "learning_rate": 5e-05, + "loss": 0.297, + "num_input_tokens_seen": 35687724, + "step": 391 + }, + { + "epoch": 1.6291666666666667, + "loss": 0.23688414692878723, + "loss_ce": 0.0011053455527871847, + "loss_iou": 0.2490234375, + "loss_num": 0.039306640625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 35687724, + "step": 391 + }, + { + "epoch": 1.6333333333333333, + "grad_norm": 3.2668345947971096, + "learning_rate": 5e-05, + "loss": 0.2265, + "num_input_tokens_seen": 35778768, + "step": 392 + }, + { + "epoch": 1.6333333333333333, + "loss": 0.19086655974388123, + "loss_ce": 9.624052836443298e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 35778768, + "step": 392 + }, + { + "epoch": 1.6375, + "grad_norm": 6.081542328972911, + "learning_rate": 5e-05, + "loss": 0.2041, + "num_input_tokens_seen": 35870332, + "step": 393 + }, + { + "epoch": 1.6375, + "loss": 0.20596742630004883, + "loss_ce": 0.004307264927774668, + "loss_iou": 0.345703125, + "loss_num": 0.0294189453125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 35870332, + "step": 393 + }, + { + "epoch": 1.6416666666666666, + "grad_norm": 5.586847565860315, + "learning_rate": 5e-05, + "loss": 0.2041, + "num_input_tokens_seen": 35961944, + "step": 394 + }, + { + "epoch": 1.6416666666666666, + "loss": 0.21251477301120758, + "loss_ce": 0.014455698430538177, + "loss_iou": 0.32421875, + "loss_num": 0.029541015625, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 35961944, + "step": 394 + }, + { + "epoch": 1.6458333333333335, + "grad_norm": 5.535852588618803, + "learning_rate": 5e-05, + "loss": 0.2247, + "num_input_tokens_seen": 36053716, + "step": 395 + }, + { + "epoch": 1.6458333333333335, + "loss": 0.2407526671886444, + "loss_ce": 0.0012507280334830284, + "loss_iou": 0.361328125, + "loss_num": 0.036376953125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 36053716, + "step": 395 + }, + { + "epoch": 1.65, + "grad_norm": 8.008392249480794, + "learning_rate": 5e-05, + "loss": 0.1529, + "num_input_tokens_seen": 36145512, + "step": 396 + }, + { + "epoch": 1.65, + "loss": 0.13524940609931946, + "loss_ce": 0.0006058429717086256, + "loss_iou": 0.24609375, + "loss_num": 0.0191650390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 36145512, + "step": 396 + }, + { + "epoch": 1.6541666666666668, + "grad_norm": 10.17149172074727, + "learning_rate": 5e-05, + "loss": 0.1779, + "num_input_tokens_seen": 36237148, + "step": 397 + }, + { + "epoch": 1.6541666666666668, + "loss": 0.18197308480739594, + "loss_ce": 0.002529717283323407, + "loss_iou": 0.357421875, + "loss_num": 0.024658203125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 36237148, + "step": 397 + }, + { + "epoch": 1.6583333333333332, + "grad_norm": 12.250940882285644, + "learning_rate": 5e-05, + "loss": 0.2401, + "num_input_tokens_seen": 36328940, + "step": 398 + }, + { + "epoch": 1.6583333333333332, + "loss": 0.24007660150527954, + "loss_ce": 0.002588799921795726, + "loss_iou": 0.255859375, + "loss_num": 0.039306640625, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 36328940, + "step": 398 + }, + { + "epoch": 1.6625, + "grad_norm": 13.131902976504431, + "learning_rate": 5e-05, + "loss": 0.2234, + "num_input_tokens_seen": 36420480, + "step": 399 + }, + { + "epoch": 1.6625, + "loss": 0.16811129450798035, + "loss_ce": 0.0016684221336618066, + "loss_iou": 0.359375, + "loss_num": 0.0218505859375, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 36420480, + "step": 399 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 10.31781709490099, + "learning_rate": 5e-05, + "loss": 0.2428, + "num_input_tokens_seen": 36512368, + "step": 400 + }, + { + "epoch": 1.6666666666666665, + "loss": 0.2330133020877838, + "loss_ce": 0.0021173148415982723, + "loss_iou": 0.259765625, + "loss_num": 0.037841796875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 36512368, + "step": 400 + }, + { + "epoch": 1.6708333333333334, + "grad_norm": 9.461221271761932, + "learning_rate": 5e-05, + "loss": 0.3251, + "num_input_tokens_seen": 36603644, + "step": 401 + }, + { + "epoch": 1.6708333333333334, + "loss": 0.30060258507728577, + "loss_ce": 0.0023848214186728, + "loss_iou": 0.390625, + "loss_num": 0.047119140625, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 36603644, + "step": 401 + }, + { + "epoch": 1.675, + "grad_norm": 7.1425619613813645, + "learning_rate": 5e-05, + "loss": 0.219, + "num_input_tokens_seen": 36694824, + "step": 402 + }, + { + "epoch": 1.675, + "loss": 0.2716678977012634, + "loss_ce": 0.0014652373502030969, + "loss_iou": 0.30078125, + "loss_num": 0.04443359375, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 36694824, + "step": 402 + }, + { + "epoch": 1.6791666666666667, + "grad_norm": 10.989336173327343, + "learning_rate": 5e-05, + "loss": 0.2362, + "num_input_tokens_seen": 36786560, + "step": 403 + }, + { + "epoch": 1.6791666666666667, + "loss": 0.23715360462665558, + "loss_ce": 0.0011306637898087502, + "loss_iou": 0.3125, + "loss_num": 0.037109375, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 36786560, + "step": 403 + }, + { + "epoch": 1.6833333333333333, + "grad_norm": 6.976559533587898, + "learning_rate": 5e-05, + "loss": 0.269, + "num_input_tokens_seen": 36878524, + "step": 404 + }, + { + "epoch": 1.6833333333333333, + "loss": 0.2160152792930603, + "loss_ce": 0.005840706638991833, + "loss_iou": 0.3203125, + "loss_num": 0.03173828125, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 36878524, + "step": 404 + }, + { + "epoch": 1.6875, + "grad_norm": 17.060754210977322, + "learning_rate": 5e-05, + "loss": 0.268, + "num_input_tokens_seen": 36970148, + "step": 405 + }, + { + "epoch": 1.6875, + "loss": 0.17095698416233063, + "loss_ce": 0.0011266570072621107, + "loss_iou": 0.326171875, + "loss_num": 0.0234375, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 36970148, + "step": 405 + }, + { + "epoch": 1.6916666666666667, + "grad_norm": 5.975013278466526, + "learning_rate": 5e-05, + "loss": 0.2443, + "num_input_tokens_seen": 37061936, + "step": 406 + }, + { + "epoch": 1.6916666666666667, + "loss": 0.21971558034420013, + "loss_ce": 0.0009655768517404795, + "loss_iou": 0.3203125, + "loss_num": 0.033447265625, + "loss_xval": 0.21875, + "num_input_tokens_seen": 37061936, + "step": 406 + }, + { + "epoch": 1.6958333333333333, + "grad_norm": 8.59667374148006, + "learning_rate": 5e-05, + "loss": 0.1863, + "num_input_tokens_seen": 37152712, + "step": 407 + }, + { + "epoch": 1.6958333333333333, + "loss": 0.21013236045837402, + "loss_ce": 0.0005071184132248163, + "loss_iou": 0.2412109375, + "loss_num": 0.0341796875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 37152712, + "step": 407 + }, + { + "epoch": 1.7, + "grad_norm": 3.3529361396033086, + "learning_rate": 5e-05, + "loss": 0.2094, + "num_input_tokens_seen": 37244304, + "step": 408 + }, + { + "epoch": 1.7, + "loss": 0.17331859469413757, + "loss_ce": 0.002420151839032769, + "loss_iou": 0.30859375, + "loss_num": 0.024169921875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 37244304, + "step": 408 + }, + { + "epoch": 1.7041666666666666, + "grad_norm": 6.3683193819610375, + "learning_rate": 5e-05, + "loss": 0.2657, + "num_input_tokens_seen": 37333460, + "step": 409 + }, + { + "epoch": 1.7041666666666666, + "loss": 0.3749966025352478, + "loss_ce": 0.000515383668243885, + "loss_iou": 0.1396484375, + "loss_num": 0.0703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 37333460, + "step": 409 + }, + { + "epoch": 1.7083333333333335, + "grad_norm": 13.088956915518326, + "learning_rate": 5e-05, + "loss": 0.2861, + "num_input_tokens_seen": 37424924, + "step": 410 + }, + { + "epoch": 1.7083333333333335, + "loss": 0.3154312074184418, + "loss_ce": 0.014772024936974049, + "loss_iou": 0.36328125, + "loss_num": 0.04833984375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 37424924, + "step": 410 + }, + { + "epoch": 1.7125, + "grad_norm": 13.613320408355072, + "learning_rate": 5e-05, + "loss": 0.2217, + "num_input_tokens_seen": 37516336, + "step": 411 + }, + { + "epoch": 1.7125, + "loss": 0.1935127079486847, + "loss_ce": 0.002289560856297612, + "loss_iou": 0.328125, + "loss_num": 0.0274658203125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 37516336, + "step": 411 + }, + { + "epoch": 1.7166666666666668, + "grad_norm": 13.38082197566342, + "learning_rate": 5e-05, + "loss": 0.1883, + "num_input_tokens_seen": 37607440, + "step": 412 + }, + { + "epoch": 1.7166666666666668, + "loss": 0.223766028881073, + "loss_ce": 0.0012318526860326529, + "loss_iou": 0.3125, + "loss_num": 0.0341796875, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 37607440, + "step": 412 + }, + { + "epoch": 1.7208333333333332, + "grad_norm": 8.073520023907198, + "learning_rate": 5e-05, + "loss": 0.2529, + "num_input_tokens_seen": 37698444, + "step": 413 + }, + { + "epoch": 1.7208333333333332, + "loss": 0.24851244688034058, + "loss_ce": 0.0008317787433043122, + "loss_iou": 0.201171875, + "loss_num": 0.04296875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 37698444, + "step": 413 + }, + { + "epoch": 1.725, + "grad_norm": 6.152938443727431, + "learning_rate": 5e-05, + "loss": 0.2345, + "num_input_tokens_seen": 37789556, + "step": 414 + }, + { + "epoch": 1.725, + "loss": 0.17417480051517487, + "loss_ce": 0.0009570303955115378, + "loss_iou": 0.365234375, + "loss_num": 0.0225830078125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 37789556, + "step": 414 + }, + { + "epoch": 1.7291666666666665, + "grad_norm": 7.64973034730577, + "learning_rate": 5e-05, + "loss": 0.2374, + "num_input_tokens_seen": 37881200, + "step": 415 + }, + { + "epoch": 1.7291666666666665, + "loss": 0.2355271875858307, + "loss_ce": 0.0021287663839757442, + "loss_iou": 0.345703125, + "loss_num": 0.03515625, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 37881200, + "step": 415 + }, + { + "epoch": 1.7333333333333334, + "grad_norm": 7.465852707646311, + "learning_rate": 5e-05, + "loss": 0.2417, + "num_input_tokens_seen": 37972388, + "step": 416 + }, + { + "epoch": 1.7333333333333334, + "loss": 0.21885107457637787, + "loss_ce": 0.0008945193840190768, + "loss_iou": 0.3671875, + "loss_num": 0.031494140625, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 37972388, + "step": 416 + }, + { + "epoch": 1.7375, + "grad_norm": 12.16011864159544, + "learning_rate": 5e-05, + "loss": 0.1871, + "num_input_tokens_seen": 38063388, + "step": 417 + }, + { + "epoch": 1.7375, + "loss": 0.22579969465732574, + "loss_ce": 0.0022279280237853527, + "loss_iou": 0.1767578125, + "loss_num": 0.038818359375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 38063388, + "step": 417 + }, + { + "epoch": 1.7416666666666667, + "grad_norm": 7.508322645181989, + "learning_rate": 5e-05, + "loss": 0.1914, + "num_input_tokens_seen": 38155040, + "step": 418 + }, + { + "epoch": 1.7416666666666667, + "loss": 0.151132732629776, + "loss_ce": 0.00904288049787283, + "loss_iou": 0.32421875, + "loss_num": 0.017578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 38155040, + "step": 418 + }, + { + "epoch": 1.7458333333333333, + "grad_norm": 7.262707246882911, + "learning_rate": 5e-05, + "loss": 0.1827, + "num_input_tokens_seen": 38245628, + "step": 419 + }, + { + "epoch": 1.7458333333333333, + "loss": 0.20362314581871033, + "loss_ce": 0.00013193067570682615, + "loss_iou": 0.333984375, + "loss_num": 0.0296630859375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 38245628, + "step": 419 + }, + { + "epoch": 1.75, + "grad_norm": 11.195260101861862, + "learning_rate": 5e-05, + "loss": 0.2121, + "num_input_tokens_seen": 38336516, + "step": 420 + }, + { + "epoch": 1.75, + "loss": 0.15969014167785645, + "loss_ce": 0.0031349719502031803, + "loss_iou": 0.19140625, + "loss_num": 0.02490234375, + "loss_xval": 0.15625, + "num_input_tokens_seen": 38336516, + "step": 420 + }, + { + "epoch": 1.7541666666666667, + "grad_norm": 7.311132884519421, + "learning_rate": 5e-05, + "loss": 0.2435, + "num_input_tokens_seen": 38427492, + "step": 421 + }, + { + "epoch": 1.7541666666666667, + "loss": 0.2525137662887573, + "loss_ce": 0.0022085891105234623, + "loss_iou": 0.384765625, + "loss_num": 0.037109375, + "loss_xval": 0.25, + "num_input_tokens_seen": 38427492, + "step": 421 + }, + { + "epoch": 1.7583333333333333, + "grad_norm": 8.407594284466022, + "learning_rate": 5e-05, + "loss": 0.2977, + "num_input_tokens_seen": 38518472, + "step": 422 + }, + { + "epoch": 1.7583333333333333, + "loss": 0.32363784313201904, + "loss_ce": 0.06790055334568024, + "loss_iou": 0.41796875, + "loss_num": 0.037109375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 38518472, + "step": 422 + }, + { + "epoch": 1.7625, + "grad_norm": 6.269501342271039, + "learning_rate": 5e-05, + "loss": 0.2442, + "num_input_tokens_seen": 38608332, + "step": 423 + }, + { + "epoch": 1.7625, + "loss": 0.2547208070755005, + "loss_ce": 0.001608002814464271, + "loss_iou": 0.0810546875, + "loss_num": 0.0478515625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 38608332, + "step": 423 + }, + { + "epoch": 1.7666666666666666, + "grad_norm": 13.92887135014984, + "learning_rate": 5e-05, + "loss": 0.1873, + "num_input_tokens_seen": 38699308, + "step": 424 + }, + { + "epoch": 1.7666666666666666, + "loss": 0.1888759434223175, + "loss_ce": 0.003268023021519184, + "loss_iou": 0.21875, + "loss_num": 0.0296630859375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 38699308, + "step": 424 + }, + { + "epoch": 1.7708333333333335, + "grad_norm": 12.482389388443156, + "learning_rate": 5e-05, + "loss": 0.2519, + "num_input_tokens_seen": 38790780, + "step": 425 + }, + { + "epoch": 1.7708333333333335, + "loss": 0.23790404200553894, + "loss_ce": 0.0007824670756235719, + "loss_iou": 0.40625, + "loss_num": 0.03369140625, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 38790780, + "step": 425 + }, + { + "epoch": 1.775, + "grad_norm": 15.205548992803037, + "learning_rate": 5e-05, + "loss": 0.2028, + "num_input_tokens_seen": 38881920, + "step": 426 + }, + { + "epoch": 1.775, + "loss": 0.1748272329568863, + "loss_ce": 0.0007549648871645331, + "loss_iou": 0.39453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 38881920, + "step": 426 + }, + { + "epoch": 1.7791666666666668, + "grad_norm": 52.300440017972036, + "learning_rate": 5e-05, + "loss": 0.2853, + "num_input_tokens_seen": 38973020, + "step": 427 + }, + { + "epoch": 1.7791666666666668, + "loss": 0.27499961853027344, + "loss_ce": 0.000463480013422668, + "loss_iou": 0.40234375, + "loss_num": 0.041259765625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 38973020, + "step": 427 + }, + { + "epoch": 1.7833333333333332, + "grad_norm": 9.954323695155647, + "learning_rate": 5e-05, + "loss": 0.1929, + "num_input_tokens_seen": 39064384, + "step": 428 + }, + { + "epoch": 1.7833333333333332, + "loss": 0.21909351646900177, + "loss_ce": 0.0010759325232356787, + "loss_iou": 0.3671875, + "loss_num": 0.031005859375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 39064384, + "step": 428 + }, + { + "epoch": 1.7875, + "grad_norm": 2.790513551952913, + "learning_rate": 5e-05, + "loss": 0.1648, + "num_input_tokens_seen": 39156164, + "step": 429 + }, + { + "epoch": 1.7875, + "loss": 0.20743504166603088, + "loss_ce": 0.0013193088816478848, + "loss_iou": 0.326171875, + "loss_num": 0.030029296875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 39156164, + "step": 429 + }, + { + "epoch": 1.7916666666666665, + "grad_norm": 5.595761110073747, + "learning_rate": 5e-05, + "loss": 0.2919, + "num_input_tokens_seen": 39246356, + "step": 430 + }, + { + "epoch": 1.7916666666666665, + "loss": 0.3420943319797516, + "loss_ce": 0.0020674869883805513, + "loss_iou": 0.361328125, + "loss_num": 0.0556640625, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 39246356, + "step": 430 + }, + { + "epoch": 1.7958333333333334, + "grad_norm": 48.416934241964086, + "learning_rate": 5e-05, + "loss": 0.2207, + "num_input_tokens_seen": 39337120, + "step": 431 + }, + { + "epoch": 1.7958333333333334, + "loss": 0.21679693460464478, + "loss_ce": 0.0009155991720035672, + "loss_iou": 0.380859375, + "loss_num": 0.030029296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 39337120, + "step": 431 + }, + { + "epoch": 1.8, + "grad_norm": 18.232593843356966, + "learning_rate": 5e-05, + "loss": 0.2727, + "num_input_tokens_seen": 39428636, + "step": 432 + }, + { + "epoch": 1.8, + "loss": 0.2289196401834488, + "loss_ce": 0.0031505939550697803, + "loss_iou": 0.33984375, + "loss_num": 0.033447265625, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 39428636, + "step": 432 + }, + { + "epoch": 1.8041666666666667, + "grad_norm": 13.763518512759806, + "learning_rate": 5e-05, + "loss": 0.1901, + "num_input_tokens_seen": 39520168, + "step": 433 + }, + { + "epoch": 1.8041666666666667, + "loss": 0.1349903792142868, + "loss_ce": 0.005397483240813017, + "loss_iou": 0.15625, + "loss_num": 0.0205078125, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 39520168, + "step": 433 + }, + { + "epoch": 1.8083333333333333, + "grad_norm": 6.450519649920571, + "learning_rate": 5e-05, + "loss": 0.2366, + "num_input_tokens_seen": 39611840, + "step": 434 + }, + { + "epoch": 1.8083333333333333, + "loss": 0.21854786574840546, + "loss_ce": 0.0014458110090345144, + "loss_iou": 0.3046875, + "loss_num": 0.032958984375, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 39611840, + "step": 434 + }, + { + "epoch": 1.8125, + "grad_norm": 36.514213990480904, + "learning_rate": 5e-05, + "loss": 0.2076, + "num_input_tokens_seen": 39703452, + "step": 435 + }, + { + "epoch": 1.8125, + "loss": 0.24096806347370148, + "loss_ce": 0.00445683253929019, + "loss_iou": 0.359375, + "loss_num": 0.034912109375, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 39703452, + "step": 435 + }, + { + "epoch": 1.8166666666666667, + "grad_norm": 26.94343530241333, + "learning_rate": 5e-05, + "loss": 0.2408, + "num_input_tokens_seen": 39794924, + "step": 436 + }, + { + "epoch": 1.8166666666666667, + "loss": 0.24908341467380524, + "loss_ce": 0.0005482627893798053, + "loss_iou": 0.5078125, + "loss_num": 0.031982421875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 39794924, + "step": 436 + }, + { + "epoch": 1.8208333333333333, + "grad_norm": 14.728416874555222, + "learning_rate": 5e-05, + "loss": 0.3074, + "num_input_tokens_seen": 39886324, + "step": 437 + }, + { + "epoch": 1.8208333333333333, + "loss": 0.2843567728996277, + "loss_ce": 0.0029847188852727413, + "loss_iou": 0.365234375, + "loss_num": 0.04345703125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 39886324, + "step": 437 + }, + { + "epoch": 1.825, + "grad_norm": 8.850324859702438, + "learning_rate": 5e-05, + "loss": 0.2256, + "num_input_tokens_seen": 39977516, + "step": 438 + }, + { + "epoch": 1.825, + "loss": 0.20806992053985596, + "loss_ce": 0.0029917967040091753, + "loss_iou": 0.251953125, + "loss_num": 0.0322265625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 39977516, + "step": 438 + }, + { + "epoch": 1.8291666666666666, + "grad_norm": 18.04227980986907, + "learning_rate": 5e-05, + "loss": 0.2689, + "num_input_tokens_seen": 40068756, + "step": 439 + }, + { + "epoch": 1.8291666666666666, + "loss": 0.31097856163978577, + "loss_ce": 0.0016218679957091808, + "loss_iou": 0.33984375, + "loss_num": 0.050048828125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 40068756, + "step": 439 + }, + { + "epoch": 1.8333333333333335, + "grad_norm": 5.586189786763669, + "learning_rate": 5e-05, + "loss": 0.2875, + "num_input_tokens_seen": 40160316, + "step": 440 + }, + { + "epoch": 1.8333333333333335, + "loss": 0.24810029566287994, + "loss_ce": 0.0009079031879082322, + "loss_iou": 0.34375, + "loss_num": 0.037353515625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 40160316, + "step": 440 + }, + { + "epoch": 1.8375, + "grad_norm": 22.851153983391157, + "learning_rate": 5e-05, + "loss": 0.2403, + "num_input_tokens_seen": 40252136, + "step": 441 + }, + { + "epoch": 1.8375, + "loss": 0.21321170032024384, + "loss_ce": 0.005081813782453537, + "loss_iou": 0.2490234375, + "loss_num": 0.032958984375, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 40252136, + "step": 441 + }, + { + "epoch": 1.8416666666666668, + "grad_norm": 5.288895590637178, + "learning_rate": 5e-05, + "loss": 0.2826, + "num_input_tokens_seen": 40341600, + "step": 442 + }, + { + "epoch": 1.8416666666666668, + "loss": 0.23124778270721436, + "loss_ce": 0.0005348873091861606, + "loss_iou": 0.4453125, + "loss_num": 0.030517578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 40341600, + "step": 442 + }, + { + "epoch": 1.8458333333333332, + "grad_norm": 8.125178497484377, + "learning_rate": 5e-05, + "loss": 0.2252, + "num_input_tokens_seen": 40433276, + "step": 443 + }, + { + "epoch": 1.8458333333333332, + "loss": 0.24864652752876282, + "loss_ce": 0.002247605938464403, + "loss_iou": 0.373046875, + "loss_num": 0.0361328125, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 40433276, + "step": 443 + }, + { + "epoch": 1.85, + "grad_norm": 7.138480090683848, + "learning_rate": 5e-05, + "loss": 0.2542, + "num_input_tokens_seen": 40524728, + "step": 444 + }, + { + "epoch": 1.85, + "loss": 0.25098586082458496, + "loss_ce": 0.005014184396713972, + "loss_iou": 0.1689453125, + "loss_num": 0.043212890625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 40524728, + "step": 444 + }, + { + "epoch": 1.8541666666666665, + "grad_norm": 5.769777410913954, + "learning_rate": 5e-05, + "loss": 0.203, + "num_input_tokens_seen": 40616144, + "step": 445 + }, + { + "epoch": 1.8541666666666665, + "loss": 0.2437579333782196, + "loss_ce": 0.002546995645388961, + "loss_iou": 0.40625, + "loss_num": 0.033935546875, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 40616144, + "step": 445 + }, + { + "epoch": 1.8583333333333334, + "grad_norm": 4.068488818002669, + "learning_rate": 5e-05, + "loss": 0.2416, + "num_input_tokens_seen": 40708024, + "step": 446 + }, + { + "epoch": 1.8583333333333334, + "loss": 0.21607771515846252, + "loss_ce": 0.003003982827067375, + "loss_iou": 0.25390625, + "loss_num": 0.03369140625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 40708024, + "step": 446 + }, + { + "epoch": 1.8625, + "grad_norm": 10.867206459117877, + "learning_rate": 5e-05, + "loss": 0.2701, + "num_input_tokens_seen": 40799076, + "step": 447 + }, + { + "epoch": 1.8625, + "loss": 0.33143606781959534, + "loss_ce": 0.00263968319632113, + "loss_iou": 0.328125, + "loss_num": 0.05419921875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 40799076, + "step": 447 + }, + { + "epoch": 1.8666666666666667, + "grad_norm": 54.97065144326686, + "learning_rate": 5e-05, + "loss": 0.1671, + "num_input_tokens_seen": 40890848, + "step": 448 + }, + { + "epoch": 1.8666666666666667, + "loss": 0.1328609138727188, + "loss_ce": 0.001574298250488937, + "loss_iou": 0.20703125, + "loss_num": 0.018798828125, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 40890848, + "step": 448 + }, + { + "epoch": 1.8708333333333333, + "grad_norm": 5.836067414607739, + "learning_rate": 5e-05, + "loss": 0.2459, + "num_input_tokens_seen": 40983080, + "step": 449 + }, + { + "epoch": 1.8708333333333333, + "loss": 0.2940506637096405, + "loss_ce": 0.0029129667673259974, + "loss_iou": 0.310546875, + "loss_num": 0.047119140625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 40983080, + "step": 449 + }, + { + "epoch": 1.875, + "grad_norm": 4.601007964435342, + "learning_rate": 5e-05, + "loss": 0.2801, + "num_input_tokens_seen": 41074384, + "step": 450 + }, + { + "epoch": 1.875, + "loss": 0.25329628586769104, + "loss_ce": 0.0012821154668927193, + "loss_iou": 0.359375, + "loss_num": 0.03759765625, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 41074384, + "step": 450 + }, + { + "epoch": 1.8791666666666667, + "grad_norm": 12.796871529274252, + "learning_rate": 5e-05, + "loss": 0.3056, + "num_input_tokens_seen": 41165644, + "step": 451 + }, + { + "epoch": 1.8791666666666667, + "loss": 0.35303500294685364, + "loss_ce": 0.0019607748836278915, + "loss_iou": 0.388671875, + "loss_num": 0.05615234375, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 41165644, + "step": 451 + }, + { + "epoch": 1.8833333333333333, + "grad_norm": 13.93276233688637, + "learning_rate": 5e-05, + "loss": 0.249, + "num_input_tokens_seen": 41257472, + "step": 452 + }, + { + "epoch": 1.8833333333333333, + "loss": 0.2850377559661865, + "loss_ce": 0.002200846094638109, + "loss_iou": 0.287109375, + "loss_num": 0.046142578125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 41257472, + "step": 452 + }, + { + "epoch": 1.8875, + "grad_norm": 20.22805971748526, + "learning_rate": 5e-05, + "loss": 0.228, + "num_input_tokens_seen": 41348604, + "step": 453 + }, + { + "epoch": 1.8875, + "loss": 0.27741241455078125, + "loss_ce": 6.864364695502445e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.04638671875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 41348604, + "step": 453 + }, + { + "epoch": 1.8916666666666666, + "grad_norm": 14.070020258641543, + "learning_rate": 5e-05, + "loss": 0.2554, + "num_input_tokens_seen": 41439728, + "step": 454 + }, + { + "epoch": 1.8916666666666666, + "loss": 0.1974847912788391, + "loss_ce": 0.0027826428413391113, + "loss_iou": 0.31640625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 41439728, + "step": 454 + }, + { + "epoch": 1.8958333333333335, + "grad_norm": 10.414859001422169, + "learning_rate": 5e-05, + "loss": 0.2386, + "num_input_tokens_seen": 41531584, + "step": 455 + }, + { + "epoch": 1.8958333333333335, + "loss": 0.2527470588684082, + "loss_ce": 0.00024461650173179805, + "loss_iou": 0.47265625, + "loss_num": 0.033447265625, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 41531584, + "step": 455 + }, + { + "epoch": 1.9, + "grad_norm": 10.851725528442135, + "learning_rate": 5e-05, + "loss": 0.2082, + "num_input_tokens_seen": 41623208, + "step": 456 + }, + { + "epoch": 1.9, + "loss": 0.21295757591724396, + "loss_ce": 0.0010435068979859352, + "loss_iou": 0.28515625, + "loss_num": 0.031982421875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 41623208, + "step": 456 + }, + { + "epoch": 1.9041666666666668, + "grad_norm": 6.143359027790444, + "learning_rate": 5e-05, + "loss": 0.1694, + "num_input_tokens_seen": 41714784, + "step": 457 + }, + { + "epoch": 1.9041666666666668, + "loss": 0.11842577159404755, + "loss_ce": 0.004717267118394375, + "loss_iou": 0.19140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 41714784, + "step": 457 + }, + { + "epoch": 1.9083333333333332, + "grad_norm": 4.351736596832513, + "learning_rate": 5e-05, + "loss": 0.1894, + "num_input_tokens_seen": 41806148, + "step": 458 + }, + { + "epoch": 1.9083333333333332, + "loss": 0.20772914588451385, + "loss_ce": 8.754467853577808e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0291748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 41806148, + "step": 458 + }, + { + "epoch": 1.9125, + "grad_norm": 3.880482346896209, + "learning_rate": 5e-05, + "loss": 0.2375, + "num_input_tokens_seen": 41897204, + "step": 459 + }, + { + "epoch": 1.9125, + "loss": 0.27232295274734497, + "loss_ce": 0.0005333737935870886, + "loss_iou": 0.369140625, + "loss_num": 0.040771484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 41897204, + "step": 459 + }, + { + "epoch": 1.9166666666666665, + "grad_norm": 4.917601920806994, + "learning_rate": 5e-05, + "loss": 0.2394, + "num_input_tokens_seen": 41988488, + "step": 460 + }, + { + "epoch": 1.9166666666666665, + "loss": 0.25409287214279175, + "loss_ce": 0.0018650712445378304, + "loss_iou": 0.1611328125, + "loss_num": 0.044677734375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 41988488, + "step": 460 + }, + { + "epoch": 1.9208333333333334, + "grad_norm": 8.894410443279389, + "learning_rate": 5e-05, + "loss": 0.2329, + "num_input_tokens_seen": 42079860, + "step": 461 + }, + { + "epoch": 1.9208333333333334, + "loss": 0.23146000504493713, + "loss_ce": 1.4697448932565749e-05, + "loss_iou": 0.294921875, + "loss_num": 0.035400390625, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 42079860, + "step": 461 + }, + { + "epoch": 1.925, + "grad_norm": 16.189621066761624, + "learning_rate": 5e-05, + "loss": 0.2077, + "num_input_tokens_seen": 42170812, + "step": 462 + }, + { + "epoch": 1.925, + "loss": 0.21362504363059998, + "loss_ce": 6.30219146842137e-05, + "loss_iou": 0.291015625, + "loss_num": 0.031982421875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 42170812, + "step": 462 + }, + { + "epoch": 1.9291666666666667, + "grad_norm": 3.855073566469258, + "learning_rate": 5e-05, + "loss": 0.2269, + "num_input_tokens_seen": 42261644, + "step": 463 + }, + { + "epoch": 1.9291666666666667, + "loss": 0.30324554443359375, + "loss_ce": 0.0003280591045040637, + "loss_iou": 0.271484375, + "loss_num": 0.050537109375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 42261644, + "step": 463 + }, + { + "epoch": 1.9333333333333333, + "grad_norm": 16.503840026630748, + "learning_rate": 5e-05, + "loss": 0.2487, + "num_input_tokens_seen": 42353188, + "step": 464 + }, + { + "epoch": 1.9333333333333333, + "loss": 0.18360735476016998, + "loss_ce": 0.0017225849442183971, + "loss_iou": 0.099609375, + "loss_num": 0.03271484375, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 42353188, + "step": 464 + }, + { + "epoch": 1.9375, + "grad_norm": 8.628894781312468, + "learning_rate": 5e-05, + "loss": 0.1897, + "num_input_tokens_seen": 42444276, + "step": 465 + }, + { + "epoch": 1.9375, + "loss": 0.18303534388542175, + "loss_ce": 0.002005070447921753, + "loss_iou": 0.248046875, + "loss_num": 0.0269775390625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 42444276, + "step": 465 + }, + { + "epoch": 1.9416666666666667, + "grad_norm": 7.3114052832005845, + "learning_rate": 5e-05, + "loss": 0.3136, + "num_input_tokens_seen": 42535168, + "step": 466 + }, + { + "epoch": 1.9416666666666667, + "loss": 0.31238681077957153, + "loss_ce": 0.002572346245869994, + "loss_iou": 0.2470703125, + "loss_num": 0.052734375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 42535168, + "step": 466 + }, + { + "epoch": 1.9458333333333333, + "grad_norm": 4.8613746511208475, + "learning_rate": 5e-05, + "loss": 0.2269, + "num_input_tokens_seen": 42626024, + "step": 467 + }, + { + "epoch": 1.9458333333333333, + "loss": 0.218013197183609, + "loss_ce": 0.00017872979515232146, + "loss_iou": 0.291015625, + "loss_num": 0.03271484375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 42626024, + "step": 467 + }, + { + "epoch": 1.95, + "grad_norm": 4.480762983796849, + "learning_rate": 5e-05, + "loss": 0.3126, + "num_input_tokens_seen": 42717488, + "step": 468 + }, + { + "epoch": 1.95, + "loss": 0.36717867851257324, + "loss_ce": 0.0014255361165851355, + "loss_iou": 0.259765625, + "loss_num": 0.0634765625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 42717488, + "step": 468 + }, + { + "epoch": 1.9541666666666666, + "grad_norm": 6.821058545858503, + "learning_rate": 5e-05, + "loss": 0.2826, + "num_input_tokens_seen": 42808380, + "step": 469 + }, + { + "epoch": 1.9541666666666666, + "loss": 0.3651350438594818, + "loss_ce": 0.0002668919914867729, + "loss_iou": 0.1962890625, + "loss_num": 0.0654296875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 42808380, + "step": 469 + }, + { + "epoch": 1.9583333333333335, + "grad_norm": 8.457303564288464, + "learning_rate": 5e-05, + "loss": 0.2184, + "num_input_tokens_seen": 42900112, + "step": 470 + }, + { + "epoch": 1.9583333333333335, + "loss": 0.19896230101585388, + "loss_ce": 0.005419825669378042, + "loss_iou": 0.265625, + "loss_num": 0.02880859375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 42900112, + "step": 470 + }, + { + "epoch": 1.9625, + "grad_norm": 43.913513558689424, + "learning_rate": 5e-05, + "loss": 0.2405, + "num_input_tokens_seen": 42991484, + "step": 471 + }, + { + "epoch": 1.9625, + "loss": 0.19493769109249115, + "loss_ce": 0.0004796826106030494, + "loss_iou": 0.376953125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 42991484, + "step": 471 + }, + { + "epoch": 1.9666666666666668, + "grad_norm": 5.607040268767477, + "learning_rate": 5e-05, + "loss": 0.2056, + "num_input_tokens_seen": 43083208, + "step": 472 + }, + { + "epoch": 1.9666666666666668, + "loss": 0.2115071415901184, + "loss_ce": 0.0014546550810337067, + "loss_iou": 0.29296875, + "loss_num": 0.031005859375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 43083208, + "step": 472 + }, + { + "epoch": 1.9708333333333332, + "grad_norm": 15.939240899776951, + "learning_rate": 5e-05, + "loss": 0.2249, + "num_input_tokens_seen": 43173816, + "step": 473 + }, + { + "epoch": 1.9708333333333332, + "loss": 0.24818861484527588, + "loss_ce": 0.0024000415578484535, + "loss_iou": 0.298828125, + "loss_num": 0.037841796875, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 43173816, + "step": 473 + }, + { + "epoch": 1.975, + "grad_norm": 7.107474798711838, + "learning_rate": 5e-05, + "loss": 0.2092, + "num_input_tokens_seen": 43265380, + "step": 474 + }, + { + "epoch": 1.975, + "loss": 0.14708954095840454, + "loss_ce": 0.0022531079594045877, + "loss_iou": 0.259765625, + "loss_num": 0.0191650390625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 43265380, + "step": 474 + }, + { + "epoch": 1.9791666666666665, + "grad_norm": 6.792902728521322, + "learning_rate": 5e-05, + "loss": 0.2255, + "num_input_tokens_seen": 43356616, + "step": 475 + }, + { + "epoch": 1.9791666666666665, + "loss": 0.1605501025915146, + "loss_ce": 0.0006379862898029387, + "loss_iou": 0.310546875, + "loss_num": 0.020263671875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 43356616, + "step": 475 + }, + { + "epoch": 1.9833333333333334, + "grad_norm": 5.614667033572034, + "learning_rate": 5e-05, + "loss": 0.2103, + "num_input_tokens_seen": 43447548, + "step": 476 + }, + { + "epoch": 1.9833333333333334, + "loss": 0.1937415599822998, + "loss_ce": 0.00068735855165869, + "loss_iou": 0.34765625, + "loss_num": 0.025390625, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 43447548, + "step": 476 + }, + { + "epoch": 1.9875, + "grad_norm": 5.637636664809899, + "learning_rate": 5e-05, + "loss": 0.2005, + "num_input_tokens_seen": 43539048, + "step": 477 + }, + { + "epoch": 1.9875, + "loss": 0.19285109639167786, + "loss_ce": 0.010478038340806961, + "loss_iou": 0.365234375, + "loss_num": 0.0225830078125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 43539048, + "step": 477 + }, + { + "epoch": 1.9916666666666667, + "grad_norm": 10.267982859584741, + "learning_rate": 5e-05, + "loss": 0.2948, + "num_input_tokens_seen": 43630460, + "step": 478 + }, + { + "epoch": 1.9916666666666667, + "loss": 0.3191360831260681, + "loss_ce": 0.009077484719455242, + "loss_iou": 0.3046875, + "loss_num": 0.05029296875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 43630460, + "step": 478 + }, + { + "epoch": 1.9958333333333333, + "grad_norm": 4.494551642761895, + "learning_rate": 5e-05, + "loss": 0.2828, + "num_input_tokens_seen": 43721936, + "step": 479 + }, + { + "epoch": 1.9958333333333333, + "loss": 0.3179689645767212, + "loss_ce": 0.001318573486059904, + "loss_iou": 0.25390625, + "loss_num": 0.0537109375, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 43721936, + "step": 479 + }, + { + "epoch": 2.0, + "grad_norm": 17.220812887551514, + "learning_rate": 5e-05, + "loss": 0.2027, + "num_input_tokens_seen": 43813360, + "step": 480 + }, + { + "epoch": 2.0, + "loss": 0.2017011195421219, + "loss_ce": 0.0005902753327973187, + "loss_iou": 0.421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 43813360, + "step": 480 + }, + { + "epoch": 2.004166666666667, + "grad_norm": 9.071971916454121, + "learning_rate": 5e-05, + "loss": 0.293, + "num_input_tokens_seen": 43904944, + "step": 481 + }, + { + "epoch": 2.004166666666667, + "loss": 0.3399706780910492, + "loss_ce": 0.012150846421718597, + "loss_iou": 0.306640625, + "loss_num": 0.0537109375, + "loss_xval": 0.328125, + "num_input_tokens_seen": 43904944, + "step": 481 + }, + { + "epoch": 2.0083333333333333, + "grad_norm": 5.605031727104742, + "learning_rate": 5e-05, + "loss": 0.2103, + "num_input_tokens_seen": 43996192, + "step": 482 + }, + { + "epoch": 2.0083333333333333, + "loss": 0.23982742428779602, + "loss_ce": 0.0004475304449442774, + "loss_iou": 0.28125, + "loss_num": 0.037109375, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 43996192, + "step": 482 + }, + { + "epoch": 2.0125, + "grad_norm": 2.413140946271684, + "learning_rate": 5e-05, + "loss": 0.2327, + "num_input_tokens_seen": 44086036, + "step": 483 + }, + { + "epoch": 2.0125, + "loss": 0.19676050543785095, + "loss_ce": 0.001692143501713872, + "loss_iou": 0.3125, + "loss_num": 0.0269775390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 44086036, + "step": 483 + }, + { + "epoch": 2.0166666666666666, + "grad_norm": 4.696557237903741, + "learning_rate": 5e-05, + "loss": 0.238, + "num_input_tokens_seen": 44177616, + "step": 484 + }, + { + "epoch": 2.0166666666666666, + "loss": 0.18791495263576508, + "loss_ce": 0.0024291048757731915, + "loss_iou": 0.30078125, + "loss_num": 0.0255126953125, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 44177616, + "step": 484 + }, + { + "epoch": 2.0208333333333335, + "grad_norm": 5.869051437792836, + "learning_rate": 5e-05, + "loss": 0.2714, + "num_input_tokens_seen": 44268552, + "step": 485 + }, + { + "epoch": 2.0208333333333335, + "loss": 0.2644971013069153, + "loss_ce": 0.00045901196426711977, + "loss_iou": 0.328125, + "loss_num": 0.0400390625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 44268552, + "step": 485 + }, + { + "epoch": 2.025, + "grad_norm": 5.760866011005493, + "learning_rate": 5e-05, + "loss": 0.2184, + "num_input_tokens_seen": 44360196, + "step": 486 + }, + { + "epoch": 2.025, + "loss": 0.26277172565460205, + "loss_ce": 0.00044262310257181525, + "loss_iou": 0.36328125, + "loss_num": 0.038330078125, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 44360196, + "step": 486 + }, + { + "epoch": 2.029166666666667, + "grad_norm": 9.485726120144186, + "learning_rate": 5e-05, + "loss": 0.2049, + "num_input_tokens_seen": 44451476, + "step": 487 + }, + { + "epoch": 2.029166666666667, + "loss": 0.1496741771697998, + "loss_ce": 0.0011756441090255976, + "loss_iou": 0.361328125, + "loss_num": 0.015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 44451476, + "step": 487 + }, + { + "epoch": 2.033333333333333, + "grad_norm": 6.46724624293029, + "learning_rate": 5e-05, + "loss": 0.2019, + "num_input_tokens_seen": 44543556, + "step": 488 + }, + { + "epoch": 2.033333333333333, + "loss": 0.20846158266067505, + "loss_ce": 0.0011251522228121758, + "loss_iou": 0.283203125, + "loss_num": 0.0303955078125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 44543556, + "step": 488 + }, + { + "epoch": 2.0375, + "grad_norm": 4.663831894967608, + "learning_rate": 5e-05, + "loss": 0.1779, + "num_input_tokens_seen": 44634704, + "step": 489 + }, + { + "epoch": 2.0375, + "loss": 0.20913270115852356, + "loss_ce": 0.0005755819729529321, + "loss_iou": 0.333984375, + "loss_num": 0.0286865234375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 44634704, + "step": 489 + }, + { + "epoch": 2.0416666666666665, + "grad_norm": 4.817924636236086, + "learning_rate": 5e-05, + "loss": 0.2052, + "num_input_tokens_seen": 44725908, + "step": 490 + }, + { + "epoch": 2.0416666666666665, + "loss": 0.27449101209640503, + "loss_ce": 0.007279078476130962, + "loss_iou": 0.1845703125, + "loss_num": 0.046142578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 44725908, + "step": 490 + }, + { + "epoch": 2.0458333333333334, + "grad_norm": 9.929049168261573, + "learning_rate": 5e-05, + "loss": 0.2116, + "num_input_tokens_seen": 44817812, + "step": 491 + }, + { + "epoch": 2.0458333333333334, + "loss": 0.21883490681648254, + "loss_ce": 0.0010614749044179916, + "loss_iou": 0.408203125, + "loss_num": 0.027587890625, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 44817812, + "step": 491 + }, + { + "epoch": 2.05, + "grad_norm": 8.817393848544738, + "learning_rate": 5e-05, + "loss": 0.23, + "num_input_tokens_seen": 44909208, + "step": 492 + }, + { + "epoch": 2.05, + "loss": 0.2640566825866699, + "loss_ce": 0.0022769207134842873, + "loss_iou": 0.29296875, + "loss_num": 0.040771484375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 44909208, + "step": 492 + }, + { + "epoch": 2.0541666666666667, + "grad_norm": 5.042570206698982, + "learning_rate": 5e-05, + "loss": 0.2268, + "num_input_tokens_seen": 45000720, + "step": 493 + }, + { + "epoch": 2.0541666666666667, + "loss": 0.2562553286552429, + "loss_ce": 0.001494583673775196, + "loss_iou": 0.279296875, + "loss_num": 0.0400390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 45000720, + "step": 493 + }, + { + "epoch": 2.058333333333333, + "grad_norm": 6.383182702072335, + "learning_rate": 5e-05, + "loss": 0.1796, + "num_input_tokens_seen": 45092216, + "step": 494 + }, + { + "epoch": 2.058333333333333, + "loss": 0.15668010711669922, + "loss_ce": 0.0010404729982838035, + "loss_iou": 0.29296875, + "loss_num": 0.01953125, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 45092216, + "step": 494 + }, + { + "epoch": 2.0625, + "grad_norm": 9.998961284226256, + "learning_rate": 5e-05, + "loss": 0.2058, + "num_input_tokens_seen": 45183120, + "step": 495 + }, + { + "epoch": 2.0625, + "loss": 0.14924222230911255, + "loss_ce": 0.00043851512600667775, + "loss_iou": 0.25, + "loss_num": 0.0198974609375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 45183120, + "step": 495 + }, + { + "epoch": 2.066666666666667, + "grad_norm": 5.28057501901058, + "learning_rate": 5e-05, + "loss": 0.1962, + "num_input_tokens_seen": 45273996, + "step": 496 + }, + { + "epoch": 2.066666666666667, + "loss": 0.20502203702926636, + "loss_ce": 0.0005542653379961848, + "loss_iou": 0.322265625, + "loss_num": 0.028076171875, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 45273996, + "step": 496 + }, + { + "epoch": 2.0708333333333333, + "grad_norm": 7.768866562570465, + "learning_rate": 5e-05, + "loss": 0.1964, + "num_input_tokens_seen": 45365364, + "step": 497 + }, + { + "epoch": 2.0708333333333333, + "loss": 0.20308563113212585, + "loss_ce": 0.0013644578866660595, + "loss_iou": 0.279296875, + "loss_num": 0.029296875, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 45365364, + "step": 497 + }, + { + "epoch": 2.075, + "grad_norm": 6.284527737111382, + "learning_rate": 5e-05, + "loss": 0.1963, + "num_input_tokens_seen": 45456736, + "step": 498 + }, + { + "epoch": 2.075, + "loss": 0.18389853835105896, + "loss_ce": 0.0016475582960993052, + "loss_iou": 0.296875, + "loss_num": 0.024658203125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 45456736, + "step": 498 + }, + { + "epoch": 2.0791666666666666, + "grad_norm": 4.79734703937701, + "learning_rate": 5e-05, + "loss": 0.2825, + "num_input_tokens_seen": 45547784, + "step": 499 + }, + { + "epoch": 2.0791666666666666, + "loss": 0.29045653343200684, + "loss_ce": 0.0013940533390268683, + "loss_iou": 0.404296875, + "loss_num": 0.041748046875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 45547784, + "step": 499 + }, + { + "epoch": 2.0833333333333335, + "grad_norm": 9.216382238681696, + "learning_rate": 5e-05, + "loss": 0.2529, + "num_input_tokens_seen": 45637724, + "step": 500 + }, + { + "epoch": 2.0833333333333335, + "eval_seeclick_CIoU": 0.40254954993724823, + "eval_seeclick_GIoU": 0.40075618028640747, + "eval_seeclick_IoU": 0.46426986157894135, + "eval_seeclick_MAE_all": 0.06658709421753883, + "eval_seeclick_MAE_h": 0.04495918843895197, + "eval_seeclick_MAE_w": 0.13129764050245285, + "eval_seeclick_MAE_x_boxes": 0.14095629006624222, + "eval_seeclick_MAE_y_boxes": 0.04734954051673412, + "eval_seeclick_NUM_probability": 0.9999997019767761, + "eval_seeclick_inside_bbox": 0.7528409063816071, + "eval_seeclick_loss": 0.5049810409545898, + "eval_seeclick_loss_ce": 0.08913525566458702, + "eval_seeclick_loss_iou": 0.469970703125, + "eval_seeclick_loss_num": 0.06447601318359375, + "eval_seeclick_loss_xval": 0.4163818359375, + "eval_seeclick_runtime": 74.8052, + "eval_seeclick_samples_per_second": 0.575, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 45637724, + "step": 500 + }, + { + "epoch": 2.0833333333333335, + "eval_icons_CIoU": 0.2693219371140003, + "eval_icons_GIoU": 0.2740980312228203, + "eval_icons_IoU": 0.37362542748451233, + "eval_icons_MAE_all": 0.08273549005389214, + "eval_icons_MAE_h": 0.1786297969520092, + "eval_icons_MAE_w": 0.12335462868213654, + "eval_icons_MAE_x_boxes": 0.12370636314153671, + "eval_icons_MAE_y_boxes": 0.18266896158456802, + "eval_icons_NUM_probability": 0.9999996721744537, + "eval_icons_inside_bbox": 0.4635416716337204, + "eval_icons_loss": 0.441895455121994, + "eval_icons_loss_ce": 1.6518655229447177e-05, + "eval_icons_loss_iou": 0.22637939453125, + "eval_icons_loss_num": 0.08292007446289062, + "eval_icons_loss_xval": 0.459716796875, + "eval_icons_runtime": 85.3011, + "eval_icons_samples_per_second": 0.586, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 45637724, + "step": 500 + }, + { + "epoch": 2.0833333333333335, + "eval_screenspot_CIoU": 0.3571961025396983, + "eval_screenspot_GIoU": 0.3489176432291667, + "eval_screenspot_IoU": 0.4308655261993408, + "eval_screenspot_MAE_all": 0.10158004860083263, + "eval_screenspot_MAE_h": 0.0966620072722435, + "eval_screenspot_MAE_w": 0.2082077662150065, + "eval_screenspot_MAE_x_boxes": 0.17490030328432718, + "eval_screenspot_MAE_y_boxes": 0.09309135625759761, + "eval_screenspot_NUM_probability": 0.9999842445055643, + "eval_screenspot_inside_bbox": 0.690833330154419, + "eval_screenspot_loss": 0.5826627612113953, + "eval_screenspot_loss_ce": 1.5979758851851027e-05, + "eval_screenspot_loss_iou": 0.3749593098958333, + "eval_screenspot_loss_num": 0.10100301106770833, + "eval_screenspot_loss_xval": 0.58056640625, + "eval_screenspot_runtime": 146.4672, + "eval_screenspot_samples_per_second": 0.608, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 45637724, + "step": 500 + }, + { + "epoch": 2.0833333333333335, + "eval_compot_CIoU": 0.4399932026863098, + "eval_compot_GIoU": 0.4214942157268524, + "eval_compot_IoU": 0.5184762179851532, + "eval_compot_MAE_all": 0.06234058737754822, + "eval_compot_MAE_h": 0.09770151227712631, + "eval_compot_MAE_w": 0.1238437294960022, + "eval_compot_MAE_x_boxes": 0.12367824465036392, + "eval_compot_MAE_y_boxes": 0.09963158518075943, + "eval_compot_NUM_probability": 0.9999629557132721, + "eval_compot_inside_bbox": 0.6493055522441864, + "eval_compot_loss": 0.3942759335041046, + "eval_compot_loss_ce": 0.0099323526956141, + "eval_compot_loss_iou": 0.41339111328125, + "eval_compot_loss_num": 0.054779052734375, + "eval_compot_loss_xval": 0.3568115234375, + "eval_compot_runtime": 84.0824, + "eval_compot_samples_per_second": 0.595, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 45637724, + "step": 500 + }, + { + "epoch": 2.0833333333333335, + "loss": 0.3428493142127991, + "loss_ce": 0.009841513819992542, + "loss_iou": 0.42578125, + "loss_num": 0.049560546875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 45637724, + "step": 500 + }, + { + "epoch": 2.0875, + "grad_norm": 7.729747203415406, + "learning_rate": 5e-05, + "loss": 0.2109, + "num_input_tokens_seen": 45727648, + "step": 501 + }, + { + "epoch": 2.0875, + "loss": 0.1922411322593689, + "loss_ce": 0.00236074673011899, + "loss_iou": 0.298828125, + "loss_num": 0.0260009765625, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 45727648, + "step": 501 + }, + { + "epoch": 2.091666666666667, + "grad_norm": 10.076408675413663, + "learning_rate": 5e-05, + "loss": 0.2347, + "num_input_tokens_seen": 45818984, + "step": 502 + }, + { + "epoch": 2.091666666666667, + "loss": 0.22048774361610413, + "loss_ce": 2.8755013772752136e-05, + "loss_iou": 0.5078125, + "loss_num": 0.023681640625, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 45818984, + "step": 502 + }, + { + "epoch": 2.095833333333333, + "grad_norm": 15.803004261541545, + "learning_rate": 5e-05, + "loss": 0.2213, + "num_input_tokens_seen": 45909824, + "step": 503 + }, + { + "epoch": 2.095833333333333, + "loss": 0.19270195066928864, + "loss_ce": 0.002577434293925762, + "loss_iou": 0.28125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 45909824, + "step": 503 + }, + { + "epoch": 2.1, + "grad_norm": 10.453303811857014, + "learning_rate": 5e-05, + "loss": 0.2257, + "num_input_tokens_seen": 46001408, + "step": 504 + }, + { + "epoch": 2.1, + "loss": 0.2370985597372055, + "loss_ce": 0.00028215881320647895, + "loss_iou": 0.3984375, + "loss_num": 0.03125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 46001408, + "step": 504 + }, + { + "epoch": 2.1041666666666665, + "grad_norm": 10.658019924725252, + "learning_rate": 5e-05, + "loss": 0.2449, + "num_input_tokens_seen": 46092752, + "step": 505 + }, + { + "epoch": 2.1041666666666665, + "loss": 0.22125060856342316, + "loss_ce": 0.0002117783296853304, + "loss_iou": 0.2060546875, + "loss_num": 0.035888671875, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 46092752, + "step": 505 + }, + { + "epoch": 2.1083333333333334, + "grad_norm": 3.3864494529227724, + "learning_rate": 5e-05, + "loss": 0.1457, + "num_input_tokens_seen": 46184072, + "step": 506 + }, + { + "epoch": 2.1083333333333334, + "loss": 0.14974187314510345, + "loss_ce": 0.0001447110262233764, + "loss_iou": 0.23046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 46184072, + "step": 506 + }, + { + "epoch": 2.1125, + "grad_norm": 7.53025454760194, + "learning_rate": 5e-05, + "loss": 0.2635, + "num_input_tokens_seen": 46275676, + "step": 507 + }, + { + "epoch": 2.1125, + "loss": 0.20503434538841248, + "loss_ce": 0.0040608420968055725, + "loss_iou": 0.208984375, + "loss_num": 0.03173828125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 46275676, + "step": 507 + }, + { + "epoch": 2.1166666666666667, + "grad_norm": 33.285284767596316, + "learning_rate": 5e-05, + "loss": 0.2176, + "num_input_tokens_seen": 46365852, + "step": 508 + }, + { + "epoch": 2.1166666666666667, + "loss": 0.21645520627498627, + "loss_ce": 0.001123182475566864, + "loss_iou": 0.33984375, + "loss_num": 0.029296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 46365852, + "step": 508 + }, + { + "epoch": 2.120833333333333, + "grad_norm": 14.815680317725786, + "learning_rate": 5e-05, + "loss": 0.2742, + "num_input_tokens_seen": 46457248, + "step": 509 + }, + { + "epoch": 2.120833333333333, + "loss": 0.25932037830352783, + "loss_ce": 0.002606501104310155, + "loss_iou": 0.2421875, + "loss_num": 0.04150390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 46457248, + "step": 509 + }, + { + "epoch": 2.125, + "grad_norm": 8.73990619843021, + "learning_rate": 5e-05, + "loss": 0.2557, + "num_input_tokens_seen": 46548532, + "step": 510 + }, + { + "epoch": 2.125, + "loss": 0.2554578185081482, + "loss_ce": 0.0008191587985493243, + "loss_iou": 0.28125, + "loss_num": 0.03955078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 46548532, + "step": 510 + }, + { + "epoch": 2.129166666666667, + "grad_norm": 10.105992721155996, + "learning_rate": 5e-05, + "loss": 0.2362, + "num_input_tokens_seen": 46639872, + "step": 511 + }, + { + "epoch": 2.129166666666667, + "loss": 0.2154877781867981, + "loss_ce": 3.368042234797031e-05, + "loss_iou": 0.416015625, + "loss_num": 0.026123046875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 46639872, + "step": 511 + }, + { + "epoch": 2.1333333333333333, + "grad_norm": 29.72522284972365, + "learning_rate": 5e-05, + "loss": 0.1714, + "num_input_tokens_seen": 46731340, + "step": 512 + }, + { + "epoch": 2.1333333333333333, + "loss": 0.15613722801208496, + "loss_ce": 0.004953158088028431, + "loss_iou": 0.244140625, + "loss_num": 0.020263671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 46731340, + "step": 512 + }, + { + "epoch": 2.1375, + "grad_norm": 9.531361802052238, + "learning_rate": 5e-05, + "loss": 0.253, + "num_input_tokens_seen": 46822688, + "step": 513 + }, + { + "epoch": 2.1375, + "loss": 0.22187145054340363, + "loss_ce": 0.001046255580149591, + "loss_iou": 0.275390625, + "loss_num": 0.032958984375, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 46822688, + "step": 513 + }, + { + "epoch": 2.1416666666666666, + "grad_norm": 14.410513234997431, + "learning_rate": 5e-05, + "loss": 0.2471, + "num_input_tokens_seen": 46914088, + "step": 514 + }, + { + "epoch": 2.1416666666666666, + "loss": 0.2741532623767853, + "loss_ce": 0.0002885128487832844, + "loss_iou": 0.2890625, + "loss_num": 0.04296875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 46914088, + "step": 514 + }, + { + "epoch": 2.1458333333333335, + "grad_norm": 8.05729374282805, + "learning_rate": 5e-05, + "loss": 0.1985, + "num_input_tokens_seen": 47005280, + "step": 515 + }, + { + "epoch": 2.1458333333333335, + "loss": 0.21794849634170532, + "loss_ce": 0.00011403985263314098, + "loss_iou": 0.265625, + "loss_num": 0.03271484375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 47005280, + "step": 515 + }, + { + "epoch": 2.15, + "grad_norm": 7.318539417819902, + "learning_rate": 5e-05, + "loss": 0.196, + "num_input_tokens_seen": 47096288, + "step": 516 + }, + { + "epoch": 2.15, + "loss": 0.27666139602661133, + "loss_ce": 0.0016369989607483149, + "loss_iou": 0.37109375, + "loss_num": 0.039794921875, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 47096288, + "step": 516 + }, + { + "epoch": 2.154166666666667, + "grad_norm": 8.090481077078994, + "learning_rate": 5e-05, + "loss": 0.2136, + "num_input_tokens_seen": 47187788, + "step": 517 + }, + { + "epoch": 2.154166666666667, + "loss": 0.14408686757087708, + "loss_ce": 0.00010493230365682393, + "loss_iou": 0.212890625, + "loss_num": 0.02001953125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 47187788, + "step": 517 + }, + { + "epoch": 2.158333333333333, + "grad_norm": 11.230013943385165, + "learning_rate": 5e-05, + "loss": 0.2047, + "num_input_tokens_seen": 47279348, + "step": 518 + }, + { + "epoch": 2.158333333333333, + "loss": 0.1982039213180542, + "loss_ce": 0.0006941695464774966, + "loss_iou": 0.26171875, + "loss_num": 0.0286865234375, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 47279348, + "step": 518 + }, + { + "epoch": 2.1625, + "grad_norm": 4.75139905842334, + "learning_rate": 5e-05, + "loss": 0.19, + "num_input_tokens_seen": 47370376, + "step": 519 + }, + { + "epoch": 2.1625, + "loss": 0.18471525609493256, + "loss_ce": 2.2873391571920365e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0198974609375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 47370376, + "step": 519 + }, + { + "epoch": 2.1666666666666665, + "grad_norm": 6.517972761801104, + "learning_rate": 5e-05, + "loss": 0.2322, + "num_input_tokens_seen": 47462060, + "step": 520 + }, + { + "epoch": 2.1666666666666665, + "loss": 0.18972179293632507, + "loss_ce": 0.0049683707766234875, + "loss_iou": 0.22265625, + "loss_num": 0.0277099609375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 47462060, + "step": 520 + }, + { + "epoch": 2.1708333333333334, + "grad_norm": 7.3722523096943275, + "learning_rate": 5e-05, + "loss": 0.285, + "num_input_tokens_seen": 47553000, + "step": 521 + }, + { + "epoch": 2.1708333333333334, + "loss": 0.27948668599128723, + "loss_ce": 0.0012884553289040923, + "loss_iou": 0.3984375, + "loss_num": 0.0390625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 47553000, + "step": 521 + }, + { + "epoch": 2.175, + "grad_norm": 13.932503842674887, + "learning_rate": 5e-05, + "loss": 0.1891, + "num_input_tokens_seen": 47643948, + "step": 522 + }, + { + "epoch": 2.175, + "loss": 0.21535250544548035, + "loss_ce": 0.0006308312877081335, + "loss_iou": 0.287109375, + "loss_num": 0.031005859375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 47643948, + "step": 522 + }, + { + "epoch": 2.1791666666666667, + "grad_norm": 8.908307371306876, + "learning_rate": 5e-05, + "loss": 0.2113, + "num_input_tokens_seen": 47735776, + "step": 523 + }, + { + "epoch": 2.1791666666666667, + "loss": 0.20093733072280884, + "loss_ce": 0.0030613476410508156, + "loss_iou": 0.3671875, + "loss_num": 0.024169921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 47735776, + "step": 523 + }, + { + "epoch": 2.183333333333333, + "grad_norm": 16.019182269926212, + "learning_rate": 5e-05, + "loss": 0.2013, + "num_input_tokens_seen": 47827680, + "step": 524 + }, + { + "epoch": 2.183333333333333, + "loss": 0.23729866743087769, + "loss_ce": 0.001214687479659915, + "loss_iou": 0.35546875, + "loss_num": 0.032470703125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 47827680, + "step": 524 + }, + { + "epoch": 2.1875, + "grad_norm": 6.964867016997655, + "learning_rate": 5e-05, + "loss": 0.1731, + "num_input_tokens_seen": 47919152, + "step": 525 + }, + { + "epoch": 2.1875, + "loss": 0.17872335016727448, + "loss_ce": 0.0013246757443994284, + "loss_iou": 0.310546875, + "loss_num": 0.0224609375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 47919152, + "step": 525 + }, + { + "epoch": 2.191666666666667, + "grad_norm": 4.883391608973748, + "learning_rate": 5e-05, + "loss": 0.1685, + "num_input_tokens_seen": 48010236, + "step": 526 + }, + { + "epoch": 2.191666666666667, + "loss": 0.21654918789863586, + "loss_ce": 0.00042369376751594245, + "loss_iou": 0.2216796875, + "loss_num": 0.033935546875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 48010236, + "step": 526 + }, + { + "epoch": 2.1958333333333333, + "grad_norm": 23.035289497265442, + "learning_rate": 5e-05, + "loss": 0.2039, + "num_input_tokens_seen": 48101544, + "step": 527 + }, + { + "epoch": 2.1958333333333333, + "loss": 0.200510174036026, + "loss_ce": 0.0003148576943203807, + "loss_iou": 0.39453125, + "loss_num": 0.0234375, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 48101544, + "step": 527 + }, + { + "epoch": 2.2, + "grad_norm": 8.430324506304586, + "learning_rate": 5e-05, + "loss": 0.2343, + "num_input_tokens_seen": 48192704, + "step": 528 + }, + { + "epoch": 2.2, + "loss": 0.2944282293319702, + "loss_ce": 0.0019477481255307794, + "loss_iou": 0.353515625, + "loss_num": 0.04345703125, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 48192704, + "step": 528 + }, + { + "epoch": 2.2041666666666666, + "grad_norm": 15.089991729505178, + "learning_rate": 5e-05, + "loss": 0.2177, + "num_input_tokens_seen": 48284604, + "step": 529 + }, + { + "epoch": 2.2041666666666666, + "loss": 0.19912487268447876, + "loss_ce": 0.0010047497926279902, + "loss_iou": 0.1611328125, + "loss_num": 0.03271484375, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 48284604, + "step": 529 + }, + { + "epoch": 2.2083333333333335, + "grad_norm": 8.60120057780928, + "learning_rate": 5e-05, + "loss": 0.252, + "num_input_tokens_seen": 48376036, + "step": 530 + }, + { + "epoch": 2.2083333333333335, + "loss": 0.2593064308166504, + "loss_ce": 2.909008617280051e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0361328125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 48376036, + "step": 530 + }, + { + "epoch": 2.2125, + "grad_norm": 18.192155504342225, + "learning_rate": 5e-05, + "loss": 0.2296, + "num_input_tokens_seen": 48467472, + "step": 531 + }, + { + "epoch": 2.2125, + "loss": 0.2686399519443512, + "loss_ce": 0.00045148952631279826, + "loss_iou": 0.333984375, + "loss_num": 0.03955078125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 48467472, + "step": 531 + }, + { + "epoch": 2.216666666666667, + "grad_norm": 20.96977992387443, + "learning_rate": 5e-05, + "loss": 0.1982, + "num_input_tokens_seen": 48559212, + "step": 532 + }, + { + "epoch": 2.216666666666667, + "loss": 0.1729247123003006, + "loss_ce": 0.0036437036469578743, + "loss_iou": 0.2119140625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 48559212, + "step": 532 + }, + { + "epoch": 2.220833333333333, + "grad_norm": 11.254529565975902, + "learning_rate": 5e-05, + "loss": 0.2586, + "num_input_tokens_seen": 48650604, + "step": 533 + }, + { + "epoch": 2.220833333333333, + "loss": 0.16954649984836578, + "loss_ce": 0.0027984497137367725, + "loss_iou": 0.296875, + "loss_num": 0.0206298828125, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 48650604, + "step": 533 + }, + { + "epoch": 2.225, + "grad_norm": 8.116782024881553, + "learning_rate": 5e-05, + "loss": 0.2486, + "num_input_tokens_seen": 48741488, + "step": 534 + }, + { + "epoch": 2.225, + "loss": 0.2426433265209198, + "loss_ce": 0.002225851407274604, + "loss_iou": 0.21484375, + "loss_num": 0.038818359375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 48741488, + "step": 534 + }, + { + "epoch": 2.2291666666666665, + "grad_norm": 16.77344005618869, + "learning_rate": 5e-05, + "loss": 0.2735, + "num_input_tokens_seen": 48833012, + "step": 535 + }, + { + "epoch": 2.2291666666666665, + "loss": 0.310322642326355, + "loss_ce": 0.00014196071424521506, + "loss_iou": 0.306640625, + "loss_num": 0.048828125, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 48833012, + "step": 535 + }, + { + "epoch": 2.2333333333333334, + "grad_norm": 7.982912313525831, + "learning_rate": 5e-05, + "loss": 0.259, + "num_input_tokens_seen": 48924748, + "step": 536 + }, + { + "epoch": 2.2333333333333334, + "loss": 0.27782681584358215, + "loss_ce": 0.003412754973396659, + "loss_iou": 0.2890625, + "loss_num": 0.04248046875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 48924748, + "step": 536 + }, + { + "epoch": 2.2375, + "grad_norm": 1.8105627790424674, + "learning_rate": 5e-05, + "loss": 0.197, + "num_input_tokens_seen": 49015828, + "step": 537 + }, + { + "epoch": 2.2375, + "loss": 0.19336609542369843, + "loss_ce": 0.0029364186339080334, + "loss_iou": 0.361328125, + "loss_num": 0.0225830078125, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 49015828, + "step": 537 + }, + { + "epoch": 2.2416666666666667, + "grad_norm": 3.8723721155447866, + "learning_rate": 5e-05, + "loss": 0.2857, + "num_input_tokens_seen": 49106980, + "step": 538 + }, + { + "epoch": 2.2416666666666667, + "loss": 0.26870205998420715, + "loss_ce": 0.0036874094512313604, + "loss_iou": 0.38671875, + "loss_num": 0.036376953125, + "loss_xval": 0.265625, + "num_input_tokens_seen": 49106980, + "step": 538 + }, + { + "epoch": 2.245833333333333, + "grad_norm": 11.43927378564669, + "learning_rate": 5e-05, + "loss": 0.2064, + "num_input_tokens_seen": 49198292, + "step": 539 + }, + { + "epoch": 2.245833333333333, + "loss": 0.11015903949737549, + "loss_ce": 0.0009366283775307238, + "loss_iou": 0.197265625, + "loss_num": 0.01336669921875, + "loss_xval": 0.109375, + "num_input_tokens_seen": 49198292, + "step": 539 + }, + { + "epoch": 2.25, + "grad_norm": 9.479268489327094, + "learning_rate": 5e-05, + "loss": 0.2347, + "num_input_tokens_seen": 49289980, + "step": 540 + }, + { + "epoch": 2.25, + "loss": 0.24618439376354218, + "loss_ce": 0.0028982588555663824, + "loss_iou": 0.392578125, + "loss_num": 0.03173828125, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 49289980, + "step": 540 + }, + { + "epoch": 2.2541666666666664, + "grad_norm": 23.234574182965247, + "learning_rate": 5e-05, + "loss": 0.2566, + "num_input_tokens_seen": 49381024, + "step": 541 + }, + { + "epoch": 2.2541666666666664, + "loss": 0.32389476895332336, + "loss_ce": 0.0039484878070652485, + "loss_iou": 0.330078125, + "loss_num": 0.0498046875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 49381024, + "step": 541 + }, + { + "epoch": 2.2583333333333333, + "grad_norm": 6.512330127642697, + "learning_rate": 5e-05, + "loss": 0.231, + "num_input_tokens_seen": 49471848, + "step": 542 + }, + { + "epoch": 2.2583333333333333, + "loss": 0.2775254547595978, + "loss_ce": 0.0032334798015654087, + "loss_iou": 0.59375, + "loss_num": 0.0291748046875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 49471848, + "step": 542 + }, + { + "epoch": 2.2625, + "grad_norm": 31.365520197825454, + "learning_rate": 5e-05, + "loss": 0.2918, + "num_input_tokens_seen": 49563792, + "step": 543 + }, + { + "epoch": 2.2625, + "loss": 0.3964589834213257, + "loss_ce": 0.0029043066315352917, + "loss_iou": 0.36328125, + "loss_num": 0.06298828125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 49563792, + "step": 543 + }, + { + "epoch": 2.2666666666666666, + "grad_norm": 9.131551427623819, + "learning_rate": 5e-05, + "loss": 0.1894, + "num_input_tokens_seen": 49654884, + "step": 544 + }, + { + "epoch": 2.2666666666666666, + "loss": 0.16559162735939026, + "loss_ce": 0.0012849814957007766, + "loss_iou": 0.189453125, + "loss_num": 0.024658203125, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 49654884, + "step": 544 + }, + { + "epoch": 2.2708333333333335, + "grad_norm": 4.3210090075713765, + "learning_rate": 5e-05, + "loss": 0.1877, + "num_input_tokens_seen": 49745916, + "step": 545 + }, + { + "epoch": 2.2708333333333335, + "loss": 0.22035479545593262, + "loss_ce": 1.789092675608117e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0262451171875, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 49745916, + "step": 545 + }, + { + "epoch": 2.275, + "grad_norm": 21.405001553889708, + "learning_rate": 5e-05, + "loss": 0.2585, + "num_input_tokens_seen": 49837444, + "step": 546 + }, + { + "epoch": 2.275, + "loss": 0.3219088315963745, + "loss_ce": 0.014673092402517796, + "loss_iou": 0.400390625, + "loss_num": 0.0439453125, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 49837444, + "step": 546 + }, + { + "epoch": 2.279166666666667, + "grad_norm": 11.734590492448271, + "learning_rate": 5e-05, + "loss": 0.2045, + "num_input_tokens_seen": 49928440, + "step": 547 + }, + { + "epoch": 2.279166666666667, + "loss": 0.15000897645950317, + "loss_ce": 0.0007780222222208977, + "loss_iou": 0.34765625, + "loss_num": 0.0146484375, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 49928440, + "step": 547 + }, + { + "epoch": 2.283333333333333, + "grad_norm": 6.654040686552338, + "learning_rate": 5e-05, + "loss": 0.1729, + "num_input_tokens_seen": 50019816, + "step": 548 + }, + { + "epoch": 2.283333333333333, + "loss": 0.16181126236915588, + "loss_ce": 0.000800521404016763, + "loss_iou": 0.28125, + "loss_num": 0.0198974609375, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 50019816, + "step": 548 + }, + { + "epoch": 2.2875, + "grad_norm": 4.9014266222969445, + "learning_rate": 5e-05, + "loss": 0.2343, + "num_input_tokens_seen": 50111900, + "step": 549 + }, + { + "epoch": 2.2875, + "loss": 0.17283910512924194, + "loss_ce": 0.0007199622923508286, + "loss_iou": 0.38671875, + "loss_num": 0.0174560546875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 50111900, + "step": 549 + }, + { + "epoch": 2.2916666666666665, + "grad_norm": 3.1547749971655343, + "learning_rate": 5e-05, + "loss": 0.1892, + "num_input_tokens_seen": 50203308, + "step": 550 + }, + { + "epoch": 2.2916666666666665, + "loss": 0.18455275893211365, + "loss_ce": 0.002561180619522929, + "loss_iou": 0.166015625, + "loss_num": 0.029052734375, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 50203308, + "step": 550 + }, + { + "epoch": 2.2958333333333334, + "grad_norm": 9.05140127670536, + "learning_rate": 5e-05, + "loss": 0.2295, + "num_input_tokens_seen": 50294728, + "step": 551 + }, + { + "epoch": 2.2958333333333334, + "loss": 0.2015194296836853, + "loss_ce": 0.0009579022298566997, + "loss_iou": 0.287109375, + "loss_num": 0.0274658203125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 50294728, + "step": 551 + }, + { + "epoch": 2.3, + "grad_norm": 12.269307896539292, + "learning_rate": 5e-05, + "loss": 0.2782, + "num_input_tokens_seen": 50385860, + "step": 552 + }, + { + "epoch": 2.3, + "loss": 0.2552986145019531, + "loss_ce": 0.0013313385425135493, + "loss_iou": 0.32421875, + "loss_num": 0.036376953125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 50385860, + "step": 552 + }, + { + "epoch": 2.3041666666666667, + "grad_norm": 9.396071896354771, + "learning_rate": 5e-05, + "loss": 0.2442, + "num_input_tokens_seen": 50477380, + "step": 553 + }, + { + "epoch": 2.3041666666666667, + "loss": 0.23897230625152588, + "loss_ce": 0.00026380264898762107, + "loss_iou": 0.359375, + "loss_num": 0.031982421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 50477380, + "step": 553 + }, + { + "epoch": 2.3083333333333336, + "grad_norm": 5.715814267268313, + "learning_rate": 5e-05, + "loss": 0.2217, + "num_input_tokens_seen": 50568760, + "step": 554 + }, + { + "epoch": 2.3083333333333336, + "loss": 0.2331497073173523, + "loss_ce": 0.0013381622266024351, + "loss_iou": 0.1748046875, + "loss_num": 0.03857421875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 50568760, + "step": 554 + }, + { + "epoch": 2.3125, + "grad_norm": 8.906380660918442, + "learning_rate": 5e-05, + "loss": 0.2031, + "num_input_tokens_seen": 50659780, + "step": 555 + }, + { + "epoch": 2.3125, + "loss": 0.1668861210346222, + "loss_ce": 1.6016194422263652e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.0223388671875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 50659780, + "step": 555 + }, + { + "epoch": 2.3166666666666664, + "grad_norm": 13.260563611079487, + "learning_rate": 5e-05, + "loss": 0.2962, + "num_input_tokens_seen": 50750744, + "step": 556 + }, + { + "epoch": 2.3166666666666664, + "loss": 0.3436912000179291, + "loss_ce": 0.0016501795034855604, + "loss_iou": 0.46484375, + "loss_num": 0.0478515625, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 50750744, + "step": 556 + }, + { + "epoch": 2.3208333333333333, + "grad_norm": 6.33711851791319, + "learning_rate": 5e-05, + "loss": 0.1551, + "num_input_tokens_seen": 50842328, + "step": 557 + }, + { + "epoch": 2.3208333333333333, + "loss": 0.15023556351661682, + "loss_ce": 0.0015234070597216487, + "loss_iou": 0.296875, + "loss_num": 0.0166015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 50842328, + "step": 557 + }, + { + "epoch": 2.325, + "grad_norm": 8.66796050891329, + "learning_rate": 5e-05, + "loss": 0.2726, + "num_input_tokens_seen": 50933788, + "step": 558 + }, + { + "epoch": 2.325, + "loss": 0.3197578489780426, + "loss_ce": 0.00029983557760715485, + "loss_iou": 0.1474609375, + "loss_num": 0.057373046875, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 50933788, + "step": 558 + }, + { + "epoch": 2.3291666666666666, + "grad_norm": 6.913245856308076, + "learning_rate": 5e-05, + "loss": 0.2266, + "num_input_tokens_seen": 51024468, + "step": 559 + }, + { + "epoch": 2.3291666666666666, + "loss": 0.24923312664031982, + "loss_ce": 8.76137928571552e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0361328125, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 51024468, + "step": 559 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 4.904812670647303, + "learning_rate": 5e-05, + "loss": 0.2201, + "num_input_tokens_seen": 51115288, + "step": 560 + }, + { + "epoch": 2.3333333333333335, + "loss": 0.23579418659210205, + "loss_ce": 0.0003205478424206376, + "loss_iou": 0.353515625, + "loss_num": 0.03125, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 51115288, + "step": 560 + }, + { + "epoch": 2.3375, + "grad_norm": 22.768072572200946, + "learning_rate": 5e-05, + "loss": 0.2143, + "num_input_tokens_seen": 51206408, + "step": 561 + }, + { + "epoch": 2.3375, + "loss": 0.198276549577713, + "loss_ce": 0.00021747614664491266, + "loss_iou": 0.28125, + "loss_num": 0.0269775390625, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 51206408, + "step": 561 + }, + { + "epoch": 2.341666666666667, + "grad_norm": 7.501671159923808, + "learning_rate": 5e-05, + "loss": 0.1443, + "num_input_tokens_seen": 51297908, + "step": 562 + }, + { + "epoch": 2.341666666666667, + "loss": 0.14608047902584076, + "loss_ce": 0.0003590428677853197, + "loss_iou": 0.32421875, + "loss_num": 0.01458740234375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 51297908, + "step": 562 + }, + { + "epoch": 2.345833333333333, + "grad_norm": 4.105011198127262, + "learning_rate": 5e-05, + "loss": 0.2115, + "num_input_tokens_seen": 51389324, + "step": 563 + }, + { + "epoch": 2.345833333333333, + "loss": 0.22044454514980316, + "loss_ce": 0.0006874686223454773, + "loss_iou": 0.255859375, + "loss_num": 0.032470703125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 51389324, + "step": 563 + }, + { + "epoch": 2.35, + "grad_norm": 4.929604021728092, + "learning_rate": 5e-05, + "loss": 0.2209, + "num_input_tokens_seen": 51480428, + "step": 564 + }, + { + "epoch": 2.35, + "loss": 0.23108640313148499, + "loss_ce": 7.303670372493798e-06, + "loss_iou": 0.423828125, + "loss_num": 0.0272216796875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 51480428, + "step": 564 + }, + { + "epoch": 2.3541666666666665, + "grad_norm": 11.02440441371447, + "learning_rate": 5e-05, + "loss": 0.2063, + "num_input_tokens_seen": 51572020, + "step": 565 + }, + { + "epoch": 2.3541666666666665, + "loss": 0.22416293621063232, + "loss_ce": 0.0008963280124589801, + "loss_iou": 0.353515625, + "loss_num": 0.0286865234375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 51572020, + "step": 565 + }, + { + "epoch": 2.3583333333333334, + "grad_norm": 3.198732197234387, + "learning_rate": 5e-05, + "loss": 0.1983, + "num_input_tokens_seen": 51663300, + "step": 566 + }, + { + "epoch": 2.3583333333333334, + "loss": 0.1470959484577179, + "loss_ce": 0.0004895069869235158, + "loss_iou": 0.169921875, + "loss_num": 0.021728515625, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 51663300, + "step": 566 + }, + { + "epoch": 2.3625, + "grad_norm": 9.113443508930928, + "learning_rate": 5e-05, + "loss": 0.251, + "num_input_tokens_seen": 51755036, + "step": 567 + }, + { + "epoch": 2.3625, + "loss": 0.2304636687040329, + "loss_ce": 0.0011240601306781173, + "loss_iou": 0.271484375, + "loss_num": 0.03369140625, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 51755036, + "step": 567 + }, + { + "epoch": 2.3666666666666667, + "grad_norm": 8.550613414250304, + "learning_rate": 5e-05, + "loss": 0.2219, + "num_input_tokens_seen": 51846400, + "step": 568 + }, + { + "epoch": 2.3666666666666667, + "loss": 0.2326650619506836, + "loss_ce": 0.006834982428699732, + "loss_iou": 0.36328125, + "loss_num": 0.0286865234375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 51846400, + "step": 568 + }, + { + "epoch": 2.3708333333333336, + "grad_norm": 14.361037201153003, + "learning_rate": 5e-05, + "loss": 0.2203, + "num_input_tokens_seen": 51937948, + "step": 569 + }, + { + "epoch": 2.3708333333333336, + "loss": 0.23306363821029663, + "loss_ce": 0.0023507599253207445, + "loss_iou": 0.34375, + "loss_num": 0.030517578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 51937948, + "step": 569 + }, + { + "epoch": 2.375, + "grad_norm": 3.9173504909673533, + "learning_rate": 5e-05, + "loss": 0.2329, + "num_input_tokens_seen": 52028928, + "step": 570 + }, + { + "epoch": 2.375, + "loss": 0.23187220096588135, + "loss_ce": 0.00018276153423357755, + "loss_iou": 0.298828125, + "loss_num": 0.03271484375, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 52028928, + "step": 570 + }, + { + "epoch": 2.3791666666666664, + "grad_norm": 4.439040232326872, + "learning_rate": 5e-05, + "loss": 0.2733, + "num_input_tokens_seen": 52120152, + "step": 571 + }, + { + "epoch": 2.3791666666666664, + "loss": 0.23996983468532562, + "loss_ce": 0.0005289136315695941, + "loss_iou": 0.33203125, + "loss_num": 0.03271484375, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 52120152, + "step": 571 + }, + { + "epoch": 2.3833333333333333, + "grad_norm": 6.735291595031367, + "learning_rate": 5e-05, + "loss": 0.2516, + "num_input_tokens_seen": 52211680, + "step": 572 + }, + { + "epoch": 2.3833333333333333, + "loss": 0.21853697299957275, + "loss_ce": 0.003601683070883155, + "loss_iou": 0.2080078125, + "loss_num": 0.033447265625, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 52211680, + "step": 572 + }, + { + "epoch": 2.3875, + "grad_norm": 8.91972627065563, + "learning_rate": 5e-05, + "loss": 0.2946, + "num_input_tokens_seen": 52302312, + "step": 573 + }, + { + "epoch": 2.3875, + "loss": 0.33584511280059814, + "loss_ce": 0.0017386688850820065, + "loss_iou": 0.30078125, + "loss_num": 0.052978515625, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 52302312, + "step": 573 + }, + { + "epoch": 2.3916666666666666, + "grad_norm": 19.558079094613312, + "learning_rate": 5e-05, + "loss": 0.2492, + "num_input_tokens_seen": 52393828, + "step": 574 + }, + { + "epoch": 2.3916666666666666, + "loss": 0.2284211963415146, + "loss_ce": 0.004239081870764494, + "loss_iou": 0.29296875, + "loss_num": 0.03125, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 52393828, + "step": 574 + }, + { + "epoch": 2.3958333333333335, + "grad_norm": 13.560440175013902, + "learning_rate": 5e-05, + "loss": 0.2644, + "num_input_tokens_seen": 52485140, + "step": 575 + }, + { + "epoch": 2.3958333333333335, + "loss": 0.3099120855331421, + "loss_ce": 0.0005858939257450402, + "loss_iou": 0.53125, + "loss_num": 0.03759765625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 52485140, + "step": 575 + }, + { + "epoch": 2.4, + "grad_norm": 8.716871598647728, + "learning_rate": 5e-05, + "loss": 0.2122, + "num_input_tokens_seen": 52575916, + "step": 576 + }, + { + "epoch": 2.4, + "loss": 0.23423391580581665, + "loss_ce": 0.0008354815072380006, + "loss_iou": 0.38671875, + "loss_num": 0.0289306640625, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 52575916, + "step": 576 + }, + { + "epoch": 2.404166666666667, + "grad_norm": 8.08384619479721, + "learning_rate": 5e-05, + "loss": 0.2205, + "num_input_tokens_seen": 52667400, + "step": 577 + }, + { + "epoch": 2.404166666666667, + "loss": 0.18752390146255493, + "loss_ce": 0.004113262984901667, + "loss_iou": 0.279296875, + "loss_num": 0.0238037109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 52667400, + "step": 577 + }, + { + "epoch": 2.408333333333333, + "grad_norm": 13.861550015247142, + "learning_rate": 5e-05, + "loss": 0.2721, + "num_input_tokens_seen": 52759060, + "step": 578 + }, + { + "epoch": 2.408333333333333, + "loss": 0.21155069768428802, + "loss_ce": 0.0020170207135379314, + "loss_iou": 0.3203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 52759060, + "step": 578 + }, + { + "epoch": 2.4125, + "grad_norm": 9.645532716058332, + "learning_rate": 5e-05, + "loss": 0.283, + "num_input_tokens_seen": 52851116, + "step": 579 + }, + { + "epoch": 2.4125, + "loss": 0.29763203859329224, + "loss_ce": 0.0001466983085265383, + "loss_iou": 0.310546875, + "loss_num": 0.045166015625, + "loss_xval": 0.296875, + "num_input_tokens_seen": 52851116, + "step": 579 + }, + { + "epoch": 2.4166666666666665, + "grad_norm": 4.418503920500965, + "learning_rate": 5e-05, + "loss": 0.2826, + "num_input_tokens_seen": 52942416, + "step": 580 + }, + { + "epoch": 2.4166666666666665, + "loss": 0.28978437185287476, + "loss_ce": 0.0030411938205361366, + "loss_iou": 0.03369140625, + "loss_num": 0.055908203125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 52942416, + "step": 580 + }, + { + "epoch": 2.4208333333333334, + "grad_norm": 4.552478828334792, + "learning_rate": 5e-05, + "loss": 0.2277, + "num_input_tokens_seen": 53033392, + "step": 581 + }, + { + "epoch": 2.4208333333333334, + "loss": 0.2454247623682022, + "loss_ce": 0.0015893190866336226, + "loss_iou": 0.3125, + "loss_num": 0.034423828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 53033392, + "step": 581 + }, + { + "epoch": 2.425, + "grad_norm": 4.39237409560183, + "learning_rate": 5e-05, + "loss": 0.1809, + "num_input_tokens_seen": 53124512, + "step": 582 + }, + { + "epoch": 2.425, + "loss": 0.15168428421020508, + "loss_ce": 0.0001645039883442223, + "loss_iou": 0.263671875, + "loss_num": 0.01806640625, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 53124512, + "step": 582 + }, + { + "epoch": 2.4291666666666667, + "grad_norm": 11.948325894522515, + "learning_rate": 5e-05, + "loss": 0.2631, + "num_input_tokens_seen": 53215996, + "step": 583 + }, + { + "epoch": 2.4291666666666667, + "loss": 0.2483394742012024, + "loss_ce": 0.0050533367320895195, + "loss_iou": 0.2119140625, + "loss_num": 0.038818359375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 53215996, + "step": 583 + }, + { + "epoch": 2.4333333333333336, + "grad_norm": 11.401193742000979, + "learning_rate": 5e-05, + "loss": 0.2263, + "num_input_tokens_seen": 53306272, + "step": 584 + }, + { + "epoch": 2.4333333333333336, + "loss": 0.2307310402393341, + "loss_ce": 0.0036802669055759907, + "loss_iou": 0.3828125, + "loss_num": 0.027587890625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 53306272, + "step": 584 + }, + { + "epoch": 2.4375, + "grad_norm": 16.571411322861564, + "learning_rate": 5e-05, + "loss": 0.2692, + "num_input_tokens_seen": 53397812, + "step": 585 + }, + { + "epoch": 2.4375, + "loss": 0.2582886815071106, + "loss_ce": 0.00017098673561122268, + "loss_iou": 0.3828125, + "loss_num": 0.03369140625, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 53397812, + "step": 585 + }, + { + "epoch": 2.4416666666666664, + "grad_norm": 4.6137369430836594, + "learning_rate": 5e-05, + "loss": 0.2597, + "num_input_tokens_seen": 53490068, + "step": 586 + }, + { + "epoch": 2.4416666666666664, + "loss": 0.2722558081150055, + "loss_ce": 0.0016259271651506424, + "loss_iou": 0.359375, + "loss_num": 0.037353515625, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 53490068, + "step": 586 + }, + { + "epoch": 2.4458333333333333, + "grad_norm": 8.212023165000291, + "learning_rate": 5e-05, + "loss": 0.3262, + "num_input_tokens_seen": 53580940, + "step": 587 + }, + { + "epoch": 2.4458333333333333, + "loss": 0.322366327047348, + "loss_ce": 0.0004669178160838783, + "loss_iou": 0.19140625, + "loss_num": 0.055419921875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 53580940, + "step": 587 + }, + { + "epoch": 2.45, + "grad_norm": 7.41087579689105, + "learning_rate": 5e-05, + "loss": 0.2616, + "num_input_tokens_seen": 53672184, + "step": 588 + }, + { + "epoch": 2.45, + "loss": 0.2832728326320648, + "loss_ce": 0.0014124559238553047, + "loss_iou": 0.2265625, + "loss_num": 0.045654296875, + "loss_xval": 0.28125, + "num_input_tokens_seen": 53672184, + "step": 588 + }, + { + "epoch": 2.4541666666666666, + "grad_norm": 5.467855471006087, + "learning_rate": 5e-05, + "loss": 0.2301, + "num_input_tokens_seen": 53763264, + "step": 589 + }, + { + "epoch": 2.4541666666666666, + "loss": 0.24083589017391205, + "loss_ce": 0.0010897895554080606, + "loss_iou": 0.330078125, + "loss_num": 0.032470703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 53763264, + "step": 589 + }, + { + "epoch": 2.4583333333333335, + "grad_norm": 9.064539351391804, + "learning_rate": 5e-05, + "loss": 0.3217, + "num_input_tokens_seen": 53854088, + "step": 590 + }, + { + "epoch": 2.4583333333333335, + "loss": 0.28198763728141785, + "loss_ce": 0.0008597145788371563, + "loss_iou": 0.275390625, + "loss_num": 0.043212890625, + "loss_xval": 0.28125, + "num_input_tokens_seen": 53854088, + "step": 590 + }, + { + "epoch": 2.4625, + "grad_norm": 23.99147596421393, + "learning_rate": 5e-05, + "loss": 0.2264, + "num_input_tokens_seen": 53945840, + "step": 591 + }, + { + "epoch": 2.4625, + "loss": 0.2099674642086029, + "loss_ce": 0.002066471381112933, + "loss_iou": 0.2431640625, + "loss_num": 0.0301513671875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 53945840, + "step": 591 + }, + { + "epoch": 2.466666666666667, + "grad_norm": 10.504176338474922, + "learning_rate": 5e-05, + "loss": 0.2831, + "num_input_tokens_seen": 54036712, + "step": 592 + }, + { + "epoch": 2.466666666666667, + "loss": 0.21228235960006714, + "loss_ce": 0.00256557809188962, + "loss_iou": 0.267578125, + "loss_num": 0.029296875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 54036712, + "step": 592 + }, + { + "epoch": 2.470833333333333, + "grad_norm": 4.43523163700089, + "learning_rate": 5e-05, + "loss": 0.2155, + "num_input_tokens_seen": 54127908, + "step": 593 + }, + { + "epoch": 2.470833333333333, + "loss": 0.25974512100219727, + "loss_ce": 0.0009560543112456799, + "loss_iou": 0.3125, + "loss_num": 0.036865234375, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 54127908, + "step": 593 + }, + { + "epoch": 2.475, + "grad_norm": 12.34114585053913, + "learning_rate": 5e-05, + "loss": 0.2014, + "num_input_tokens_seen": 54219776, + "step": 594 + }, + { + "epoch": 2.475, + "loss": 0.1644817292690277, + "loss_ce": 0.0022197626531124115, + "loss_iou": 0.17578125, + "loss_num": 0.024169921875, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 54219776, + "step": 594 + }, + { + "epoch": 2.4791666666666665, + "grad_norm": 6.150484430831458, + "learning_rate": 5e-05, + "loss": 0.1779, + "num_input_tokens_seen": 54311088, + "step": 595 + }, + { + "epoch": 2.4791666666666665, + "loss": 0.17623114585876465, + "loss_ce": 0.00038886640686541796, + "loss_iou": 0.27734375, + "loss_num": 0.02197265625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 54311088, + "step": 595 + }, + { + "epoch": 2.4833333333333334, + "grad_norm": 13.595301299687849, + "learning_rate": 5e-05, + "loss": 0.2931, + "num_input_tokens_seen": 54402208, + "step": 596 + }, + { + "epoch": 2.4833333333333334, + "loss": 0.3184419274330139, + "loss_ce": 0.0003266702115070075, + "loss_iou": 0.271484375, + "loss_num": 0.05078125, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 54402208, + "step": 596 + }, + { + "epoch": 2.4875, + "grad_norm": 6.662174326856572, + "learning_rate": 5e-05, + "loss": 0.2273, + "num_input_tokens_seen": 54494012, + "step": 597 + }, + { + "epoch": 2.4875, + "loss": 0.22467482089996338, + "loss_ce": 0.040165551006793976, + "loss_iou": 0.365234375, + "loss_num": 0.01953125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 54494012, + "step": 597 + }, + { + "epoch": 2.4916666666666667, + "grad_norm": 6.025819860082932, + "learning_rate": 5e-05, + "loss": 0.1869, + "num_input_tokens_seen": 54585016, + "step": 598 + }, + { + "epoch": 2.4916666666666667, + "loss": 0.17940133810043335, + "loss_ce": 9.530932584311813e-05, + "loss_iou": 0.21484375, + "loss_num": 0.025634765625, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 54585016, + "step": 598 + }, + { + "epoch": 2.4958333333333336, + "grad_norm": 5.914698039286818, + "learning_rate": 5e-05, + "loss": 0.2873, + "num_input_tokens_seen": 54675584, + "step": 599 + }, + { + "epoch": 2.4958333333333336, + "loss": 0.3032541275024414, + "loss_ce": 0.005219449754804373, + "loss_iou": 0.451171875, + "loss_num": 0.037841796875, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 54675584, + "step": 599 + }, + { + "epoch": 2.5, + "grad_norm": 7.242830995897235, + "learning_rate": 5e-05, + "loss": 0.2843, + "num_input_tokens_seen": 54766396, + "step": 600 + }, + { + "epoch": 2.5, + "loss": 0.2106434404850006, + "loss_ce": 0.002635627519339323, + "loss_iou": 0.38671875, + "loss_num": 0.0230712890625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 54766396, + "step": 600 + }, + { + "epoch": 2.5041666666666664, + "grad_norm": 28.109909327718626, + "learning_rate": 5e-05, + "loss": 0.2381, + "num_input_tokens_seen": 54857360, + "step": 601 + }, + { + "epoch": 2.5041666666666664, + "loss": 0.22719983756542206, + "loss_ce": 0.00246838154271245, + "loss_iou": 0.3359375, + "loss_num": 0.02880859375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 54857360, + "step": 601 + }, + { + "epoch": 2.5083333333333333, + "grad_norm": 4.044157297681407, + "learning_rate": 5e-05, + "loss": 0.1771, + "num_input_tokens_seen": 54949156, + "step": 602 + }, + { + "epoch": 2.5083333333333333, + "loss": 0.1752859652042389, + "loss_ce": 0.0004812785191461444, + "loss_iou": 0.251953125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 54949156, + "step": 602 + }, + { + "epoch": 2.5125, + "grad_norm": 5.0459673677008965, + "learning_rate": 5e-05, + "loss": 0.2322, + "num_input_tokens_seen": 55040396, + "step": 603 + }, + { + "epoch": 2.5125, + "loss": 0.19457007944583893, + "loss_ce": 0.0001731009833747521, + "loss_iou": 0.265625, + "loss_num": 0.026123046875, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 55040396, + "step": 603 + }, + { + "epoch": 2.5166666666666666, + "grad_norm": 12.668503987480383, + "learning_rate": 5e-05, + "loss": 0.2029, + "num_input_tokens_seen": 55131348, + "step": 604 + }, + { + "epoch": 2.5166666666666666, + "loss": 0.21802936494350433, + "loss_ce": 0.00019488329417072237, + "loss_iou": 0.283203125, + "loss_num": 0.0299072265625, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 55131348, + "step": 604 + }, + { + "epoch": 2.5208333333333335, + "grad_norm": 7.16371519314298, + "learning_rate": 5e-05, + "loss": 0.2742, + "num_input_tokens_seen": 55222552, + "step": 605 + }, + { + "epoch": 2.5208333333333335, + "loss": 0.210233673453331, + "loss_ce": 0.0006084321648813784, + "loss_iou": 0.2578125, + "loss_num": 0.0294189453125, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 55222552, + "step": 605 + }, + { + "epoch": 2.525, + "grad_norm": 11.766139605706721, + "learning_rate": 5e-05, + "loss": 0.2538, + "num_input_tokens_seen": 55314332, + "step": 606 + }, + { + "epoch": 2.525, + "loss": 0.2533082664012909, + "loss_ce": 0.0020570610649883747, + "loss_iou": 0.20703125, + "loss_num": 0.040283203125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 55314332, + "step": 606 + }, + { + "epoch": 2.529166666666667, + "grad_norm": 10.679410877512694, + "learning_rate": 5e-05, + "loss": 0.2949, + "num_input_tokens_seen": 55405292, + "step": 607 + }, + { + "epoch": 2.529166666666667, + "loss": 0.3683062791824341, + "loss_ce": 0.0009356520604342222, + "loss_iou": 0.263671875, + "loss_num": 0.060546875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 55405292, + "step": 607 + }, + { + "epoch": 2.533333333333333, + "grad_norm": 5.315307943850376, + "learning_rate": 5e-05, + "loss": 0.2221, + "num_input_tokens_seen": 55497420, + "step": 608 + }, + { + "epoch": 2.533333333333333, + "loss": 0.22690710425376892, + "loss_ce": 0.0016263541765511036, + "loss_iou": 0.265625, + "loss_num": 0.031982421875, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 55497420, + "step": 608 + }, + { + "epoch": 2.5375, + "grad_norm": 5.8205477972719395, + "learning_rate": 5e-05, + "loss": 0.2586, + "num_input_tokens_seen": 55586944, + "step": 609 + }, + { + "epoch": 2.5375, + "loss": 0.22419872879981995, + "loss_ce": 0.007707024924457073, + "loss_iou": 0.1474609375, + "loss_num": 0.0361328125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 55586944, + "step": 609 + }, + { + "epoch": 2.5416666666666665, + "grad_norm": 7.47567852438644, + "learning_rate": 5e-05, + "loss": 0.215, + "num_input_tokens_seen": 55678012, + "step": 610 + }, + { + "epoch": 2.5416666666666665, + "loss": 0.20373475551605225, + "loss_ce": 0.002776502165943384, + "loss_iou": 0.283203125, + "loss_num": 0.0263671875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 55678012, + "step": 610 + }, + { + "epoch": 2.5458333333333334, + "grad_norm": 7.639502583558188, + "learning_rate": 5e-05, + "loss": 0.2196, + "num_input_tokens_seen": 55769596, + "step": 611 + }, + { + "epoch": 2.5458333333333334, + "loss": 0.15836487710475922, + "loss_ce": 0.0027252272702753544, + "loss_iou": 0.33203125, + "loss_num": 0.01495361328125, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 55769596, + "step": 611 + }, + { + "epoch": 2.55, + "grad_norm": 17.317356082913285, + "learning_rate": 5e-05, + "loss": 0.2363, + "num_input_tokens_seen": 55861236, + "step": 612 + }, + { + "epoch": 2.55, + "loss": 0.2235887199640274, + "loss_ce": 0.00410629715770483, + "loss_iou": 0.2158203125, + "loss_num": 0.033447265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 55861236, + "step": 612 + }, + { + "epoch": 2.5541666666666667, + "grad_norm": 32.654099925210524, + "learning_rate": 5e-05, + "loss": 0.2216, + "num_input_tokens_seen": 55952812, + "step": 613 + }, + { + "epoch": 2.5541666666666667, + "loss": 0.22760234773159027, + "loss_ce": 0.0009177735191769898, + "loss_iou": 0.201171875, + "loss_num": 0.035400390625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 55952812, + "step": 613 + }, + { + "epoch": 2.5583333333333336, + "grad_norm": 9.219897635046888, + "learning_rate": 5e-05, + "loss": 0.2513, + "num_input_tokens_seen": 56043840, + "step": 614 + }, + { + "epoch": 2.5583333333333336, + "loss": 0.2992645502090454, + "loss_ce": 0.01947939395904541, + "loss_iou": 0.32421875, + "loss_num": 0.0400390625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 56043840, + "step": 614 + }, + { + "epoch": 2.5625, + "grad_norm": 13.837170946955512, + "learning_rate": 5e-05, + "loss": 0.1934, + "num_input_tokens_seen": 56135244, + "step": 615 + }, + { + "epoch": 2.5625, + "loss": 0.15511605143547058, + "loss_ce": 0.0029859289061278105, + "loss_iou": 0.263671875, + "loss_num": 0.017578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 56135244, + "step": 615 + }, + { + "epoch": 2.5666666666666664, + "grad_norm": 15.002267527018512, + "learning_rate": 5e-05, + "loss": 0.2414, + "num_input_tokens_seen": 56227036, + "step": 616 + }, + { + "epoch": 2.5666666666666664, + "loss": 0.24220646917819977, + "loss_ce": 0.00557316979393363, + "loss_iou": 0.353515625, + "loss_num": 0.0299072265625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 56227036, + "step": 616 + }, + { + "epoch": 2.5708333333333333, + "grad_norm": 7.173839485508792, + "learning_rate": 5e-05, + "loss": 0.2519, + "num_input_tokens_seen": 56317752, + "step": 617 + }, + { + "epoch": 2.5708333333333333, + "loss": 0.1648525893688202, + "loss_ce": 0.0006375029333867133, + "loss_iou": 0.1572265625, + "loss_num": 0.025146484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 56317752, + "step": 617 + }, + { + "epoch": 2.575, + "grad_norm": 16.42943444460069, + "learning_rate": 5e-05, + "loss": 0.2938, + "num_input_tokens_seen": 56408704, + "step": 618 + }, + { + "epoch": 2.575, + "loss": 0.3060579299926758, + "loss_ce": 0.010769846849143505, + "loss_iou": 0.306640625, + "loss_num": 0.0439453125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 56408704, + "step": 618 + }, + { + "epoch": 2.5791666666666666, + "grad_norm": 5.16657496800359, + "learning_rate": 5e-05, + "loss": 0.2348, + "num_input_tokens_seen": 56500452, + "step": 619 + }, + { + "epoch": 2.5791666666666666, + "loss": 0.27557849884033203, + "loss_ce": 0.002751333173364401, + "loss_iou": 0.236328125, + "loss_num": 0.04296875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 56500452, + "step": 619 + }, + { + "epoch": 2.5833333333333335, + "grad_norm": 7.283405389473989, + "learning_rate": 5e-05, + "loss": 0.1938, + "num_input_tokens_seen": 56592032, + "step": 620 + }, + { + "epoch": 2.5833333333333335, + "loss": 0.1765138953924179, + "loss_ce": 0.0032350909896194935, + "loss_iou": 0.244140625, + "loss_num": 0.0225830078125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 56592032, + "step": 620 + }, + { + "epoch": 2.5875, + "grad_norm": 16.938522115564087, + "learning_rate": 5e-05, + "loss": 0.2122, + "num_input_tokens_seen": 56683540, + "step": 621 + }, + { + "epoch": 2.5875, + "loss": 0.16648565232753754, + "loss_ce": 0.0005615915288217366, + "loss_iou": 0.337890625, + "loss_num": 0.016357421875, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 56683540, + "step": 621 + }, + { + "epoch": 2.591666666666667, + "grad_norm": 6.825989216329065, + "learning_rate": 5e-05, + "loss": 0.2783, + "num_input_tokens_seen": 56774596, + "step": 622 + }, + { + "epoch": 2.591666666666667, + "loss": 0.33193904161453247, + "loss_ce": 2.9850532882846892e-05, + "loss_iou": 0.400390625, + "loss_num": 0.04638671875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 56774596, + "step": 622 + }, + { + "epoch": 2.595833333333333, + "grad_norm": 6.282992266417186, + "learning_rate": 5e-05, + "loss": 0.2123, + "num_input_tokens_seen": 56866232, + "step": 623 + }, + { + "epoch": 2.595833333333333, + "loss": 0.21510429680347443, + "loss_ce": 0.0011150480713695288, + "loss_iou": 0.287109375, + "loss_num": 0.028564453125, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 56866232, + "step": 623 + }, + { + "epoch": 2.6, + "grad_norm": 6.0190747707456875, + "learning_rate": 5e-05, + "loss": 0.2136, + "num_input_tokens_seen": 56958128, + "step": 624 + }, + { + "epoch": 2.6, + "loss": 0.25186964869499207, + "loss_ce": 0.00034375820541754365, + "loss_iou": 0.2255859375, + "loss_num": 0.0390625, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 56958128, + "step": 624 + }, + { + "epoch": 2.6041666666666665, + "grad_norm": 10.169413415323342, + "learning_rate": 5e-05, + "loss": 0.2392, + "num_input_tokens_seen": 57048004, + "step": 625 + }, + { + "epoch": 2.6041666666666665, + "loss": 0.24526724219322205, + "loss_ce": 2.7989730369881727e-05, + "loss_iou": 0.40625, + "loss_num": 0.0286865234375, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 57048004, + "step": 625 + }, + { + "epoch": 2.6083333333333334, + "grad_norm": 5.278512092483218, + "learning_rate": 5e-05, + "loss": 0.3013, + "num_input_tokens_seen": 57139252, + "step": 626 + }, + { + "epoch": 2.6083333333333334, + "loss": 0.2858002185821533, + "loss_ce": 0.001010189764201641, + "loss_iou": 0.26171875, + "loss_num": 0.043701171875, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 57139252, + "step": 626 + }, + { + "epoch": 2.6125, + "grad_norm": 8.353415112603466, + "learning_rate": 5e-05, + "loss": 0.2807, + "num_input_tokens_seen": 57228832, + "step": 627 + }, + { + "epoch": 2.6125, + "loss": 0.22690685093402863, + "loss_ce": 0.002175399102270603, + "loss_iou": 0.2890625, + "loss_num": 0.0303955078125, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 57228832, + "step": 627 + }, + { + "epoch": 2.6166666666666667, + "grad_norm": 7.679395051656159, + "learning_rate": 5e-05, + "loss": 0.1637, + "num_input_tokens_seen": 57320856, + "step": 628 + }, + { + "epoch": 2.6166666666666667, + "loss": 0.18475009500980377, + "loss_ce": 0.0012784195132553577, + "loss_iou": 0.125, + "loss_num": 0.030517578125, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 57320856, + "step": 628 + }, + { + "epoch": 2.6208333333333336, + "grad_norm": 15.678118569777013, + "learning_rate": 5e-05, + "loss": 0.23, + "num_input_tokens_seen": 57412972, + "step": 629 + }, + { + "epoch": 2.6208333333333336, + "loss": 0.24771615862846375, + "loss_ce": 0.003575525712221861, + "loss_iou": 0.3984375, + "loss_num": 0.02880859375, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 57412972, + "step": 629 + }, + { + "epoch": 2.625, + "grad_norm": 7.690555252101069, + "learning_rate": 5e-05, + "loss": 0.187, + "num_input_tokens_seen": 57503852, + "step": 630 + }, + { + "epoch": 2.625, + "loss": 0.21856024861335754, + "loss_ce": 0.0008478478412143886, + "loss_iou": 0.1962890625, + "loss_num": 0.03369140625, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 57503852, + "step": 630 + }, + { + "epoch": 2.6291666666666664, + "grad_norm": 24.498475215223806, + "learning_rate": 5e-05, + "loss": 0.2475, + "num_input_tokens_seen": 57594536, + "step": 631 + }, + { + "epoch": 2.6291666666666664, + "loss": 0.23214200139045715, + "loss_ce": 2.5318913685623556e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0341796875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 57594536, + "step": 631 + }, + { + "epoch": 2.6333333333333333, + "grad_norm": 6.746331406823162, + "learning_rate": 5e-05, + "loss": 0.2355, + "num_input_tokens_seen": 57686568, + "step": 632 + }, + { + "epoch": 2.6333333333333333, + "loss": 0.17622056603431702, + "loss_ce": 0.0010954277822747827, + "loss_iou": 0.294921875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 57686568, + "step": 632 + }, + { + "epoch": 2.6375, + "grad_norm": 26.850646887050033, + "learning_rate": 5e-05, + "loss": 0.343, + "num_input_tokens_seen": 57778092, + "step": 633 + }, + { + "epoch": 2.6375, + "loss": 0.3333674669265747, + "loss_ce": 0.000847943767439574, + "loss_iou": 0.236328125, + "loss_num": 0.054443359375, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 57778092, + "step": 633 + }, + { + "epoch": 2.6416666666666666, + "grad_norm": 6.627786580412496, + "learning_rate": 5e-05, + "loss": 0.2335, + "num_input_tokens_seen": 57869300, + "step": 634 + }, + { + "epoch": 2.6416666666666666, + "loss": 0.18597182631492615, + "loss_ce": 0.0004402133054099977, + "loss_iou": 0.3359375, + "loss_num": 0.02001953125, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 57869300, + "step": 634 + }, + { + "epoch": 2.6458333333333335, + "grad_norm": 5.948417253025581, + "learning_rate": 5e-05, + "loss": 0.1583, + "num_input_tokens_seen": 57960656, + "step": 635 + }, + { + "epoch": 2.6458333333333335, + "loss": 0.15748488903045654, + "loss_ce": 0.001723156776279211, + "loss_iou": 0.205078125, + "loss_num": 0.020751953125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 57960656, + "step": 635 + }, + { + "epoch": 2.65, + "grad_norm": 7.441843298976219, + "learning_rate": 5e-05, + "loss": 0.2295, + "num_input_tokens_seen": 58051536, + "step": 636 + }, + { + "epoch": 2.65, + "loss": 0.22246670722961426, + "loss_ce": 0.0006038988940417767, + "loss_iou": 0.369140625, + "loss_num": 0.025634765625, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 58051536, + "step": 636 + }, + { + "epoch": 2.654166666666667, + "grad_norm": 39.320466579597394, + "learning_rate": 5e-05, + "loss": 0.2438, + "num_input_tokens_seen": 58142564, + "step": 637 + }, + { + "epoch": 2.654166666666667, + "loss": 0.23603284358978271, + "loss_ce": 0.000620238424744457, + "loss_iou": 0.474609375, + "loss_num": 0.02294921875, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 58142564, + "step": 637 + }, + { + "epoch": 2.658333333333333, + "grad_norm": 11.045505201026938, + "learning_rate": 5e-05, + "loss": 0.2387, + "num_input_tokens_seen": 58233176, + "step": 638 + }, + { + "epoch": 2.658333333333333, + "loss": 0.27559831738471985, + "loss_ce": 8.562710718251765e-05, + "loss_iou": 0.3515625, + "loss_num": 0.037109375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 58233176, + "step": 638 + }, + { + "epoch": 2.6625, + "grad_norm": 15.660428211237646, + "learning_rate": 5e-05, + "loss": 0.2244, + "num_input_tokens_seen": 58324752, + "step": 639 + }, + { + "epoch": 2.6625, + "loss": 0.2039228230714798, + "loss_ce": 0.0001874716836027801, + "loss_iou": 0.375, + "loss_num": 0.0216064453125, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 58324752, + "step": 639 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 18.16003551462728, + "learning_rate": 5e-05, + "loss": 0.2829, + "num_input_tokens_seen": 58415844, + "step": 640 + }, + { + "epoch": 2.6666666666666665, + "loss": 0.2732733488082886, + "loss_ce": 0.004901759792119265, + "loss_iou": 0.416015625, + "loss_num": 0.032470703125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 58415844, + "step": 640 + }, + { + "epoch": 2.6708333333333334, + "grad_norm": 9.423414548268628, + "learning_rate": 5e-05, + "loss": 0.2242, + "num_input_tokens_seen": 58507252, + "step": 641 + }, + { + "epoch": 2.6708333333333334, + "loss": 0.24119365215301514, + "loss_ce": 0.00010478242620592937, + "loss_iou": 0.34765625, + "loss_num": 0.0303955078125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 58507252, + "step": 641 + }, + { + "epoch": 2.675, + "grad_norm": 5.52971937860211, + "learning_rate": 5e-05, + "loss": 0.2362, + "num_input_tokens_seen": 58599320, + "step": 642 + }, + { + "epoch": 2.675, + "loss": 0.27918940782546997, + "loss_ce": 0.003432593774050474, + "loss_iou": 0.294921875, + "loss_num": 0.0400390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 58599320, + "step": 642 + }, + { + "epoch": 2.6791666666666667, + "grad_norm": 3.342123448215612, + "learning_rate": 5e-05, + "loss": 0.1887, + "num_input_tokens_seen": 58691080, + "step": 643 + }, + { + "epoch": 2.6791666666666667, + "loss": 0.18224427103996277, + "loss_ce": 0.00316713098436594, + "loss_iou": 0.2021484375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 58691080, + "step": 643 + }, + { + "epoch": 2.6833333333333336, + "grad_norm": 5.4384932851096774, + "learning_rate": 5e-05, + "loss": 0.2862, + "num_input_tokens_seen": 58782012, + "step": 644 + }, + { + "epoch": 2.6833333333333336, + "loss": 0.2632726728916168, + "loss_ce": 0.0004247686010785401, + "loss_iou": 0.25, + "loss_num": 0.039794921875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 58782012, + "step": 644 + }, + { + "epoch": 2.6875, + "grad_norm": 6.975188398311233, + "learning_rate": 5e-05, + "loss": 0.2256, + "num_input_tokens_seen": 58873700, + "step": 645 + }, + { + "epoch": 2.6875, + "loss": 0.17887446284294128, + "loss_ce": 0.0006517907604575157, + "loss_iou": 0.2421875, + "loss_num": 0.0230712890625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 58873700, + "step": 645 + }, + { + "epoch": 2.6916666666666664, + "grad_norm": 11.234238447987558, + "learning_rate": 5e-05, + "loss": 0.2266, + "num_input_tokens_seen": 58965020, + "step": 646 + }, + { + "epoch": 2.6916666666666664, + "loss": 0.2156769037246704, + "loss_ce": 0.000802622816991061, + "loss_iou": 0.306640625, + "loss_num": 0.027099609375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 58965020, + "step": 646 + }, + { + "epoch": 2.6958333333333333, + "grad_norm": 15.223147913406171, + "learning_rate": 5e-05, + "loss": 0.2143, + "num_input_tokens_seen": 59056500, + "step": 647 + }, + { + "epoch": 2.6958333333333333, + "loss": 0.2504352927207947, + "loss_ce": 0.0025104722008109093, + "loss_iou": 0.32421875, + "loss_num": 0.032958984375, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 59056500, + "step": 647 + }, + { + "epoch": 2.7, + "grad_norm": 10.64234876170083, + "learning_rate": 5e-05, + "loss": 0.3137, + "num_input_tokens_seen": 59148324, + "step": 648 + }, + { + "epoch": 2.7, + "loss": 0.31744182109832764, + "loss_ce": 0.0014017969369888306, + "loss_iou": 0.265625, + "loss_num": 0.04931640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 59148324, + "step": 648 + }, + { + "epoch": 2.7041666666666666, + "grad_norm": 6.937275427148855, + "learning_rate": 5e-05, + "loss": 0.2047, + "num_input_tokens_seen": 59239988, + "step": 649 + }, + { + "epoch": 2.7041666666666666, + "loss": 0.19481661915779114, + "loss_ce": 0.0026169028133153915, + "loss_iou": 0.26171875, + "loss_num": 0.02490234375, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 59239988, + "step": 649 + }, + { + "epoch": 2.7083333333333335, + "grad_norm": 8.433146515122392, + "learning_rate": 5e-05, + "loss": 0.2338, + "num_input_tokens_seen": 59330968, + "step": 650 + }, + { + "epoch": 2.7083333333333335, + "loss": 0.2627840042114258, + "loss_ce": 0.003933912143111229, + "loss_iou": 0.353515625, + "loss_num": 0.033447265625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 59330968, + "step": 650 + }, + { + "epoch": 2.7125, + "grad_norm": 58.97124918183895, + "learning_rate": 5e-05, + "loss": 0.2601, + "num_input_tokens_seen": 59421252, + "step": 651 + }, + { + "epoch": 2.7125, + "loss": 0.2433607280254364, + "loss_ce": 7.458672916982323e-05, + "loss_iou": 0.34765625, + "loss_num": 0.030517578125, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 59421252, + "step": 651 + }, + { + "epoch": 2.716666666666667, + "grad_norm": 9.067646457220144, + "learning_rate": 5e-05, + "loss": 0.2721, + "num_input_tokens_seen": 59512520, + "step": 652 + }, + { + "epoch": 2.716666666666667, + "loss": 0.26135459542274475, + "loss_ce": 0.0008565568132326007, + "loss_iou": 0.353515625, + "loss_num": 0.03369140625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 59512520, + "step": 652 + }, + { + "epoch": 2.720833333333333, + "grad_norm": 6.026072638011931, + "learning_rate": 5e-05, + "loss": 0.2861, + "num_input_tokens_seen": 59604232, + "step": 653 + }, + { + "epoch": 2.720833333333333, + "loss": 0.30131012201309204, + "loss_ce": 0.0018716540653258562, + "loss_iou": 0.310546875, + "loss_num": 0.043701171875, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 59604232, + "step": 653 + }, + { + "epoch": 2.725, + "grad_norm": 10.651785753724841, + "learning_rate": 5e-05, + "loss": 0.2502, + "num_input_tokens_seen": 59695068, + "step": 654 + }, + { + "epoch": 2.725, + "loss": 0.197337806224823, + "loss_ce": 0.0008961611310951412, + "loss_iou": 0.16796875, + "loss_num": 0.030517578125, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 59695068, + "step": 654 + }, + { + "epoch": 2.7291666666666665, + "grad_norm": 9.793374084872067, + "learning_rate": 5e-05, + "loss": 0.222, + "num_input_tokens_seen": 59786688, + "step": 655 + }, + { + "epoch": 2.7291666666666665, + "loss": 0.2327248454093933, + "loss_ce": 0.007322031073272228, + "loss_iou": 0.318359375, + "loss_num": 0.0284423828125, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 59786688, + "step": 655 + }, + { + "epoch": 2.7333333333333334, + "grad_norm": 13.849598415052697, + "learning_rate": 5e-05, + "loss": 0.314, + "num_input_tokens_seen": 59877916, + "step": 656 + }, + { + "epoch": 2.7333333333333334, + "loss": 0.2899276912212372, + "loss_ce": 0.0003769119502976537, + "loss_iou": 0.189453125, + "loss_num": 0.0478515625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 59877916, + "step": 656 + }, + { + "epoch": 2.7375, + "grad_norm": 14.770246930851002, + "learning_rate": 5e-05, + "loss": 0.2103, + "num_input_tokens_seen": 59968964, + "step": 657 + }, + { + "epoch": 2.7375, + "loss": 0.23949527740478516, + "loss_ce": 0.0029229968786239624, + "loss_iou": 0.25390625, + "loss_num": 0.033935546875, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 59968964, + "step": 657 + }, + { + "epoch": 2.7416666666666667, + "grad_norm": 5.194622412637509, + "learning_rate": 5e-05, + "loss": 0.2327, + "num_input_tokens_seen": 60060144, + "step": 658 + }, + { + "epoch": 2.7416666666666667, + "loss": 0.20210278034210205, + "loss_ce": 7.640862895641476e-05, + "loss_iou": 0.203125, + "loss_num": 0.0296630859375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 60060144, + "step": 658 + }, + { + "epoch": 2.7458333333333336, + "grad_norm": 3.628872661853219, + "learning_rate": 5e-05, + "loss": 0.1819, + "num_input_tokens_seen": 60151700, + "step": 659 + }, + { + "epoch": 2.7458333333333336, + "loss": 0.16526725888252258, + "loss_ce": 0.0032799644395709038, + "loss_iou": 0.1982421875, + "loss_num": 0.02197265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 60151700, + "step": 659 + }, + { + "epoch": 2.75, + "grad_norm": 7.896663551863479, + "learning_rate": 5e-05, + "loss": 0.2507, + "num_input_tokens_seen": 60242876, + "step": 660 + }, + { + "epoch": 2.75, + "loss": 0.19262319803237915, + "loss_ce": 0.0021934963297098875, + "loss_iou": 0.2373046875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 60242876, + "step": 660 + }, + { + "epoch": 2.7541666666666664, + "grad_norm": 11.92417871030824, + "learning_rate": 5e-05, + "loss": 0.2565, + "num_input_tokens_seen": 60334320, + "step": 661 + }, + { + "epoch": 2.7541666666666664, + "loss": 0.19221524894237518, + "loss_ce": 1.553935362608172e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0206298828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 60334320, + "step": 661 + }, + { + "epoch": 2.7583333333333333, + "grad_norm": 11.250407466916267, + "learning_rate": 5e-05, + "loss": 0.2634, + "num_input_tokens_seen": 60425888, + "step": 662 + }, + { + "epoch": 2.7583333333333333, + "loss": 0.17673750221729279, + "loss_ce": 0.0019328128546476364, + "loss_iou": 0.232421875, + "loss_num": 0.022705078125, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 60425888, + "step": 662 + }, + { + "epoch": 2.7625, + "grad_norm": 10.98782497813321, + "learning_rate": 5e-05, + "loss": 0.2131, + "num_input_tokens_seen": 60517260, + "step": 663 + }, + { + "epoch": 2.7625, + "loss": 0.2038540244102478, + "loss_ce": 0.0006679813377559185, + "loss_iou": 0.44921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 60517260, + "step": 663 + }, + { + "epoch": 2.7666666666666666, + "grad_norm": 13.162918661460708, + "learning_rate": 5e-05, + "loss": 0.3174, + "num_input_tokens_seen": 60608376, + "step": 664 + }, + { + "epoch": 2.7666666666666666, + "loss": 0.31894752383232117, + "loss_ce": 0.0025412747636437416, + "loss_iou": 0.2431640625, + "loss_num": 0.05029296875, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 60608376, + "step": 664 + }, + { + "epoch": 2.7708333333333335, + "grad_norm": 6.382744557259831, + "learning_rate": 5e-05, + "loss": 0.1812, + "num_input_tokens_seen": 60699668, + "step": 665 + }, + { + "epoch": 2.7708333333333335, + "loss": 0.16857726871967316, + "loss_ce": 0.0005474792560562491, + "loss_iou": 0.294921875, + "loss_num": 0.0179443359375, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 60699668, + "step": 665 + }, + { + "epoch": 2.775, + "grad_norm": 3.281281401377553, + "learning_rate": 5e-05, + "loss": 0.3254, + "num_input_tokens_seen": 60791272, + "step": 666 + }, + { + "epoch": 2.775, + "loss": 0.2621937096118927, + "loss_ce": 0.002061892068013549, + "loss_iou": 0.208984375, + "loss_num": 0.040771484375, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 60791272, + "step": 666 + }, + { + "epoch": 2.779166666666667, + "grad_norm": 5.055060987599515, + "learning_rate": 5e-05, + "loss": 0.1799, + "num_input_tokens_seen": 60882756, + "step": 667 + }, + { + "epoch": 2.779166666666667, + "loss": 0.11920049786567688, + "loss_ce": 0.0060107954777777195, + "loss_iou": 0.06103515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 60882756, + "step": 667 + }, + { + "epoch": 2.783333333333333, + "grad_norm": 9.929699710676825, + "learning_rate": 5e-05, + "loss": 0.2377, + "num_input_tokens_seen": 60972912, + "step": 668 + }, + { + "epoch": 2.783333333333333, + "loss": 0.2324073612689972, + "loss_ce": 0.0005347952246665955, + "loss_iou": 0.28125, + "loss_num": 0.03125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 60972912, + "step": 668 + }, + { + "epoch": 2.7875, + "grad_norm": 3.515949080645658, + "learning_rate": 5e-05, + "loss": 0.2269, + "num_input_tokens_seen": 61063756, + "step": 669 + }, + { + "epoch": 2.7875, + "loss": 0.20325595140457153, + "loss_ce": 0.0023892491590231657, + "loss_iou": 0.337890625, + "loss_num": 0.0220947265625, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 61063756, + "step": 669 + }, + { + "epoch": 2.7916666666666665, + "grad_norm": 15.70144054020013, + "learning_rate": 5e-05, + "loss": 0.23, + "num_input_tokens_seen": 61154772, + "step": 670 + }, + { + "epoch": 2.7916666666666665, + "loss": 0.25681400299072266, + "loss_ce": 0.0027094087563455105, + "loss_iou": 0.466796875, + "loss_num": 0.02587890625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 61154772, + "step": 670 + }, + { + "epoch": 2.7958333333333334, + "grad_norm": 8.257168700533908, + "learning_rate": 5e-05, + "loss": 0.2208, + "num_input_tokens_seen": 61245680, + "step": 671 + }, + { + "epoch": 2.7958333333333334, + "loss": 0.18007764220237732, + "loss_ce": 2.3923021217342466e-05, + "loss_iou": 0.1064453125, + "loss_num": 0.0302734375, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 61245680, + "step": 671 + }, + { + "epoch": 2.8, + "grad_norm": 8.520313388600616, + "learning_rate": 5e-05, + "loss": 0.2032, + "num_input_tokens_seen": 61336904, + "step": 672 + }, + { + "epoch": 2.8, + "loss": 0.1864093542098999, + "loss_ce": 0.0005267920205369592, + "loss_iou": 0.326171875, + "loss_num": 0.0196533203125, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 61336904, + "step": 672 + }, + { + "epoch": 2.8041666666666667, + "grad_norm": 12.037204098331905, + "learning_rate": 5e-05, + "loss": 0.244, + "num_input_tokens_seen": 61427584, + "step": 673 + }, + { + "epoch": 2.8041666666666667, + "loss": 0.2448461949825287, + "loss_ce": 0.0001562585384817794, + "loss_iou": 0.318359375, + "loss_num": 0.03173828125, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 61427584, + "step": 673 + }, + { + "epoch": 2.8083333333333336, + "grad_norm": 11.035018957052076, + "learning_rate": 5e-05, + "loss": 0.3142, + "num_input_tokens_seen": 61519484, + "step": 674 + }, + { + "epoch": 2.8083333333333336, + "loss": 0.3390733599662781, + "loss_ce": 0.003380006877705455, + "loss_iou": 0.1708984375, + "loss_num": 0.057861328125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 61519484, + "step": 674 + }, + { + "epoch": 2.8125, + "grad_norm": 5.682714870903573, + "learning_rate": 5e-05, + "loss": 0.2313, + "num_input_tokens_seen": 61611072, + "step": 675 + }, + { + "epoch": 2.8125, + "loss": 0.2279127836227417, + "loss_ce": 0.006538269110023975, + "loss_iou": 0.2890625, + "loss_num": 0.0286865234375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 61611072, + "step": 675 + }, + { + "epoch": 2.8166666666666664, + "grad_norm": 16.111549162613954, + "learning_rate": 5e-05, + "loss": 0.2393, + "num_input_tokens_seen": 61701712, + "step": 676 + }, + { + "epoch": 2.8166666666666664, + "loss": 0.28185001015663147, + "loss_ce": 0.0011493464699015021, + "loss_iou": 0.330078125, + "loss_num": 0.038330078125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 61701712, + "step": 676 + }, + { + "epoch": 2.8208333333333333, + "grad_norm": 7.381517233376643, + "learning_rate": 5e-05, + "loss": 0.2343, + "num_input_tokens_seen": 61792896, + "step": 677 + }, + { + "epoch": 2.8208333333333333, + "loss": 0.2264833152294159, + "loss_ce": 0.00022599007934331894, + "loss_iou": 0.234375, + "loss_num": 0.032470703125, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 61792896, + "step": 677 + }, + { + "epoch": 2.825, + "grad_norm": 8.826422628521604, + "learning_rate": 5e-05, + "loss": 0.2607, + "num_input_tokens_seen": 61884132, + "step": 678 + }, + { + "epoch": 2.825, + "loss": 0.24496780335903168, + "loss_ce": 0.001376486150547862, + "loss_iou": 0.25390625, + "loss_num": 0.034912109375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 61884132, + "step": 678 + }, + { + "epoch": 2.8291666666666666, + "grad_norm": 9.635858360118323, + "learning_rate": 5e-05, + "loss": 0.2347, + "num_input_tokens_seen": 61975732, + "step": 679 + }, + { + "epoch": 2.8291666666666666, + "loss": 0.27702057361602783, + "loss_ce": 0.0026675716508179903, + "loss_iou": 0.34765625, + "loss_num": 0.035888671875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 61975732, + "step": 679 + }, + { + "epoch": 2.8333333333333335, + "grad_norm": 11.19240813728173, + "learning_rate": 5e-05, + "loss": 0.2936, + "num_input_tokens_seen": 62067048, + "step": 680 + }, + { + "epoch": 2.8333333333333335, + "loss": 0.2815859913825989, + "loss_ce": 0.004974641837179661, + "loss_iou": 0.322265625, + "loss_num": 0.037841796875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 62067048, + "step": 680 + }, + { + "epoch": 2.8375, + "grad_norm": 5.430056738897933, + "learning_rate": 5e-05, + "loss": 0.331, + "num_input_tokens_seen": 62158012, + "step": 681 + }, + { + "epoch": 2.8375, + "loss": 0.43693971633911133, + "loss_ce": 0.0007824877393431962, + "loss_iou": 0.34765625, + "loss_num": 0.068359375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 62158012, + "step": 681 + }, + { + "epoch": 2.841666666666667, + "grad_norm": 11.984837455811917, + "learning_rate": 5e-05, + "loss": 0.1713, + "num_input_tokens_seen": 62249280, + "step": 682 + }, + { + "epoch": 2.841666666666667, + "loss": 0.17815792560577393, + "loss_ce": 0.001644266420044005, + "loss_iou": 0.2060546875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 62249280, + "step": 682 + }, + { + "epoch": 2.845833333333333, + "grad_norm": 15.193914351278222, + "learning_rate": 5e-05, + "loss": 0.3415, + "num_input_tokens_seen": 62339108, + "step": 683 + }, + { + "epoch": 2.845833333333333, + "loss": 0.2802782654762268, + "loss_ce": 0.0231371708214283, + "loss_iou": 0.294921875, + "loss_num": 0.035400390625, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 62339108, + "step": 683 + }, + { + "epoch": 2.85, + "grad_norm": 6.521015692246656, + "learning_rate": 5e-05, + "loss": 0.1894, + "num_input_tokens_seen": 62430992, + "step": 684 + }, + { + "epoch": 2.85, + "loss": 0.2058933973312378, + "loss_ce": 0.0009373407810926437, + "loss_iou": 0.326171875, + "loss_num": 0.023193359375, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 62430992, + "step": 684 + }, + { + "epoch": 2.8541666666666665, + "grad_norm": 5.874700429325175, + "learning_rate": 5e-05, + "loss": 0.222, + "num_input_tokens_seen": 62522124, + "step": 685 + }, + { + "epoch": 2.8541666666666665, + "loss": 0.22513145208358765, + "loss_ce": 0.0011324224760755897, + "loss_iou": 0.2470703125, + "loss_num": 0.03125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 62522124, + "step": 685 + }, + { + "epoch": 2.8583333333333334, + "grad_norm": 11.611583234295143, + "learning_rate": 5e-05, + "loss": 0.268, + "num_input_tokens_seen": 62613572, + "step": 686 + }, + { + "epoch": 2.8583333333333334, + "loss": 0.20817159116268158, + "loss_ce": 0.0013844802742823958, + "loss_iou": 0.302734375, + "loss_num": 0.0247802734375, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 62613572, + "step": 686 + }, + { + "epoch": 2.8625, + "grad_norm": 16.695023351003698, + "learning_rate": 5e-05, + "loss": 0.2915, + "num_input_tokens_seen": 62704872, + "step": 687 + }, + { + "epoch": 2.8625, + "loss": 0.27818945050239563, + "loss_ce": 0.0017001950182020664, + "loss_iou": 0.322265625, + "loss_num": 0.03759765625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 62704872, + "step": 687 + }, + { + "epoch": 2.8666666666666667, + "grad_norm": 12.654336027930734, + "learning_rate": 5e-05, + "loss": 0.2401, + "num_input_tokens_seen": 62795636, + "step": 688 + }, + { + "epoch": 2.8666666666666667, + "loss": 0.25469058752059937, + "loss_ce": 0.00011293623538222164, + "loss_iou": 0.291015625, + "loss_num": 0.034912109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 62795636, + "step": 688 + }, + { + "epoch": 2.8708333333333336, + "grad_norm": 8.620940501512623, + "learning_rate": 5e-05, + "loss": 0.1799, + "num_input_tokens_seen": 62887012, + "step": 689 + }, + { + "epoch": 2.8708333333333336, + "loss": 0.19433006644248962, + "loss_ce": 0.00106223882175982, + "loss_iou": 0.265625, + "loss_num": 0.0240478515625, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 62887012, + "step": 689 + }, + { + "epoch": 2.875, + "grad_norm": 6.658542573080399, + "learning_rate": 5e-05, + "loss": 0.2632, + "num_input_tokens_seen": 62978376, + "step": 690 + }, + { + "epoch": 2.875, + "loss": 0.33673685789108276, + "loss_ce": 0.00031107006361708045, + "loss_iou": 0.32421875, + "loss_num": 0.04931640625, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 62978376, + "step": 690 + }, + { + "epoch": 2.8791666666666664, + "grad_norm": 13.381904165276124, + "learning_rate": 5e-05, + "loss": 0.3308, + "num_input_tokens_seen": 63069576, + "step": 691 + }, + { + "epoch": 2.8791666666666664, + "loss": 0.2565188407897949, + "loss_ce": 0.0002932699571829289, + "loss_iou": 0.435546875, + "loss_num": 0.0272216796875, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 63069576, + "step": 691 + }, + { + "epoch": 2.8833333333333333, + "grad_norm": 18.143033223163656, + "learning_rate": 5e-05, + "loss": 0.2573, + "num_input_tokens_seen": 63161244, + "step": 692 + }, + { + "epoch": 2.8833333333333333, + "loss": 0.3152886629104614, + "loss_ce": 0.00373471318744123, + "loss_iou": 0.1962890625, + "loss_num": 0.051513671875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 63161244, + "step": 692 + }, + { + "epoch": 2.8875, + "grad_norm": 8.917116183411315, + "learning_rate": 5e-05, + "loss": 0.2325, + "num_input_tokens_seen": 63253472, + "step": 693 + }, + { + "epoch": 2.8875, + "loss": 0.2928329110145569, + "loss_ce": 0.0025497153401374817, + "loss_iou": 0.3125, + "loss_num": 0.040771484375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 63253472, + "step": 693 + }, + { + "epoch": 2.8916666666666666, + "grad_norm": 9.397470507246357, + "learning_rate": 5e-05, + "loss": 0.2805, + "num_input_tokens_seen": 63343676, + "step": 694 + }, + { + "epoch": 2.8916666666666666, + "loss": 0.29843559861183167, + "loss_ce": 9.574588329996914e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0361328125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 63343676, + "step": 694 + }, + { + "epoch": 2.8958333333333335, + "grad_norm": 13.558932690642562, + "learning_rate": 5e-05, + "loss": 0.2866, + "num_input_tokens_seen": 63435244, + "step": 695 + }, + { + "epoch": 2.8958333333333335, + "loss": 0.35385358333587646, + "loss_ce": 0.0011924391146749258, + "loss_iou": 0.263671875, + "loss_num": 0.055908203125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 63435244, + "step": 695 + }, + { + "epoch": 2.9, + "grad_norm": 5.0040104824422285, + "learning_rate": 5e-05, + "loss": 0.2512, + "num_input_tokens_seen": 63526600, + "step": 696 + }, + { + "epoch": 2.9, + "loss": 0.29930955171585083, + "loss_ce": 0.005486306734383106, + "loss_iou": 0.2451171875, + "loss_num": 0.044921875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 63526600, + "step": 696 + }, + { + "epoch": 2.904166666666667, + "grad_norm": 5.068797963676063, + "learning_rate": 5e-05, + "loss": 0.2976, + "num_input_tokens_seen": 63618304, + "step": 697 + }, + { + "epoch": 2.904166666666667, + "loss": 0.24356845021247864, + "loss_ce": 0.0015030185459181666, + "loss_iou": 0.33984375, + "loss_num": 0.0294189453125, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 63618304, + "step": 697 + }, + { + "epoch": 2.908333333333333, + "grad_norm": 10.455525088397929, + "learning_rate": 5e-05, + "loss": 0.2502, + "num_input_tokens_seen": 63708884, + "step": 698 + }, + { + "epoch": 2.908333333333333, + "loss": 0.27911537885665894, + "loss_ce": 0.005952558480203152, + "loss_iou": 0.32421875, + "loss_num": 0.03662109375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 63708884, + "step": 698 + }, + { + "epoch": 2.9125, + "grad_norm": 11.425428022543029, + "learning_rate": 5e-05, + "loss": 0.263, + "num_input_tokens_seen": 63800660, + "step": 699 + }, + { + "epoch": 2.9125, + "loss": 0.2969471216201782, + "loss_ce": 0.005199064500629902, + "loss_iou": 0.23046875, + "loss_num": 0.04541015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 63800660, + "step": 699 + }, + { + "epoch": 2.9166666666666665, + "grad_norm": 4.500736972041381, + "learning_rate": 5e-05, + "loss": 0.2321, + "num_input_tokens_seen": 63892240, + "step": 700 + }, + { + "epoch": 2.9166666666666665, + "loss": 0.2109307199716568, + "loss_ce": 0.00042046865564770997, + "loss_iou": 0.326171875, + "loss_num": 0.0238037109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 63892240, + "step": 700 + }, + { + "epoch": 2.9208333333333334, + "grad_norm": 11.82852733728945, + "learning_rate": 5e-05, + "loss": 0.2214, + "num_input_tokens_seen": 63983876, + "step": 701 + }, + { + "epoch": 2.9208333333333334, + "loss": 0.28741031885147095, + "loss_ce": 0.001643726835027337, + "loss_iou": 0.365234375, + "loss_num": 0.036865234375, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 63983876, + "step": 701 + }, + { + "epoch": 2.925, + "grad_norm": 6.021733502978741, + "learning_rate": 5e-05, + "loss": 0.2829, + "num_input_tokens_seen": 64074748, + "step": 702 + }, + { + "epoch": 2.925, + "loss": 0.2205052673816681, + "loss_ce": 0.00035146629670634866, + "loss_iou": 0.38671875, + "loss_num": 0.0223388671875, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 64074748, + "step": 702 + }, + { + "epoch": 2.9291666666666667, + "grad_norm": 6.696864207356387, + "learning_rate": 5e-05, + "loss": 0.226, + "num_input_tokens_seen": 64166260, + "step": 703 + }, + { + "epoch": 2.9291666666666667, + "loss": 0.2554856836795807, + "loss_ce": 0.0018235727911815047, + "loss_iou": 0.27734375, + "loss_num": 0.03515625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 64166260, + "step": 703 + }, + { + "epoch": 2.9333333333333336, + "grad_norm": 5.027904053317167, + "learning_rate": 5e-05, + "loss": 0.1737, + "num_input_tokens_seen": 64257464, + "step": 704 + }, + { + "epoch": 2.9333333333333336, + "loss": 0.19181515276432037, + "loss_ce": 0.0008971852366812527, + "loss_iou": 0.287109375, + "loss_num": 0.0220947265625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 64257464, + "step": 704 + }, + { + "epoch": 2.9375, + "grad_norm": 14.681615464234536, + "learning_rate": 5e-05, + "loss": 0.29, + "num_input_tokens_seen": 64348264, + "step": 705 + }, + { + "epoch": 2.9375, + "loss": 0.34003758430480957, + "loss_ce": 0.0014145312597975135, + "loss_iou": 0.474609375, + "loss_num": 0.041015625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 64348264, + "step": 705 + }, + { + "epoch": 2.9416666666666664, + "grad_norm": 11.050186663812722, + "learning_rate": 5e-05, + "loss": 0.3783, + "num_input_tokens_seen": 64439336, + "step": 706 + }, + { + "epoch": 2.9416666666666664, + "loss": 0.4706357717514038, + "loss_ce": 5.473133933264762e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0751953125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 64439336, + "step": 706 + }, + { + "epoch": 2.9458333333333333, + "grad_norm": 9.673816924435314, + "learning_rate": 5e-05, + "loss": 0.2529, + "num_input_tokens_seen": 64530780, + "step": 707 + }, + { + "epoch": 2.9458333333333333, + "loss": 0.27379924058914185, + "loss_ce": 0.0010941624641418457, + "loss_iou": 0.3671875, + "loss_num": 0.03369140625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 64530780, + "step": 707 + }, + { + "epoch": 2.95, + "grad_norm": 9.763305725297712, + "learning_rate": 5e-05, + "loss": 0.2803, + "num_input_tokens_seen": 64622072, + "step": 708 + }, + { + "epoch": 2.95, + "loss": 0.2703673541545868, + "loss_ce": 0.0031554533634334803, + "loss_iou": 0.39453125, + "loss_num": 0.0311279296875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 64622072, + "step": 708 + }, + { + "epoch": 2.9541666666666666, + "grad_norm": 7.074264004711966, + "learning_rate": 5e-05, + "loss": 0.2478, + "num_input_tokens_seen": 64713488, + "step": 709 + }, + { + "epoch": 2.9541666666666666, + "loss": 0.28091779351234436, + "loss_ce": 0.0006138343014754355, + "loss_iou": 0.345703125, + "loss_num": 0.036376953125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 64713488, + "step": 709 + }, + { + "epoch": 2.9583333333333335, + "grad_norm": 7.851782261633102, + "learning_rate": 5e-05, + "loss": 0.2913, + "num_input_tokens_seen": 64804640, + "step": 710 + }, + { + "epoch": 2.9583333333333335, + "loss": 0.348236620426178, + "loss_ce": 0.0005803514504805207, + "loss_iou": 0.2490234375, + "loss_num": 0.055419921875, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 64804640, + "step": 710 + }, + { + "epoch": 2.9625, + "grad_norm": 31.590384776585033, + "learning_rate": 5e-05, + "loss": 0.2149, + "num_input_tokens_seen": 64895024, + "step": 711 + }, + { + "epoch": 2.9625, + "loss": 0.2294641137123108, + "loss_ce": 3.295343049103394e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0264892578125, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 64895024, + "step": 711 + }, + { + "epoch": 2.966666666666667, + "grad_norm": 9.453416591030498, + "learning_rate": 5e-05, + "loss": 0.3454, + "num_input_tokens_seen": 64986716, + "step": 712 + }, + { + "epoch": 2.966666666666667, + "loss": 0.31218835711479187, + "loss_ce": 0.0015194227453321218, + "loss_iou": 0.330078125, + "loss_num": 0.04345703125, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 64986716, + "step": 712 + }, + { + "epoch": 2.970833333333333, + "grad_norm": 24.69275794721592, + "learning_rate": 5e-05, + "loss": 0.2417, + "num_input_tokens_seen": 65077856, + "step": 713 + }, + { + "epoch": 2.970833333333333, + "loss": 0.30438870191574097, + "loss_ce": 0.00015895110846031457, + "loss_iou": 0.34765625, + "loss_num": 0.041015625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 65077856, + "step": 713 + }, + { + "epoch": 2.975, + "grad_norm": 20.21321352363983, + "learning_rate": 5e-05, + "loss": 0.2496, + "num_input_tokens_seen": 65169540, + "step": 714 + }, + { + "epoch": 2.975, + "loss": 0.2716801166534424, + "loss_ce": 0.002454049652442336, + "loss_iou": 0.302734375, + "loss_num": 0.03662109375, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 65169540, + "step": 714 + }, + { + "epoch": 2.9791666666666665, + "grad_norm": 10.718899872832404, + "learning_rate": 5e-05, + "loss": 0.2258, + "num_input_tokens_seen": 65260556, + "step": 715 + }, + { + "epoch": 2.9791666666666665, + "loss": 0.23855066299438477, + "loss_ce": 0.001795294345356524, + "loss_iou": 0.4296875, + "loss_num": 0.02294921875, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 65260556, + "step": 715 + }, + { + "epoch": 2.9833333333333334, + "grad_norm": 8.397827721760875, + "learning_rate": 5e-05, + "loss": 0.2388, + "num_input_tokens_seen": 65351612, + "step": 716 + }, + { + "epoch": 2.9833333333333334, + "loss": 0.3029412627220154, + "loss_ce": 0.002495725639164448, + "loss_iou": 0.296875, + "loss_num": 0.04296875, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 65351612, + "step": 716 + }, + { + "epoch": 2.9875, + "grad_norm": 3.6564582738942732, + "learning_rate": 5e-05, + "loss": 0.1776, + "num_input_tokens_seen": 65442800, + "step": 717 + }, + { + "epoch": 2.9875, + "loss": 0.14690200984477997, + "loss_ce": 0.00169937324244529, + "loss_iou": 0.28515625, + "loss_num": 0.0126953125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 65442800, + "step": 717 + }, + { + "epoch": 2.9916666666666667, + "grad_norm": 17.98213183348839, + "learning_rate": 5e-05, + "loss": 0.2081, + "num_input_tokens_seen": 65533756, + "step": 718 + }, + { + "epoch": 2.9916666666666667, + "loss": 0.19276343286037445, + "loss_ce": 0.004042725078761578, + "loss_iou": 0.22265625, + "loss_num": 0.02490234375, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 65533756, + "step": 718 + }, + { + "epoch": 2.9958333333333336, + "grad_norm": 25.648827625748236, + "learning_rate": 5e-05, + "loss": 0.2349, + "num_input_tokens_seen": 65625792, + "step": 719 + }, + { + "epoch": 2.9958333333333336, + "loss": 0.25091925263404846, + "loss_ce": 0.002689286367967725, + "loss_iou": 0.330078125, + "loss_num": 0.0306396484375, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 65625792, + "step": 719 + }, + { + "epoch": 3.0, + "grad_norm": 6.210842823600316, + "learning_rate": 5e-05, + "loss": 0.3084, + "num_input_tokens_seen": 65716796, + "step": 720 + }, + { + "epoch": 3.0, + "loss": 0.24776512384414673, + "loss_ce": 0.0007558311335742474, + "loss_iou": 0.3125, + "loss_num": 0.031494140625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 65716796, + "step": 720 + }, + { + "epoch": 3.004166666666667, + "grad_norm": 5.980664589750897, + "learning_rate": 5e-05, + "loss": 0.2419, + "num_input_tokens_seen": 65807996, + "step": 721 + }, + { + "epoch": 3.004166666666667, + "loss": 0.27643975615501404, + "loss_ce": 0.0007439564215019345, + "loss_iou": 0.2158203125, + "loss_num": 0.042724609375, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 65807996, + "step": 721 + }, + { + "epoch": 3.0083333333333333, + "grad_norm": 6.249638553171776, + "learning_rate": 5e-05, + "loss": 0.2432, + "num_input_tokens_seen": 65899328, + "step": 722 + }, + { + "epoch": 3.0083333333333333, + "loss": 0.3124554753303528, + "loss_ce": 0.0016644435236230493, + "loss_iou": 0.390625, + "loss_num": 0.03955078125, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 65899328, + "step": 722 + }, + { + "epoch": 3.0125, + "grad_norm": 5.702342020055269, + "learning_rate": 5e-05, + "loss": 0.275, + "num_input_tokens_seen": 65990712, + "step": 723 + }, + { + "epoch": 3.0125, + "loss": 0.3301711082458496, + "loss_ce": 0.000703359954059124, + "loss_iou": 0.353515625, + "loss_num": 0.045654296875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 65990712, + "step": 723 + }, + { + "epoch": 3.0166666666666666, + "grad_norm": 17.307058236726217, + "learning_rate": 5e-05, + "loss": 0.31, + "num_input_tokens_seen": 66080572, + "step": 724 + }, + { + "epoch": 3.0166666666666666, + "loss": 0.3130786120891571, + "loss_ce": 9.032182424562052e-05, + "loss_iou": 0.484375, + "loss_num": 0.03466796875, + "loss_xval": 0.3125, + "num_input_tokens_seen": 66080572, + "step": 724 + }, + { + "epoch": 3.0208333333333335, + "grad_norm": 5.9350376914759835, + "learning_rate": 5e-05, + "loss": 0.2336, + "num_input_tokens_seen": 66172580, + "step": 725 + }, + { + "epoch": 3.0208333333333335, + "loss": 0.2671472430229187, + "loss_ce": 0.0024377694353461266, + "loss_iou": 0.2080078125, + "loss_num": 0.041015625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 66172580, + "step": 725 + }, + { + "epoch": 3.025, + "grad_norm": 6.0164216461009605, + "learning_rate": 5e-05, + "loss": 0.2599, + "num_input_tokens_seen": 66263888, + "step": 726 + }, + { + "epoch": 3.025, + "loss": 0.23070672154426575, + "loss_ce": 0.0018248929409310222, + "loss_iou": 0.30078125, + "loss_num": 0.0283203125, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 66263888, + "step": 726 + }, + { + "epoch": 3.029166666666667, + "grad_norm": 6.765864018918662, + "learning_rate": 5e-05, + "loss": 0.192, + "num_input_tokens_seen": 66355716, + "step": 727 + }, + { + "epoch": 3.029166666666667, + "loss": 0.20913203060626984, + "loss_ce": 0.0018261217046529055, + "loss_iou": 0.2197265625, + "loss_num": 0.0286865234375, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 66355716, + "step": 727 + }, + { + "epoch": 3.033333333333333, + "grad_norm": 14.570674315501613, + "learning_rate": 5e-05, + "loss": 0.2043, + "num_input_tokens_seen": 66446728, + "step": 728 + }, + { + "epoch": 3.033333333333333, + "loss": 0.17731231451034546, + "loss_ce": 0.0014700321480631828, + "loss_iou": 0.3046875, + "loss_num": 0.0174560546875, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 66446728, + "step": 728 + }, + { + "epoch": 3.0375, + "grad_norm": 11.645861591898887, + "learning_rate": 5e-05, + "loss": 0.3101, + "num_input_tokens_seen": 66538300, + "step": 729 + }, + { + "epoch": 3.0375, + "loss": 0.3834507465362549, + "loss_ce": 0.003812087932601571, + "loss_iou": 0.48046875, + "loss_num": 0.0478515625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 66538300, + "step": 729 + }, + { + "epoch": 3.0416666666666665, + "grad_norm": 2.4084494270106953, + "learning_rate": 5e-05, + "loss": 0.2703, + "num_input_tokens_seen": 66629428, + "step": 730 + }, + { + "epoch": 3.0416666666666665, + "loss": 0.29808974266052246, + "loss_ce": 0.0013368347426876426, + "loss_iou": 0.27734375, + "loss_num": 0.043212890625, + "loss_xval": 0.296875, + "num_input_tokens_seen": 66629428, + "step": 730 + }, + { + "epoch": 3.0458333333333334, + "grad_norm": 6.375099040028535, + "learning_rate": 5e-05, + "loss": 0.2234, + "num_input_tokens_seen": 66720904, + "step": 731 + }, + { + "epoch": 3.0458333333333334, + "loss": 0.15203692018985748, + "loss_ce": 0.0004561072855722159, + "loss_iou": 0.259765625, + "loss_num": 0.01519775390625, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 66720904, + "step": 731 + }, + { + "epoch": 3.05, + "grad_norm": 8.7307235975561, + "learning_rate": 5e-05, + "loss": 0.1671, + "num_input_tokens_seen": 66812448, + "step": 732 + }, + { + "epoch": 3.05, + "loss": 0.15988758206367493, + "loss_ce": 0.0007155279163271189, + "loss_iou": 0.265625, + "loss_num": 0.0162353515625, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 66812448, + "step": 732 + }, + { + "epoch": 3.0541666666666667, + "grad_norm": 10.132144554810068, + "learning_rate": 5e-05, + "loss": 0.2463, + "num_input_tokens_seen": 66903796, + "step": 733 + }, + { + "epoch": 3.0541666666666667, + "loss": 0.23347671329975128, + "loss_ce": 0.0009327692678198218, + "loss_iou": 0.41796875, + "loss_num": 0.02197265625, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 66903796, + "step": 733 + }, + { + "epoch": 3.058333333333333, + "grad_norm": 13.139156179340397, + "learning_rate": 5e-05, + "loss": 0.1758, + "num_input_tokens_seen": 66994496, + "step": 734 + }, + { + "epoch": 3.058333333333333, + "loss": 0.17115697264671326, + "loss_ce": 0.00013645495346281677, + "loss_iou": 0.3125, + "loss_num": 0.015869140625, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 66994496, + "step": 734 + }, + { + "epoch": 3.0625, + "grad_norm": 60.285167818015054, + "learning_rate": 5e-05, + "loss": 0.2115, + "num_input_tokens_seen": 67085600, + "step": 735 + }, + { + "epoch": 3.0625, + "loss": 0.24958476424217224, + "loss_ce": 0.0008054607314988971, + "loss_iou": 0.330078125, + "loss_num": 0.0303955078125, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 67085600, + "step": 735 + }, + { + "epoch": 3.066666666666667, + "grad_norm": 10.104238622158336, + "learning_rate": 5e-05, + "loss": 0.2253, + "num_input_tokens_seen": 67176960, + "step": 736 + }, + { + "epoch": 3.066666666666667, + "loss": 0.23942793905735016, + "loss_ce": 0.0004142660181969404, + "loss_iou": 0.353515625, + "loss_num": 0.0269775390625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 67176960, + "step": 736 + }, + { + "epoch": 3.0708333333333333, + "grad_norm": 110.64647697840584, + "learning_rate": 5e-05, + "loss": 0.3002, + "num_input_tokens_seen": 67268684, + "step": 737 + }, + { + "epoch": 3.0708333333333333, + "loss": 0.4394175410270691, + "loss_ce": 0.0021616898011416197, + "loss_iou": 0.359375, + "loss_num": 0.06591796875, + "loss_xval": 0.4375, + "num_input_tokens_seen": 67268684, + "step": 737 + }, + { + "epoch": 3.075, + "grad_norm": 24.477292386933787, + "learning_rate": 5e-05, + "loss": 0.2881, + "num_input_tokens_seen": 67359580, + "step": 738 + }, + { + "epoch": 3.075, + "loss": 0.3519362211227417, + "loss_ce": 0.00025164170074276626, + "loss_iou": 0.443359375, + "loss_num": 0.044189453125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 67359580, + "step": 738 + }, + { + "epoch": 3.0791666666666666, + "grad_norm": 11.227093648263798, + "learning_rate": 5e-05, + "loss": 0.3251, + "num_input_tokens_seen": 67451448, + "step": 739 + }, + { + "epoch": 3.0791666666666666, + "loss": 0.34748974442481995, + "loss_ce": 0.0026411088183522224, + "loss_iou": 0.3046875, + "loss_num": 0.051025390625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 67451448, + "step": 739 + }, + { + "epoch": 3.0833333333333335, + "grad_norm": 19.663817251326506, + "learning_rate": 5e-05, + "loss": 0.2662, + "num_input_tokens_seen": 67542240, + "step": 740 + }, + { + "epoch": 3.0833333333333335, + "loss": 0.26498711109161377, + "loss_ce": 3.35053882736247e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0311279296875, + "loss_xval": 0.265625, + "num_input_tokens_seen": 67542240, + "step": 740 + }, + { + "epoch": 3.0875, + "grad_norm": 7.2515753144097355, + "learning_rate": 5e-05, + "loss": 0.1997, + "num_input_tokens_seen": 67634168, + "step": 741 + }, + { + "epoch": 3.0875, + "loss": 0.20393003523349762, + "loss_ce": 0.004192485008388758, + "loss_iou": 0.2158203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 67634168, + "step": 741 + }, + { + "epoch": 3.091666666666667, + "grad_norm": 9.50108893290788, + "learning_rate": 5e-05, + "loss": 0.1838, + "num_input_tokens_seen": 67724972, + "step": 742 + }, + { + "epoch": 3.091666666666667, + "loss": 0.1799710988998413, + "loss_ce": 3.946739889215678e-05, + "loss_iou": 0.3125, + "loss_num": 0.0174560546875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 67724972, + "step": 742 + }, + { + "epoch": 3.095833333333333, + "grad_norm": 9.838223610176781, + "learning_rate": 5e-05, + "loss": 0.2151, + "num_input_tokens_seen": 67816724, + "step": 743 + }, + { + "epoch": 3.095833333333333, + "loss": 0.23354679346084595, + "loss_ce": 0.0018878569826483727, + "loss_iou": 0.25, + "loss_num": 0.031494140625, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 67816724, + "step": 743 + }, + { + "epoch": 3.1, + "grad_norm": 23.535071183878934, + "learning_rate": 5e-05, + "loss": 0.2149, + "num_input_tokens_seen": 67908128, + "step": 744 + }, + { + "epoch": 3.1, + "loss": 0.24034494161605835, + "loss_ce": 0.0024909228086471558, + "loss_iou": 0.42578125, + "loss_num": 0.022216796875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 67908128, + "step": 744 + }, + { + "epoch": 3.1041666666666665, + "grad_norm": 11.178389315992188, + "learning_rate": 5e-05, + "loss": 0.2537, + "num_input_tokens_seen": 67999636, + "step": 745 + }, + { + "epoch": 3.1041666666666665, + "loss": 0.308246910572052, + "loss_ce": 0.006122882943600416, + "loss_iou": 0.296875, + "loss_num": 0.042724609375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 67999636, + "step": 745 + }, + { + "epoch": 3.1083333333333334, + "grad_norm": 21.301332250689942, + "learning_rate": 5e-05, + "loss": 0.237, + "num_input_tokens_seen": 68091420, + "step": 746 + }, + { + "epoch": 3.1083333333333334, + "loss": 0.30138248205184937, + "loss_ce": 0.001211579772643745, + "loss_iou": 0.44921875, + "loss_num": 0.033203125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 68091420, + "step": 746 + }, + { + "epoch": 3.1125, + "grad_norm": 20.655782682495296, + "learning_rate": 5e-05, + "loss": 0.2335, + "num_input_tokens_seen": 68183032, + "step": 747 + }, + { + "epoch": 3.1125, + "loss": 0.23181165754795074, + "loss_ce": 0.0014039536472409964, + "loss_iou": 0.28515625, + "loss_num": 0.029052734375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 68183032, + "step": 747 + }, + { + "epoch": 3.1166666666666667, + "grad_norm": 18.36717474641016, + "learning_rate": 5e-05, + "loss": 0.2397, + "num_input_tokens_seen": 68273852, + "step": 748 + }, + { + "epoch": 3.1166666666666667, + "loss": 0.25389769673347473, + "loss_ce": 0.0002966265310533345, + "loss_iou": 0.130859375, + "loss_num": 0.04296875, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 68273852, + "step": 748 + }, + { + "epoch": 3.120833333333333, + "grad_norm": 5.027092516315899, + "learning_rate": 5e-05, + "loss": 0.3024, + "num_input_tokens_seen": 68365540, + "step": 749 + }, + { + "epoch": 3.120833333333333, + "loss": 0.3448534607887268, + "loss_ce": 0.0008593128295615315, + "loss_iou": 0.40234375, + "loss_num": 0.044677734375, + "loss_xval": 0.34375, + "num_input_tokens_seen": 68365540, + "step": 749 + }, + { + "epoch": 3.125, + "grad_norm": 10.119415515193976, + "learning_rate": 5e-05, + "loss": 0.2683, + "num_input_tokens_seen": 68457120, + "step": 750 + }, + { + "epoch": 3.125, + "eval_seeclick_CIoU": 0.18456538021564484, + "eval_seeclick_GIoU": 0.15629717707633972, + "eval_seeclick_IoU": 0.30682089924812317, + "eval_seeclick_MAE_all": 0.10893617942929268, + "eval_seeclick_MAE_h": 0.11178385838866234, + "eval_seeclick_MAE_w": 0.21100984513759613, + "eval_seeclick_MAE_x_boxes": 0.23261529207229614, + "eval_seeclick_MAE_y_boxes": 0.10914269834756851, + "eval_seeclick_NUM_probability": 0.9999980628490448, + "eval_seeclick_inside_bbox": 0.5085227340459824, + "eval_seeclick_loss": 0.6975008845329285, + "eval_seeclick_loss_ce": 0.09834163635969162, + "eval_seeclick_loss_iou": 0.392333984375, + "eval_seeclick_loss_num": 0.0947113037109375, + "eval_seeclick_loss_xval": 0.5914306640625, + "eval_seeclick_runtime": 73.0255, + "eval_seeclick_samples_per_second": 0.589, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 68457120, + "step": 750 + }, + { + "epoch": 3.125, + "eval_icons_CIoU": 0.38979673385620117, + "eval_icons_GIoU": 0.39880916476249695, + "eval_icons_IoU": 0.45799557864665985, + "eval_icons_MAE_all": 0.0646231584250927, + "eval_icons_MAE_h": 0.1286742426455021, + "eval_icons_MAE_w": 0.10467254742980003, + "eval_icons_MAE_x_boxes": 0.10263776406645775, + "eval_icons_MAE_y_boxes": 0.1300939917564392, + "eval_icons_NUM_probability": 0.9999995827674866, + "eval_icons_inside_bbox": 0.6180555522441864, + "eval_icons_loss": 0.405547559261322, + "eval_icons_loss_ce": 8.476455661821092e-07, + "eval_icons_loss_iou": 0.26947021484375, + "eval_icons_loss_num": 0.06772613525390625, + "eval_icons_loss_xval": 0.41937255859375, + "eval_icons_runtime": 89.5837, + "eval_icons_samples_per_second": 0.558, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 68457120, + "step": 750 + }, + { + "epoch": 3.125, + "eval_screenspot_CIoU": 0.33028921484947205, + "eval_screenspot_GIoU": 0.3156593143939972, + "eval_screenspot_IoU": 0.4068033794562022, + "eval_screenspot_MAE_all": 0.10019912074009578, + "eval_screenspot_MAE_h": 0.11826031655073166, + "eval_screenspot_MAE_w": 0.1802296737829844, + "eval_screenspot_MAE_x_boxes": 0.1806080937385559, + "eval_screenspot_MAE_y_boxes": 0.11672305067380269, + "eval_screenspot_NUM_probability": 0.9999342759450277, + "eval_screenspot_inside_bbox": 0.6120833357175192, + "eval_screenspot_loss": 0.6047809720039368, + "eval_screenspot_loss_ce": 3.964887582696974e-05, + "eval_screenspot_loss_iou": 0.3628336588541667, + "eval_screenspot_loss_num": 0.09828694661458333, + "eval_screenspot_loss_xval": 0.600341796875, + "eval_screenspot_runtime": 154.3123, + "eval_screenspot_samples_per_second": 0.577, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 68457120, + "step": 750 + }, + { + "epoch": 3.125, + "eval_compot_CIoU": 0.3661753237247467, + "eval_compot_GIoU": 0.3585101515054703, + "eval_compot_IoU": 0.448599174618721, + "eval_compot_MAE_all": 0.06923724710941315, + "eval_compot_MAE_h": 0.10827170684933662, + "eval_compot_MAE_w": 0.1390521600842476, + "eval_compot_MAE_x_boxes": 0.1365266591310501, + "eval_compot_MAE_y_boxes": 0.10781393945217133, + "eval_compot_NUM_probability": 0.9999944269657135, + "eval_compot_inside_bbox": 0.6180555522441864, + "eval_compot_loss": 0.4263221025466919, + "eval_compot_loss_ce": 0.012015189044177532, + "eval_compot_loss_iou": 0.2703857421875, + "eval_compot_loss_num": 0.06455612182617188, + "eval_compot_loss_xval": 0.40386962890625, + "eval_compot_runtime": 87.8802, + "eval_compot_samples_per_second": 0.569, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 68457120, + "step": 750 + }, + { + "epoch": 3.129166666666667, + "grad_norm": 4.23326422179187, + "learning_rate": 5e-05, + "loss": 0.1328, + "num_input_tokens_seen": 68547508, + "step": 751 + }, + { + "epoch": 3.129166666666667, + "loss": 0.1485927700996399, + "loss_ce": 0.003664804855361581, + "loss_iou": 0.2890625, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 68547508, + "step": 751 + }, + { + "epoch": 3.1333333333333333, + "grad_norm": 4.24613529579631, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 68638744, + "step": 752 + }, + { + "epoch": 3.1333333333333333, + "loss": 0.12835034728050232, + "loss_ce": 0.0031519709154963493, + "loss_iou": 0.412109375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 68638744, + "step": 752 + }, + { + "epoch": 3.1375, + "grad_norm": 5.939108838935815, + "learning_rate": 5e-05, + "loss": 0.2623, + "num_input_tokens_seen": 68729548, + "step": 753 + }, + { + "epoch": 3.1375, + "loss": 0.35694050788879395, + "loss_ce": 0.0001289581268792972, + "loss_iou": 0.1298828125, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 68729548, + "step": 753 + }, + { + "epoch": 3.1416666666666666, + "grad_norm": 5.088557187131976, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 68820904, + "step": 754 + }, + { + "epoch": 3.1416666666666666, + "loss": 0.15128138661384583, + "loss_ce": 0.0018978423904627562, + "loss_iou": 0.259765625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 68820904, + "step": 754 + }, + { + "epoch": 3.1458333333333335, + "grad_norm": 8.873679118003906, + "learning_rate": 5e-05, + "loss": 0.164, + "num_input_tokens_seen": 68912692, + "step": 755 + }, + { + "epoch": 3.1458333333333335, + "loss": 0.20318354666233063, + "loss_ce": 0.000897779711522162, + "loss_iou": 0.24609375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 68912692, + "step": 755 + }, + { + "epoch": 3.15, + "grad_norm": 16.297387562182426, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 69003836, + "step": 756 + }, + { + "epoch": 3.15, + "loss": 0.139330193400383, + "loss_ce": 0.0013602229300886393, + "loss_iou": 0.326171875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 69003836, + "step": 756 + }, + { + "epoch": 3.154166666666667, + "grad_norm": 2.1080044689722515, + "learning_rate": 5e-05, + "loss": 0.1267, + "num_input_tokens_seen": 69094656, + "step": 757 + }, + { + "epoch": 3.154166666666667, + "loss": 0.1562238335609436, + "loss_ce": 0.0010114258620887995, + "loss_iou": 0.1162109375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 69094656, + "step": 757 + }, + { + "epoch": 3.158333333333333, + "grad_norm": 19.595354792790424, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 69186340, + "step": 758 + }, + { + "epoch": 3.158333333333333, + "loss": 0.09858790785074234, + "loss_ce": 0.00019923456420656294, + "loss_iou": 0.29296875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 69186340, + "step": 758 + }, + { + "epoch": 3.1625, + "grad_norm": 8.59897241926053, + "learning_rate": 5e-05, + "loss": 0.179, + "num_input_tokens_seen": 69277932, + "step": 759 + }, + { + "epoch": 3.1625, + "loss": 0.2634497582912445, + "loss_ce": 0.0026465251576155424, + "loss_iou": 0.322265625, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 69277932, + "step": 759 + }, + { + "epoch": 3.1666666666666665, + "grad_norm": 3.239029242443324, + "learning_rate": 5e-05, + "loss": 0.1348, + "num_input_tokens_seen": 69369724, + "step": 760 + }, + { + "epoch": 3.1666666666666665, + "loss": 0.15876804292201996, + "loss_ce": 0.0007175026694312692, + "loss_iou": 0.232421875, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 69369724, + "step": 760 + }, + { + "epoch": 3.1708333333333334, + "grad_norm": 4.391061916761268, + "learning_rate": 5e-05, + "loss": 0.1649, + "num_input_tokens_seen": 69460676, + "step": 761 + }, + { + "epoch": 3.1708333333333334, + "loss": 0.14667901396751404, + "loss_ce": 0.00150689750444144, + "loss_iou": 0.2734375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 69460676, + "step": 761 + }, + { + "epoch": 3.175, + "grad_norm": 3.076163275564048, + "learning_rate": 5e-05, + "loss": 0.1377, + "num_input_tokens_seen": 69551892, + "step": 762 + }, + { + "epoch": 3.175, + "loss": 0.10917718708515167, + "loss_ce": 0.000191280065337196, + "loss_iou": 0.045654296875, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 69551892, + "step": 762 + }, + { + "epoch": 3.1791666666666667, + "grad_norm": 4.103402222438804, + "learning_rate": 5e-05, + "loss": 0.1578, + "num_input_tokens_seen": 69643000, + "step": 763 + }, + { + "epoch": 3.1791666666666667, + "loss": 0.17557072639465332, + "loss_ce": 0.002612349344417453, + "loss_iou": 0.26953125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 69643000, + "step": 763 + }, + { + "epoch": 3.183333333333333, + "grad_norm": 6.504159280235073, + "learning_rate": 5e-05, + "loss": 0.184, + "num_input_tokens_seen": 69734104, + "step": 764 + }, + { + "epoch": 3.183333333333333, + "loss": 0.16976343095302582, + "loss_ce": 0.0020998548716306686, + "loss_iou": 0.373046875, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 69734104, + "step": 764 + }, + { + "epoch": 3.1875, + "grad_norm": 4.173262875208758, + "learning_rate": 5e-05, + "loss": 0.1794, + "num_input_tokens_seen": 69825836, + "step": 765 + }, + { + "epoch": 3.1875, + "loss": 0.19754093885421753, + "loss_ce": 0.0003973785205744207, + "loss_iou": 0.44140625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 69825836, + "step": 765 + }, + { + "epoch": 3.191666666666667, + "grad_norm": 10.20024471572148, + "learning_rate": 5e-05, + "loss": 0.1561, + "num_input_tokens_seen": 69918116, + "step": 766 + }, + { + "epoch": 3.191666666666667, + "loss": 0.18424035608768463, + "loss_ce": 0.0032558543607592583, + "loss_iou": 0.29296875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 69918116, + "step": 766 + }, + { + "epoch": 3.1958333333333333, + "grad_norm": 15.316242677734815, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 70009612, + "step": 767 + }, + { + "epoch": 3.1958333333333333, + "loss": 0.10993720591068268, + "loss_ce": 0.0009894566610455513, + "loss_iou": 0.32421875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 70009612, + "step": 767 + }, + { + "epoch": 3.2, + "grad_norm": 30.342286623122178, + "learning_rate": 5e-05, + "loss": 0.2394, + "num_input_tokens_seen": 70100704, + "step": 768 + }, + { + "epoch": 3.2, + "loss": 0.1780831515789032, + "loss_ce": 0.010602687485516071, + "loss_iou": 0.2041015625, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 70100704, + "step": 768 + }, + { + "epoch": 3.2041666666666666, + "grad_norm": 11.984200740333602, + "learning_rate": 5e-05, + "loss": 0.1789, + "num_input_tokens_seen": 70191612, + "step": 769 + }, + { + "epoch": 3.2041666666666666, + "loss": 0.23412063717842102, + "loss_ce": 0.00130203808657825, + "loss_iou": 0.1494140625, + "loss_num": 0.046630859375, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 70191612, + "step": 769 + }, + { + "epoch": 3.2083333333333335, + "grad_norm": 4.047384925920614, + "learning_rate": 5e-05, + "loss": 0.1686, + "num_input_tokens_seen": 70281980, + "step": 770 + }, + { + "epoch": 3.2083333333333335, + "loss": 0.12489617615938187, + "loss_ce": 0.001879819785244763, + "loss_iou": 0.1298828125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 70281980, + "step": 770 + }, + { + "epoch": 3.2125, + "grad_norm": 2.803294314643065, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 70372984, + "step": 771 + }, + { + "epoch": 3.2125, + "loss": 0.10114803165197372, + "loss_ce": 0.0024694406893104315, + "loss_iou": 0.1669921875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 70372984, + "step": 771 + }, + { + "epoch": 3.216666666666667, + "grad_norm": 6.785832043826993, + "learning_rate": 5e-05, + "loss": 0.1715, + "num_input_tokens_seen": 70463936, + "step": 772 + }, + { + "epoch": 3.216666666666667, + "loss": 0.12498641759157181, + "loss_ce": 0.002580410335212946, + "loss_iou": 0.296875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 70463936, + "step": 772 + }, + { + "epoch": 3.220833333333333, + "grad_norm": 5.1851765366428815, + "learning_rate": 5e-05, + "loss": 0.1977, + "num_input_tokens_seen": 70555092, + "step": 773 + }, + { + "epoch": 3.220833333333333, + "loss": 0.26781585812568665, + "loss_ce": 0.003701466601341963, + "loss_iou": 0.15234375, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 70555092, + "step": 773 + }, + { + "epoch": 3.225, + "grad_norm": 8.688299795759383, + "learning_rate": 5e-05, + "loss": 0.152, + "num_input_tokens_seen": 70646432, + "step": 774 + }, + { + "epoch": 3.225, + "loss": 0.12950116395950317, + "loss_ce": 0.0030057919211685658, + "loss_iou": 0.1884765625, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 70646432, + "step": 774 + }, + { + "epoch": 3.2291666666666665, + "grad_norm": 3.4936298082637647, + "learning_rate": 5e-05, + "loss": 0.1429, + "num_input_tokens_seen": 70737124, + "step": 775 + }, + { + "epoch": 3.2291666666666665, + "loss": 0.16758891940116882, + "loss_ce": 0.00013895254232920706, + "loss_iou": 0.36328125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 70737124, + "step": 775 + }, + { + "epoch": 3.2333333333333334, + "grad_norm": 5.350457455750773, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 70827660, + "step": 776 + }, + { + "epoch": 3.2333333333333334, + "loss": 0.10184476524591446, + "loss_ce": 7.608376108692028e-06, + "loss_iou": 0.04638671875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 70827660, + "step": 776 + }, + { + "epoch": 3.2375, + "grad_norm": 2.5950546414943525, + "learning_rate": 5e-05, + "loss": 0.1718, + "num_input_tokens_seen": 70919060, + "step": 777 + }, + { + "epoch": 3.2375, + "loss": 0.20926007628440857, + "loss_ce": 0.0016261127311736345, + "loss_iou": 0.2373046875, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 70919060, + "step": 777 + }, + { + "epoch": 3.2416666666666667, + "grad_norm": 9.458066813907067, + "learning_rate": 5e-05, + "loss": 0.1588, + "num_input_tokens_seen": 71010772, + "step": 778 + }, + { + "epoch": 3.2416666666666667, + "loss": 0.1460130363702774, + "loss_ce": 0.0014054938219487667, + "loss_iou": 0.216796875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 71010772, + "step": 778 + }, + { + "epoch": 3.245833333333333, + "grad_norm": 6.716962142407295, + "learning_rate": 5e-05, + "loss": 0.1795, + "num_input_tokens_seen": 71101844, + "step": 779 + }, + { + "epoch": 3.245833333333333, + "loss": 0.17741234600543976, + "loss_ce": 0.002088862704113126, + "loss_iou": 0.23046875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 71101844, + "step": 779 + }, + { + "epoch": 3.25, + "grad_norm": 15.172517224504327, + "learning_rate": 5e-05, + "loss": 0.1973, + "num_input_tokens_seen": 71192812, + "step": 780 + }, + { + "epoch": 3.25, + "loss": 0.17874208092689514, + "loss_ce": 0.00018372779595665634, + "loss_iou": 0.314453125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 71192812, + "step": 780 + }, + { + "epoch": 3.2541666666666664, + "grad_norm": 9.244636582891307, + "learning_rate": 5e-05, + "loss": 0.1933, + "num_input_tokens_seen": 71284428, + "step": 781 + }, + { + "epoch": 3.2541666666666664, + "loss": 0.23924417793750763, + "loss_ce": 0.0009018905693665147, + "loss_iou": 0.296875, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 71284428, + "step": 781 + }, + { + "epoch": 3.2583333333333333, + "grad_norm": 3.405929870103962, + "learning_rate": 5e-05, + "loss": 0.1475, + "num_input_tokens_seen": 71375580, + "step": 782 + }, + { + "epoch": 3.2583333333333333, + "loss": 0.14019837975502014, + "loss_ce": 3.11500443785917e-05, + "loss_iou": 0.134765625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 71375580, + "step": 782 + }, + { + "epoch": 3.2625, + "grad_norm": 4.298926898901773, + "learning_rate": 5e-05, + "loss": 0.1711, + "num_input_tokens_seen": 71466292, + "step": 783 + }, + { + "epoch": 3.2625, + "loss": 0.12115862220525742, + "loss_ce": 6.487128121079877e-05, + "loss_iou": 0.216796875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 71466292, + "step": 783 + }, + { + "epoch": 3.2666666666666666, + "grad_norm": 3.795022106927016, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 71557532, + "step": 784 + }, + { + "epoch": 3.2666666666666666, + "loss": 0.06865088641643524, + "loss_ce": 0.002442999044433236, + "loss_iou": 0.2734375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 71557532, + "step": 784 + }, + { + "epoch": 3.2708333333333335, + "grad_norm": 4.1149975795163485, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 71648752, + "step": 785 + }, + { + "epoch": 3.2708333333333335, + "loss": 0.08993716537952423, + "loss_ce": 0.0004901447682641447, + "loss_iou": 0.328125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 71648752, + "step": 785 + }, + { + "epoch": 3.275, + "grad_norm": 6.759255492884617, + "learning_rate": 5e-05, + "loss": 0.1545, + "num_input_tokens_seen": 71740376, + "step": 786 + }, + { + "epoch": 3.275, + "loss": 0.11835940927267075, + "loss_ce": 0.002331583062186837, + "loss_iou": 0.25, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 71740376, + "step": 786 + }, + { + "epoch": 3.279166666666667, + "grad_norm": 3.5243667525079267, + "learning_rate": 5e-05, + "loss": 0.137, + "num_input_tokens_seen": 71832116, + "step": 787 + }, + { + "epoch": 3.279166666666667, + "loss": 0.07449323683977127, + "loss_ce": 0.00338728167116642, + "loss_iou": 0.2412109375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 71832116, + "step": 787 + }, + { + "epoch": 3.283333333333333, + "grad_norm": 3.4524324577376686, + "learning_rate": 5e-05, + "loss": 0.1167, + "num_input_tokens_seen": 71923616, + "step": 788 + }, + { + "epoch": 3.283333333333333, + "loss": 0.12164635211229324, + "loss_ce": 0.0007814801065251231, + "loss_iou": 0.400390625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 71923616, + "step": 788 + }, + { + "epoch": 3.2875, + "grad_norm": 4.781918619933174, + "learning_rate": 5e-05, + "loss": 0.1268, + "num_input_tokens_seen": 72014964, + "step": 789 + }, + { + "epoch": 3.2875, + "loss": 0.18079587817192078, + "loss_ce": 0.00022337015252560377, + "loss_iou": 0.234375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 72014964, + "step": 789 + }, + { + "epoch": 3.2916666666666665, + "grad_norm": 5.517276258832106, + "learning_rate": 5e-05, + "loss": 0.1124, + "num_input_tokens_seen": 72106332, + "step": 790 + }, + { + "epoch": 3.2916666666666665, + "loss": 0.12845715880393982, + "loss_ce": 0.001198849524371326, + "loss_iou": 0.125, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 72106332, + "step": 790 + }, + { + "epoch": 3.2958333333333334, + "grad_norm": 29.98018266638596, + "learning_rate": 5e-05, + "loss": 0.1293, + "num_input_tokens_seen": 72197912, + "step": 791 + }, + { + "epoch": 3.2958333333333334, + "loss": 0.10846811532974243, + "loss_ce": 0.00019174793851561844, + "loss_iou": 0.25, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 72197912, + "step": 791 + }, + { + "epoch": 3.3, + "grad_norm": 9.902237060806767, + "learning_rate": 5e-05, + "loss": 0.2077, + "num_input_tokens_seen": 72288952, + "step": 792 + }, + { + "epoch": 3.3, + "loss": 0.1036197692155838, + "loss_ce": 0.0003177704056724906, + "loss_iou": 0.2578125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 72288952, + "step": 792 + }, + { + "epoch": 3.3041666666666667, + "grad_norm": 5.384154441900459, + "learning_rate": 5e-05, + "loss": 0.2307, + "num_input_tokens_seen": 72379412, + "step": 793 + }, + { + "epoch": 3.3041666666666667, + "loss": 0.26934942603111267, + "loss_ce": 1.288789690079284e-06, + "loss_iou": 0.1611328125, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 72379412, + "step": 793 + }, + { + "epoch": 3.3083333333333336, + "grad_norm": 3.253569574893736, + "learning_rate": 5e-05, + "loss": 0.1432, + "num_input_tokens_seen": 72470820, + "step": 794 + }, + { + "epoch": 3.3083333333333336, + "loss": 0.08719146251678467, + "loss_ce": 0.0017727642552927136, + "loss_iou": 0.201171875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 72470820, + "step": 794 + }, + { + "epoch": 3.3125, + "grad_norm": 4.513685619297302, + "learning_rate": 5e-05, + "loss": 0.1465, + "num_input_tokens_seen": 72562456, + "step": 795 + }, + { + "epoch": 3.3125, + "loss": 0.17177698016166687, + "loss_ce": 0.007043100893497467, + "loss_iou": 0.267578125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 72562456, + "step": 795 + }, + { + "epoch": 3.3166666666666664, + "grad_norm": 7.32082631713167, + "learning_rate": 5e-05, + "loss": 0.1907, + "num_input_tokens_seen": 72653804, + "step": 796 + }, + { + "epoch": 3.3166666666666664, + "loss": 0.2058718502521515, + "loss_ce": 0.0014651028905063868, + "loss_iou": 0.23046875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 72653804, + "step": 796 + }, + { + "epoch": 3.3208333333333333, + "grad_norm": 5.038456487445877, + "learning_rate": 5e-05, + "loss": 0.1141, + "num_input_tokens_seen": 72745292, + "step": 797 + }, + { + "epoch": 3.3208333333333333, + "loss": 0.11923874169588089, + "loss_ce": 0.00018967277719639242, + "loss_iou": 0.27734375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 72745292, + "step": 797 + }, + { + "epoch": 3.325, + "grad_norm": 3.784735092054637, + "learning_rate": 5e-05, + "loss": 0.1424, + "num_input_tokens_seen": 72836684, + "step": 798 + }, + { + "epoch": 3.325, + "loss": 0.12178568542003632, + "loss_ce": 0.0003562434285413474, + "loss_iou": 0.2392578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 72836684, + "step": 798 + }, + { + "epoch": 3.3291666666666666, + "grad_norm": 3.8761231327791097, + "learning_rate": 5e-05, + "loss": 0.1353, + "num_input_tokens_seen": 72928732, + "step": 799 + }, + { + "epoch": 3.3291666666666666, + "loss": 0.14368420839309692, + "loss_ce": 0.00397472595795989, + "loss_iou": 0.16015625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 72928732, + "step": 799 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 4.350988633709539, + "learning_rate": 5e-05, + "loss": 0.1429, + "num_input_tokens_seen": 73019092, + "step": 800 + }, + { + "epoch": 3.3333333333333335, + "loss": 0.07402680069208145, + "loss_ce": 0.0004031416610814631, + "loss_iou": 0.1875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 73019092, + "step": 800 + }, + { + "epoch": 3.3375, + "grad_norm": 6.427134290140459, + "learning_rate": 5e-05, + "loss": 0.2131, + "num_input_tokens_seen": 73110312, + "step": 801 + }, + { + "epoch": 3.3375, + "loss": 0.23900842666625977, + "loss_ce": 0.0008492398192174733, + "loss_iou": 0.17578125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 73110312, + "step": 801 + }, + { + "epoch": 3.341666666666667, + "grad_norm": 8.603622827505802, + "learning_rate": 5e-05, + "loss": 0.1597, + "num_input_tokens_seen": 73201564, + "step": 802 + }, + { + "epoch": 3.341666666666667, + "loss": 0.1939290463924408, + "loss_ce": 0.0020650303922593594, + "loss_iou": 0.2490234375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 73201564, + "step": 802 + }, + { + "epoch": 3.345833333333333, + "grad_norm": 2.8720544166960487, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 73292644, + "step": 803 + }, + { + "epoch": 3.345833333333333, + "loss": 0.09989237040281296, + "loss_ce": 0.00016092188889160752, + "loss_iou": 0.302734375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 73292644, + "step": 803 + }, + { + "epoch": 3.35, + "grad_norm": 7.114589570182523, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 73384220, + "step": 804 + }, + { + "epoch": 3.35, + "loss": 0.07536976039409637, + "loss_ce": 0.0018834264483302832, + "loss_iou": 0.296875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 73384220, + "step": 804 + }, + { + "epoch": 3.3541666666666665, + "grad_norm": 4.828739351086096, + "learning_rate": 5e-05, + "loss": 0.1446, + "num_input_tokens_seen": 73475856, + "step": 805 + }, + { + "epoch": 3.3541666666666665, + "loss": 0.10500533878803253, + "loss_ce": 0.0006504841148853302, + "loss_iou": 0.26953125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 73475856, + "step": 805 + }, + { + "epoch": 3.3583333333333334, + "grad_norm": 3.214913125602273, + "learning_rate": 5e-05, + "loss": 0.1161, + "num_input_tokens_seen": 73567160, + "step": 806 + }, + { + "epoch": 3.3583333333333334, + "loss": 0.12572184205055237, + "loss_ce": 0.0043229046277701855, + "loss_iou": 0.283203125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 73567160, + "step": 806 + }, + { + "epoch": 3.3625, + "grad_norm": 7.114280904079044, + "learning_rate": 5e-05, + "loss": 0.1481, + "num_input_tokens_seen": 73658604, + "step": 807 + }, + { + "epoch": 3.3625, + "loss": 0.21147285401821136, + "loss_ce": 0.0032056490890681744, + "loss_iou": 0.27734375, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 73658604, + "step": 807 + }, + { + "epoch": 3.3666666666666667, + "grad_norm": 2.2326677998565954, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 73749692, + "step": 808 + }, + { + "epoch": 3.3666666666666667, + "loss": 0.1155465841293335, + "loss_ce": 0.004264240153133869, + "loss_iou": 0.275390625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 73749692, + "step": 808 + }, + { + "epoch": 3.3708333333333336, + "grad_norm": 2.0023833310085335, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 73840924, + "step": 809 + }, + { + "epoch": 3.3708333333333336, + "loss": 0.1414848268032074, + "loss_ce": 3.585106605896726e-05, + "loss_iou": 0.376953125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 73840924, + "step": 809 + }, + { + "epoch": 3.375, + "grad_norm": 3.1374653335995903, + "learning_rate": 5e-05, + "loss": 0.1364, + "num_input_tokens_seen": 73932068, + "step": 810 + }, + { + "epoch": 3.375, + "loss": 0.1625884622335434, + "loss_ce": 0.0015929804649204016, + "loss_iou": 0.26171875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 73932068, + "step": 810 + }, + { + "epoch": 3.3791666666666664, + "grad_norm": 1.685708414668304, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 74023680, + "step": 811 + }, + { + "epoch": 3.3791666666666664, + "loss": 0.07619469612836838, + "loss_ce": 0.0005111010977998376, + "loss_iou": 0.2734375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 74023680, + "step": 811 + }, + { + "epoch": 3.3833333333333333, + "grad_norm": 1.8933376014832215, + "learning_rate": 5e-05, + "loss": 0.1564, + "num_input_tokens_seen": 74114916, + "step": 812 + }, + { + "epoch": 3.3833333333333333, + "loss": 0.16464364528656006, + "loss_ce": 0.003922812175005674, + "loss_iou": 0.2099609375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 74114916, + "step": 812 + }, + { + "epoch": 3.3875, + "grad_norm": 4.402993261964665, + "learning_rate": 5e-05, + "loss": 0.1436, + "num_input_tokens_seen": 74206152, + "step": 813 + }, + { + "epoch": 3.3875, + "loss": 0.1346263587474823, + "loss_ce": 0.00030323388637043536, + "loss_iou": 0.3125, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 74206152, + "step": 813 + }, + { + "epoch": 3.3916666666666666, + "grad_norm": 8.942507116728107, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 74298040, + "step": 814 + }, + { + "epoch": 3.3916666666666666, + "loss": 0.08397021889686584, + "loss_ce": 0.0021068197675049305, + "loss_iou": 0.37109375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 74298040, + "step": 814 + }, + { + "epoch": 3.3958333333333335, + "grad_norm": 3.0814600182017395, + "learning_rate": 5e-05, + "loss": 0.1609, + "num_input_tokens_seen": 74389564, + "step": 815 + }, + { + "epoch": 3.3958333333333335, + "loss": 0.17124760150909424, + "loss_ce": 0.000776410277467221, + "loss_iou": 0.408203125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 74389564, + "step": 815 + }, + { + "epoch": 3.4, + "grad_norm": 11.077937502764769, + "learning_rate": 5e-05, + "loss": 0.203, + "num_input_tokens_seen": 74479988, + "step": 816 + }, + { + "epoch": 3.4, + "loss": 0.2204510122537613, + "loss_ce": 7.280018962774193e-06, + "loss_iou": 0.14453125, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 74479988, + "step": 816 + }, + { + "epoch": 3.404166666666667, + "grad_norm": 5.900049711330432, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 74571308, + "step": 817 + }, + { + "epoch": 3.404166666666667, + "loss": 0.11785734444856644, + "loss_ce": 0.0003951911348849535, + "loss_iou": 0.287109375, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 74571308, + "step": 817 + }, + { + "epoch": 3.408333333333333, + "grad_norm": 3.0966138313823004, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 74661784, + "step": 818 + }, + { + "epoch": 3.408333333333333, + "loss": 0.1265370100736618, + "loss_ce": 0.008952784352004528, + "loss_iou": 0.2294921875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 74661784, + "step": 818 + }, + { + "epoch": 3.4125, + "grad_norm": 21.46042272135341, + "learning_rate": 5e-05, + "loss": 0.169, + "num_input_tokens_seen": 74753064, + "step": 819 + }, + { + "epoch": 3.4125, + "loss": 0.06247454136610031, + "loss_ce": 0.0009053258690983057, + "loss_iou": 0.2490234375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 74753064, + "step": 819 + }, + { + "epoch": 3.4166666666666665, + "grad_norm": 5.311244630831775, + "learning_rate": 5e-05, + "loss": 0.1708, + "num_input_tokens_seen": 74844492, + "step": 820 + }, + { + "epoch": 3.4166666666666665, + "loss": 0.1370103657245636, + "loss_ce": 0.0049379244446754456, + "loss_iou": 0.2890625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 74844492, + "step": 820 + }, + { + "epoch": 3.4208333333333334, + "grad_norm": 5.211533051644635, + "learning_rate": 5e-05, + "loss": 0.185, + "num_input_tokens_seen": 74935568, + "step": 821 + }, + { + "epoch": 3.4208333333333334, + "loss": 0.2441231608390808, + "loss_ce": 0.0002572032390162349, + "loss_iou": 0.271484375, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 74935568, + "step": 821 + }, + { + "epoch": 3.425, + "grad_norm": 5.37698998509261, + "learning_rate": 5e-05, + "loss": 0.1298, + "num_input_tokens_seen": 75026872, + "step": 822 + }, + { + "epoch": 3.425, + "loss": 0.14967405796051025, + "loss_ce": 0.00032103960984386504, + "loss_iou": 0.40625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 75026872, + "step": 822 + }, + { + "epoch": 3.4291666666666667, + "grad_norm": 3.4283196054918132, + "learning_rate": 5e-05, + "loss": 0.1534, + "num_input_tokens_seen": 75118744, + "step": 823 + }, + { + "epoch": 3.4291666666666667, + "loss": 0.10464496165513992, + "loss_ce": 0.002945136744529009, + "loss_iou": 0.28125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 75118744, + "step": 823 + }, + { + "epoch": 3.4333333333333336, + "grad_norm": 5.775737722048964, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 75209872, + "step": 824 + }, + { + "epoch": 3.4333333333333336, + "loss": 0.11979828774929047, + "loss_ce": 0.0010086168767884374, + "loss_iou": 0.28515625, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 75209872, + "step": 824 + }, + { + "epoch": 3.4375, + "grad_norm": 3.227782306824348, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 75301168, + "step": 825 + }, + { + "epoch": 3.4375, + "loss": 0.08311197906732559, + "loss_ce": 0.00010416478471597657, + "loss_iou": 0.5390625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 75301168, + "step": 825 + }, + { + "epoch": 3.4416666666666664, + "grad_norm": 4.921386646325974, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 75391968, + "step": 826 + }, + { + "epoch": 3.4416666666666664, + "loss": 0.11057807505130768, + "loss_ce": 0.0024695568718016148, + "loss_iou": 0.3046875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 75391968, + "step": 826 + }, + { + "epoch": 3.4458333333333333, + "grad_norm": 8.603358089040904, + "learning_rate": 5e-05, + "loss": 0.1828, + "num_input_tokens_seen": 75483776, + "step": 827 + }, + { + "epoch": 3.4458333333333333, + "loss": 0.1585550755262375, + "loss_ce": 0.003495255019515753, + "loss_iou": 0.208984375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 75483776, + "step": 827 + }, + { + "epoch": 3.45, + "grad_norm": 2.6733910252403015, + "learning_rate": 5e-05, + "loss": 0.1585, + "num_input_tokens_seen": 75574940, + "step": 828 + }, + { + "epoch": 3.45, + "loss": 0.14622877538204193, + "loss_ce": 0.0012550182873383164, + "loss_iou": 0.1650390625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 75574940, + "step": 828 + }, + { + "epoch": 3.4541666666666666, + "grad_norm": 7.671186709477866, + "learning_rate": 5e-05, + "loss": 0.2176, + "num_input_tokens_seen": 75666364, + "step": 829 + }, + { + "epoch": 3.4541666666666666, + "loss": 0.2595589756965637, + "loss_ce": 0.000525783107150346, + "loss_iou": 0.2109375, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 75666364, + "step": 829 + }, + { + "epoch": 3.4583333333333335, + "grad_norm": 18.226846072866003, + "learning_rate": 5e-05, + "loss": 0.1378, + "num_input_tokens_seen": 75757436, + "step": 830 + }, + { + "epoch": 3.4583333333333335, + "loss": 0.16365206241607666, + "loss_ce": 1.5429660606969264e-06, + "loss_iou": 0.380859375, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 75757436, + "step": 830 + }, + { + "epoch": 3.4625, + "grad_norm": 8.318685188165535, + "learning_rate": 5e-05, + "loss": 0.1436, + "num_input_tokens_seen": 75849040, + "step": 831 + }, + { + "epoch": 3.4625, + "loss": 0.15188753604888916, + "loss_ce": 0.00012363101996015757, + "loss_iou": 0.236328125, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 75849040, + "step": 831 + }, + { + "epoch": 3.466666666666667, + "grad_norm": 2.6630234794156418, + "learning_rate": 5e-05, + "loss": 0.1736, + "num_input_tokens_seen": 75940524, + "step": 832 + }, + { + "epoch": 3.466666666666667, + "loss": 0.19373507797718048, + "loss_ce": 0.001672694575972855, + "loss_iou": 0.337890625, + "loss_num": 0.038330078125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 75940524, + "step": 832 + }, + { + "epoch": 3.470833333333333, + "grad_norm": 2.2459169394774356, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 76031620, + "step": 833 + }, + { + "epoch": 3.470833333333333, + "loss": 0.12036258727312088, + "loss_ce": 0.0005429437151178718, + "loss_iou": 0.2294921875, + "loss_num": 0.0240478515625, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 76031620, + "step": 833 + }, + { + "epoch": 3.475, + "grad_norm": 2.498827767772015, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 76122764, + "step": 834 + }, + { + "epoch": 3.475, + "loss": 0.1229761466383934, + "loss_ce": 2.8445483621908352e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 76122764, + "step": 834 + }, + { + "epoch": 3.4791666666666665, + "grad_norm": 4.298154138835746, + "learning_rate": 5e-05, + "loss": 0.1125, + "num_input_tokens_seen": 76213956, + "step": 835 + }, + { + "epoch": 3.4791666666666665, + "loss": 0.13150085508823395, + "loss_ce": 0.0007940710638649762, + "loss_iou": 0.166015625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 76213956, + "step": 835 + }, + { + "epoch": 3.4833333333333334, + "grad_norm": 4.972732986290837, + "learning_rate": 5e-05, + "loss": 0.172, + "num_input_tokens_seen": 76304764, + "step": 836 + }, + { + "epoch": 3.4833333333333334, + "loss": 0.22352594137191772, + "loss_ce": 0.0004729589563794434, + "loss_iou": 0.248046875, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 76304764, + "step": 836 + }, + { + "epoch": 3.4875, + "grad_norm": 4.725391316756069, + "learning_rate": 5e-05, + "loss": 0.1716, + "num_input_tokens_seen": 76396520, + "step": 837 + }, + { + "epoch": 3.4875, + "loss": 0.13293907046318054, + "loss_ce": 0.0023543545976281166, + "loss_iou": 0.3359375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 76396520, + "step": 837 + }, + { + "epoch": 3.4916666666666667, + "grad_norm": 5.668771822315559, + "learning_rate": 5e-05, + "loss": 0.1328, + "num_input_tokens_seen": 76488272, + "step": 838 + }, + { + "epoch": 3.4916666666666667, + "loss": 0.17592602968215942, + "loss_ce": 0.0019910915289074183, + "loss_iou": 0.251953125, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 76488272, + "step": 838 + }, + { + "epoch": 3.4958333333333336, + "grad_norm": 5.638127585122112, + "learning_rate": 5e-05, + "loss": 0.1882, + "num_input_tokens_seen": 76579868, + "step": 839 + }, + { + "epoch": 3.4958333333333336, + "loss": 0.2288551926612854, + "loss_ce": 0.004596756771206856, + "loss_iou": 0.26171875, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 76579868, + "step": 839 + }, + { + "epoch": 3.5, + "grad_norm": 9.286255736402026, + "learning_rate": 5e-05, + "loss": 0.1512, + "num_input_tokens_seen": 76671076, + "step": 840 + }, + { + "epoch": 3.5, + "loss": 0.07215861976146698, + "loss_ce": 0.00032024577376432717, + "loss_iou": 0.365234375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 76671076, + "step": 840 + }, + { + "epoch": 3.5041666666666664, + "grad_norm": 4.386475945583153, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 76762808, + "step": 841 + }, + { + "epoch": 3.5041666666666664, + "loss": 0.07066143304109573, + "loss_ce": 0.0019663649145513773, + "loss_iou": 0.365234375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 76762808, + "step": 841 + }, + { + "epoch": 3.5083333333333333, + "grad_norm": 4.9959751465411175, + "learning_rate": 5e-05, + "loss": 0.1352, + "num_input_tokens_seen": 76854288, + "step": 842 + }, + { + "epoch": 3.5083333333333333, + "loss": 0.14913874864578247, + "loss_ce": 0.0036614639684557915, + "loss_iou": 0.291015625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 76854288, + "step": 842 + }, + { + "epoch": 3.5125, + "grad_norm": 1.5990170431545732, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 76945748, + "step": 843 + }, + { + "epoch": 3.5125, + "loss": 0.10650460422039032, + "loss_ce": 0.002042930806055665, + "loss_iou": 0.28515625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 76945748, + "step": 843 + }, + { + "epoch": 3.5166666666666666, + "grad_norm": 4.534076156594933, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 77037200, + "step": 844 + }, + { + "epoch": 3.5166666666666666, + "loss": 0.09443645179271698, + "loss_ce": 0.0006559367757290602, + "loss_iou": 0.16796875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 77037200, + "step": 844 + }, + { + "epoch": 3.5208333333333335, + "grad_norm": 2.4011977220701426, + "learning_rate": 5e-05, + "loss": 0.1531, + "num_input_tokens_seen": 77129188, + "step": 845 + }, + { + "epoch": 3.5208333333333335, + "loss": 0.10003243386745453, + "loss_ce": 0.0007434985018335283, + "loss_iou": 0.265625, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 77129188, + "step": 845 + }, + { + "epoch": 3.525, + "grad_norm": 4.260385981525903, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 77220180, + "step": 846 + }, + { + "epoch": 3.525, + "loss": 0.08183970302343369, + "loss_ce": 0.00037303665885701776, + "loss_iou": 0.2890625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 77220180, + "step": 846 + }, + { + "epoch": 3.529166666666667, + "grad_norm": 3.6679913865243883, + "learning_rate": 5e-05, + "loss": 0.1906, + "num_input_tokens_seen": 77311900, + "step": 847 + }, + { + "epoch": 3.529166666666667, + "loss": 0.21985681354999542, + "loss_ce": 0.001732430886477232, + "loss_iou": 0.2119140625, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 77311900, + "step": 847 + }, + { + "epoch": 3.533333333333333, + "grad_norm": 8.497174574589904, + "learning_rate": 5e-05, + "loss": 0.1817, + "num_input_tokens_seen": 77403052, + "step": 848 + }, + { + "epoch": 3.533333333333333, + "loss": 0.12078467756509781, + "loss_ce": 7.239534897962585e-05, + "loss_iou": 0.29296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 77403052, + "step": 848 + }, + { + "epoch": 3.5375, + "grad_norm": 14.354819680373655, + "learning_rate": 5e-05, + "loss": 0.1719, + "num_input_tokens_seen": 77494380, + "step": 849 + }, + { + "epoch": 3.5375, + "loss": 0.1600828915834427, + "loss_ce": 0.0015440742718055844, + "loss_iou": 0.341796875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 77494380, + "step": 849 + }, + { + "epoch": 3.5416666666666665, + "grad_norm": 5.368974849498363, + "learning_rate": 5e-05, + "loss": 0.1391, + "num_input_tokens_seen": 77585304, + "step": 850 + }, + { + "epoch": 3.5416666666666665, + "loss": 0.1813376545906067, + "loss_ce": 0.0013907547108829021, + "loss_iou": 0.087890625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 77585304, + "step": 850 + }, + { + "epoch": 3.5458333333333334, + "grad_norm": 5.220110320232065, + "learning_rate": 5e-05, + "loss": 0.2156, + "num_input_tokens_seen": 77676964, + "step": 851 + }, + { + "epoch": 3.5458333333333334, + "loss": 0.1062939465045929, + "loss_ce": 0.0022442599292844534, + "loss_iou": 0.23046875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 77676964, + "step": 851 + }, + { + "epoch": 3.55, + "grad_norm": 7.044117539532497, + "learning_rate": 5e-05, + "loss": 0.1643, + "num_input_tokens_seen": 77768672, + "step": 852 + }, + { + "epoch": 3.55, + "loss": 0.1451941579580307, + "loss_ce": 0.0013648092281073332, + "loss_iou": 0.474609375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 77768672, + "step": 852 + }, + { + "epoch": 3.5541666666666667, + "grad_norm": 3.512386496593121, + "learning_rate": 5e-05, + "loss": 0.2079, + "num_input_tokens_seen": 77859024, + "step": 853 + }, + { + "epoch": 3.5541666666666667, + "loss": 0.20865783095359802, + "loss_ce": 3.968643432017416e-05, + "loss_iou": 0.19140625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 77859024, + "step": 853 + }, + { + "epoch": 3.5583333333333336, + "grad_norm": 2.5185722713598144, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 77950920, + "step": 854 + }, + { + "epoch": 3.5583333333333336, + "loss": 0.16412417590618134, + "loss_ce": 0.0008246133802458644, + "loss_iou": 0.298828125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 77950920, + "step": 854 + }, + { + "epoch": 3.5625, + "grad_norm": 7.683335972137127, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 78042688, + "step": 855 + }, + { + "epoch": 3.5625, + "loss": 0.08507044613361359, + "loss_ce": 0.0029705329798161983, + "loss_iou": 0.34375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 78042688, + "step": 855 + }, + { + "epoch": 3.5666666666666664, + "grad_norm": 2.9311002155316537, + "learning_rate": 5e-05, + "loss": 0.153, + "num_input_tokens_seen": 78133672, + "step": 856 + }, + { + "epoch": 3.5666666666666664, + "loss": 0.10116206854581833, + "loss_ce": 0.0005761290667578578, + "loss_iou": 0.3203125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 78133672, + "step": 856 + }, + { + "epoch": 3.5708333333333333, + "grad_norm": 1.755481247239063, + "learning_rate": 5e-05, + "loss": 0.1185, + "num_input_tokens_seen": 78225000, + "step": 857 + }, + { + "epoch": 3.5708333333333333, + "loss": 0.12590903043746948, + "loss_ce": 0.002099202712997794, + "loss_iou": 0.23828125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 78225000, + "step": 857 + }, + { + "epoch": 3.575, + "grad_norm": 9.559561742494447, + "learning_rate": 5e-05, + "loss": 0.1764, + "num_input_tokens_seen": 78316300, + "step": 858 + }, + { + "epoch": 3.575, + "loss": 0.2367614209651947, + "loss_ce": 0.0005858814110979438, + "loss_iou": 0.34765625, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 78316300, + "step": 858 + }, + { + "epoch": 3.5791666666666666, + "grad_norm": 10.036579582471406, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 78408016, + "step": 859 + }, + { + "epoch": 3.5791666666666666, + "loss": 0.07882070541381836, + "loss_ce": 0.001809599227271974, + "loss_iou": 0.279296875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 78408016, + "step": 859 + }, + { + "epoch": 3.5833333333333335, + "grad_norm": 2.3705521449279967, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 78498968, + "step": 860 + }, + { + "epoch": 3.5833333333333335, + "loss": 0.11630737036466599, + "loss_ce": 2.0137465980951674e-05, + "loss_iou": 0.3125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 78498968, + "step": 860 + }, + { + "epoch": 3.5875, + "grad_norm": 5.987229333857078, + "learning_rate": 5e-05, + "loss": 0.1943, + "num_input_tokens_seen": 78590208, + "step": 861 + }, + { + "epoch": 3.5875, + "loss": 0.26542216539382935, + "loss_ce": 4.132339381612837e-05, + "loss_iou": 0.2265625, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 78590208, + "step": 861 + }, + { + "epoch": 3.591666666666667, + "grad_norm": 5.736555389697911, + "learning_rate": 5e-05, + "loss": 0.1309, + "num_input_tokens_seen": 78681064, + "step": 862 + }, + { + "epoch": 3.591666666666667, + "loss": 0.1575353443622589, + "loss_ce": 0.0006749906460754573, + "loss_iou": 0.29296875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 78681064, + "step": 862 + }, + { + "epoch": 3.595833333333333, + "grad_norm": 4.555255178719723, + "learning_rate": 5e-05, + "loss": 0.1412, + "num_input_tokens_seen": 78772584, + "step": 863 + }, + { + "epoch": 3.595833333333333, + "loss": 0.10891114175319672, + "loss_ce": 0.0009246918489225209, + "loss_iou": 0.306640625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 78772584, + "step": 863 + }, + { + "epoch": 3.6, + "grad_norm": 10.841600062563783, + "learning_rate": 5e-05, + "loss": 0.1488, + "num_input_tokens_seen": 78864592, + "step": 864 + }, + { + "epoch": 3.6, + "loss": 0.1749829202890396, + "loss_ce": 0.0038403368089348078, + "loss_iou": 0.279296875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 78864592, + "step": 864 + }, + { + "epoch": 3.6041666666666665, + "grad_norm": 4.92048030012845, + "learning_rate": 5e-05, + "loss": 0.1626, + "num_input_tokens_seen": 78955560, + "step": 865 + }, + { + "epoch": 3.6041666666666665, + "loss": 0.1305989921092987, + "loss_ce": 0.0034627642016857862, + "loss_iou": 0.326171875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 78955560, + "step": 865 + }, + { + "epoch": 3.6083333333333334, + "grad_norm": 4.437315758229823, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 79046972, + "step": 866 + }, + { + "epoch": 3.6083333333333334, + "loss": 0.0896613746881485, + "loss_ce": 0.0017402288503944874, + "loss_iou": 0.349609375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 79046972, + "step": 866 + }, + { + "epoch": 3.6125, + "grad_norm": 2.2807293140028384, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 79136824, + "step": 867 + }, + { + "epoch": 3.6125, + "loss": 0.11052871495485306, + "loss_ce": 9.305941603088286e-06, + "loss_iou": 0.39453125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 79136824, + "step": 867 + }, + { + "epoch": 3.6166666666666667, + "grad_norm": 7.141800061379381, + "learning_rate": 5e-05, + "loss": 0.1306, + "num_input_tokens_seen": 79228340, + "step": 868 + }, + { + "epoch": 3.6166666666666667, + "loss": 0.13482439517974854, + "loss_ce": 0.0004097204946447164, + "loss_iou": 0.2890625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 79228340, + "step": 868 + }, + { + "epoch": 3.6208333333333336, + "grad_norm": 3.8234864903722547, + "learning_rate": 5e-05, + "loss": 0.1308, + "num_input_tokens_seen": 79319768, + "step": 869 + }, + { + "epoch": 3.6208333333333336, + "loss": 0.16188469529151917, + "loss_ce": 0.0019420783501118422, + "loss_iou": 0.333984375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 79319768, + "step": 869 + }, + { + "epoch": 3.625, + "grad_norm": 3.551647947387826, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 79411328, + "step": 870 + }, + { + "epoch": 3.625, + "loss": 0.12375196069478989, + "loss_ce": 0.0030549420043826103, + "loss_iou": 0.240234375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 79411328, + "step": 870 + }, + { + "epoch": 3.6291666666666664, + "grad_norm": 3.7442918500543776, + "learning_rate": 5e-05, + "loss": 0.1229, + "num_input_tokens_seen": 79502548, + "step": 871 + }, + { + "epoch": 3.6291666666666664, + "loss": 0.1477302610874176, + "loss_ce": 0.0003303608391433954, + "loss_iou": 0.201171875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 79502548, + "step": 871 + }, + { + "epoch": 3.6333333333333333, + "grad_norm": 7.0439013115078755, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 79594160, + "step": 872 + }, + { + "epoch": 3.6333333333333333, + "loss": 0.153409942984581, + "loss_ce": 0.001920689595863223, + "loss_iou": 0.251953125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 79594160, + "step": 872 + }, + { + "epoch": 3.6375, + "grad_norm": 2.205887052407003, + "learning_rate": 5e-05, + "loss": 0.1456, + "num_input_tokens_seen": 79685640, + "step": 873 + }, + { + "epoch": 3.6375, + "loss": 0.12901686131954193, + "loss_ce": 0.001453386852517724, + "loss_iou": 0.34765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 79685640, + "step": 873 + }, + { + "epoch": 3.6416666666666666, + "grad_norm": 4.9782678997006515, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 79777272, + "step": 874 + }, + { + "epoch": 3.6416666666666666, + "loss": 0.13823364675045013, + "loss_ce": 0.0010342422174289823, + "loss_iou": 0.1142578125, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 79777272, + "step": 874 + }, + { + "epoch": 3.6458333333333335, + "grad_norm": 4.34295823261341, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 79868608, + "step": 875 + }, + { + "epoch": 3.6458333333333335, + "loss": 0.09006209671497345, + "loss_ce": 0.00029464036924764514, + "loss_iou": 0.37890625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 79868608, + "step": 875 + }, + { + "epoch": 3.65, + "grad_norm": 5.888083977419817, + "learning_rate": 5e-05, + "loss": 0.182, + "num_input_tokens_seen": 79959644, + "step": 876 + }, + { + "epoch": 3.65, + "loss": 0.21097832918167114, + "loss_ce": 0.0014599019195884466, + "loss_iou": 0.2275390625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 79959644, + "step": 876 + }, + { + "epoch": 3.654166666666667, + "grad_norm": 3.737045771465334, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 80050584, + "step": 877 + }, + { + "epoch": 3.654166666666667, + "loss": 0.09872293472290039, + "loss_ce": 0.0002579695428721607, + "loss_iou": 0.265625, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 80050584, + "step": 877 + }, + { + "epoch": 3.658333333333333, + "grad_norm": 3.2482049945691562, + "learning_rate": 5e-05, + "loss": 0.1676, + "num_input_tokens_seen": 80141352, + "step": 878 + }, + { + "epoch": 3.658333333333333, + "loss": 0.11685407161712646, + "loss_ce": 2.2607714527111966e-06, + "loss_iou": 0.35546875, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 80141352, + "step": 878 + }, + { + "epoch": 3.6625, + "grad_norm": 5.0140644768875395, + "learning_rate": 5e-05, + "loss": 0.1577, + "num_input_tokens_seen": 80233160, + "step": 879 + }, + { + "epoch": 3.6625, + "loss": 0.1571737825870514, + "loss_ce": 0.0018698314670473337, + "loss_iou": 0.318359375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 80233160, + "step": 879 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 7.255196429255433, + "learning_rate": 5e-05, + "loss": 0.1797, + "num_input_tokens_seen": 80322868, + "step": 880 + }, + { + "epoch": 3.6666666666666665, + "loss": 0.160821795463562, + "loss_ce": 4.757134956889786e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 80322868, + "step": 880 + }, + { + "epoch": 3.6708333333333334, + "grad_norm": 10.360077602554204, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 80413852, + "step": 881 + }, + { + "epoch": 3.6708333333333334, + "loss": 0.08685818314552307, + "loss_ce": 0.0004934355965815485, + "loss_iou": 0.28125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 80413852, + "step": 881 + }, + { + "epoch": 3.675, + "grad_norm": 5.939748395653842, + "learning_rate": 5e-05, + "loss": 0.1236, + "num_input_tokens_seen": 80505160, + "step": 882 + }, + { + "epoch": 3.675, + "loss": 0.13993090391159058, + "loss_ce": 0.00016038586909417063, + "loss_iou": 0.345703125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 80505160, + "step": 882 + }, + { + "epoch": 3.6791666666666667, + "grad_norm": 4.20897557631186, + "learning_rate": 5e-05, + "loss": 0.1369, + "num_input_tokens_seen": 80596384, + "step": 883 + }, + { + "epoch": 3.6791666666666667, + "loss": 0.1429780274629593, + "loss_ce": 6.421100988518447e-05, + "loss_iou": 0.30859375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 80596384, + "step": 883 + }, + { + "epoch": 3.6833333333333336, + "grad_norm": 5.195823724131978, + "learning_rate": 5e-05, + "loss": 0.1643, + "num_input_tokens_seen": 80687208, + "step": 884 + }, + { + "epoch": 3.6833333333333336, + "loss": 0.1412520557641983, + "loss_ce": 0.0003829213383141905, + "loss_iou": 0.2890625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 80687208, + "step": 884 + }, + { + "epoch": 3.6875, + "grad_norm": 3.672237243325278, + "learning_rate": 5e-05, + "loss": 0.1604, + "num_input_tokens_seen": 80778752, + "step": 885 + }, + { + "epoch": 3.6875, + "loss": 0.1688692420721054, + "loss_ce": 0.0016939521301537752, + "loss_iou": 0.36328125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 80778752, + "step": 885 + }, + { + "epoch": 3.6916666666666664, + "grad_norm": 13.96778543887359, + "learning_rate": 5e-05, + "loss": 0.1447, + "num_input_tokens_seen": 80870668, + "step": 886 + }, + { + "epoch": 3.6916666666666664, + "loss": 0.15862274169921875, + "loss_ce": 0.005424493458122015, + "loss_iou": 0.296875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 80870668, + "step": 886 + }, + { + "epoch": 3.6958333333333333, + "grad_norm": 3.293803485383682, + "learning_rate": 5e-05, + "loss": 0.1242, + "num_input_tokens_seen": 80961928, + "step": 887 + }, + { + "epoch": 3.6958333333333333, + "loss": 0.14489322900772095, + "loss_ce": 0.0027576074935495853, + "loss_iou": 0.3515625, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 80961928, + "step": 887 + }, + { + "epoch": 3.7, + "grad_norm": 3.231950272341994, + "learning_rate": 5e-05, + "loss": 0.1681, + "num_input_tokens_seen": 81052964, + "step": 888 + }, + { + "epoch": 3.7, + "loss": 0.14616870880126953, + "loss_ce": 0.0016527026891708374, + "loss_iou": 0.2060546875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 81052964, + "step": 888 + }, + { + "epoch": 3.7041666666666666, + "grad_norm": 5.961244662688287, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 81144284, + "step": 889 + }, + { + "epoch": 3.7041666666666666, + "loss": 0.11914453655481339, + "loss_ce": 0.0016518579795956612, + "loss_iou": 0.28515625, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 81144284, + "step": 889 + }, + { + "epoch": 3.7083333333333335, + "grad_norm": 8.571525568639126, + "learning_rate": 5e-05, + "loss": 0.1556, + "num_input_tokens_seen": 81235328, + "step": 890 + }, + { + "epoch": 3.7083333333333335, + "loss": 0.13373783230781555, + "loss_ce": 0.0008337879553437233, + "loss_iou": 0.23828125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 81235328, + "step": 890 + }, + { + "epoch": 3.7125, + "grad_norm": 5.826651502861672, + "learning_rate": 5e-05, + "loss": 0.1322, + "num_input_tokens_seen": 81326252, + "step": 891 + }, + { + "epoch": 3.7125, + "loss": 0.1149607002735138, + "loss_ce": 0.0010843577329069376, + "loss_iou": 0.16015625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 81326252, + "step": 891 + }, + { + "epoch": 3.716666666666667, + "grad_norm": 2.33538776603535, + "learning_rate": 5e-05, + "loss": 0.1452, + "num_input_tokens_seen": 81416468, + "step": 892 + }, + { + "epoch": 3.716666666666667, + "loss": 0.20617538690567017, + "loss_ce": 0.0007615811773575842, + "loss_iou": 0.28515625, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 81416468, + "step": 892 + }, + { + "epoch": 3.720833333333333, + "grad_norm": 2.6862937653430787, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 81507680, + "step": 893 + }, + { + "epoch": 3.720833333333333, + "loss": 0.09008393436670303, + "loss_ce": 0.0024374467320740223, + "loss_iou": 0.208984375, + "loss_num": 0.0174560546875, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 81507680, + "step": 893 + }, + { + "epoch": 3.725, + "grad_norm": 7.085969329959365, + "learning_rate": 5e-05, + "loss": 0.1291, + "num_input_tokens_seen": 81598772, + "step": 894 + }, + { + "epoch": 3.725, + "loss": 0.1661437749862671, + "loss_ce": 0.0019744737073779106, + "loss_iou": 0.1796875, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 81598772, + "step": 894 + }, + { + "epoch": 3.7291666666666665, + "grad_norm": 7.046338485536485, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 81689904, + "step": 895 + }, + { + "epoch": 3.7291666666666665, + "loss": 0.1281951367855072, + "loss_ce": 0.00035699873114936054, + "loss_iou": 0.298828125, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 81689904, + "step": 895 + }, + { + "epoch": 3.7333333333333334, + "grad_norm": 4.641066412415717, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 81780940, + "step": 896 + }, + { + "epoch": 3.7333333333333334, + "loss": 0.11866171658039093, + "loss_ce": 0.0057161590084433556, + "loss_iou": 0.26953125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 81780940, + "step": 896 + }, + { + "epoch": 3.7375, + "grad_norm": 2.363724821308636, + "learning_rate": 5e-05, + "loss": 0.1382, + "num_input_tokens_seen": 81872548, + "step": 897 + }, + { + "epoch": 3.7375, + "loss": 0.14953023195266724, + "loss_ce": 0.004968480207026005, + "loss_iou": 0.25390625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 81872548, + "step": 897 + }, + { + "epoch": 3.7416666666666667, + "grad_norm": 4.37568884559808, + "learning_rate": 5e-05, + "loss": 0.1448, + "num_input_tokens_seen": 81964208, + "step": 898 + }, + { + "epoch": 3.7416666666666667, + "loss": 0.1580948829650879, + "loss_ce": 0.0035691240336745977, + "loss_iou": 0.1845703125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 81964208, + "step": 898 + }, + { + "epoch": 3.7458333333333336, + "grad_norm": 8.266730113660397, + "learning_rate": 5e-05, + "loss": 0.1684, + "num_input_tokens_seen": 82055704, + "step": 899 + }, + { + "epoch": 3.7458333333333336, + "loss": 0.14106786251068115, + "loss_ce": 0.0011142397997900844, + "loss_iou": 0.302734375, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 82055704, + "step": 899 + }, + { + "epoch": 3.75, + "grad_norm": 6.854780871094846, + "learning_rate": 5e-05, + "loss": 0.183, + "num_input_tokens_seen": 82146356, + "step": 900 + }, + { + "epoch": 3.75, + "loss": 0.14154388010501862, + "loss_ce": 3.353296506247716e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 82146356, + "step": 900 + }, + { + "epoch": 3.7541666666666664, + "grad_norm": 7.233013245995583, + "learning_rate": 5e-05, + "loss": 0.1471, + "num_input_tokens_seen": 82238148, + "step": 901 + }, + { + "epoch": 3.7541666666666664, + "loss": 0.17437678575515747, + "loss_ce": 0.0021660891361534595, + "loss_iou": 0.255859375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 82238148, + "step": 901 + }, + { + "epoch": 3.7583333333333333, + "grad_norm": 3.0074373855147565, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 82329300, + "step": 902 + }, + { + "epoch": 3.7583333333333333, + "loss": 0.14340101182460785, + "loss_ce": 0.0020588545594364405, + "loss_iou": 0.1591796875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 82329300, + "step": 902 + }, + { + "epoch": 3.7625, + "grad_norm": 6.872203528323323, + "learning_rate": 5e-05, + "loss": 0.1822, + "num_input_tokens_seen": 82420628, + "step": 903 + }, + { + "epoch": 3.7625, + "loss": 0.20105046033859253, + "loss_ce": 0.002045325469225645, + "loss_iou": 0.3125, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 82420628, + "step": 903 + }, + { + "epoch": 3.7666666666666666, + "grad_norm": 3.456179304931455, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 82511840, + "step": 904 + }, + { + "epoch": 3.7666666666666666, + "loss": 0.07701139152050018, + "loss_ce": 0.0019076326861977577, + "loss_iou": 0.1484375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 82511840, + "step": 904 + }, + { + "epoch": 3.7708333333333335, + "grad_norm": 3.1272839592274613, + "learning_rate": 5e-05, + "loss": 0.1265, + "num_input_tokens_seen": 82602904, + "step": 905 + }, + { + "epoch": 3.7708333333333335, + "loss": 0.12377360463142395, + "loss_ce": 0.00020792950817849487, + "loss_iou": 0.369140625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 82602904, + "step": 905 + }, + { + "epoch": 3.775, + "grad_norm": 9.70117444562128, + "learning_rate": 5e-05, + "loss": 0.18, + "num_input_tokens_seen": 82693644, + "step": 906 + }, + { + "epoch": 3.775, + "loss": 0.21095910668373108, + "loss_ce": 0.0015017122495919466, + "loss_iou": 0.30078125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 82693644, + "step": 906 + }, + { + "epoch": 3.779166666666667, + "grad_norm": 13.071673758329982, + "learning_rate": 5e-05, + "loss": 0.1602, + "num_input_tokens_seen": 82785280, + "step": 907 + }, + { + "epoch": 3.779166666666667, + "loss": 0.1759437620639801, + "loss_ce": 0.0017799364868551493, + "loss_iou": 0.376953125, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 82785280, + "step": 907 + }, + { + "epoch": 3.783333333333333, + "grad_norm": 2.3950949529873165, + "learning_rate": 5e-05, + "loss": 0.2106, + "num_input_tokens_seen": 82876080, + "step": 908 + }, + { + "epoch": 3.783333333333333, + "loss": 0.1860213726758957, + "loss_ce": 0.0006118176388554275, + "loss_iou": 0.236328125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 82876080, + "step": 908 + }, + { + "epoch": 3.7875, + "grad_norm": 3.5588174044211054, + "learning_rate": 5e-05, + "loss": 0.1465, + "num_input_tokens_seen": 82967216, + "step": 909 + }, + { + "epoch": 3.7875, + "loss": 0.15826614201068878, + "loss_ce": 0.0018788184970617294, + "loss_iou": 0.33203125, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 82967216, + "step": 909 + }, + { + "epoch": 3.7916666666666665, + "grad_norm": 5.950982975488221, + "learning_rate": 5e-05, + "loss": 0.2663, + "num_input_tokens_seen": 83058916, + "step": 910 + }, + { + "epoch": 3.7916666666666665, + "loss": 0.2467661052942276, + "loss_ce": 0.0008249367238022387, + "loss_iou": 0.2080078125, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 83058916, + "step": 910 + }, + { + "epoch": 3.7958333333333334, + "grad_norm": 7.937274435923854, + "learning_rate": 5e-05, + "loss": 0.1838, + "num_input_tokens_seen": 83149832, + "step": 911 + }, + { + "epoch": 3.7958333333333334, + "loss": 0.18536323308944702, + "loss_ce": 0.0038446770049631596, + "loss_iou": 0.126953125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 83149832, + "step": 911 + }, + { + "epoch": 3.8, + "grad_norm": 3.1189095340514794, + "learning_rate": 5e-05, + "loss": 0.1302, + "num_input_tokens_seen": 83240876, + "step": 912 + }, + { + "epoch": 3.8, + "loss": 0.14421942830085754, + "loss_ce": 0.0007562931277789176, + "loss_iou": 0.234375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 83240876, + "step": 912 + }, + { + "epoch": 3.8041666666666667, + "grad_norm": 3.5604076626857815, + "learning_rate": 5e-05, + "loss": 0.1292, + "num_input_tokens_seen": 83332576, + "step": 913 + }, + { + "epoch": 3.8041666666666667, + "loss": 0.11656103283166885, + "loss_ce": 0.005995844956487417, + "loss_iou": 0.189453125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 83332576, + "step": 913 + }, + { + "epoch": 3.8083333333333336, + "grad_norm": 5.22722230155665, + "learning_rate": 5e-05, + "loss": 0.1424, + "num_input_tokens_seen": 83423936, + "step": 914 + }, + { + "epoch": 3.8083333333333336, + "loss": 0.10840374231338501, + "loss_ce": 0.0005851405439898372, + "loss_iou": 0.0751953125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 83423936, + "step": 914 + }, + { + "epoch": 3.8125, + "grad_norm": 2.720035878251394, + "learning_rate": 5e-05, + "loss": 0.1747, + "num_input_tokens_seen": 83515352, + "step": 915 + }, + { + "epoch": 3.8125, + "loss": 0.1660463958978653, + "loss_ce": 0.0030214914586395025, + "loss_iou": 0.150390625, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 83515352, + "step": 915 + }, + { + "epoch": 3.8166666666666664, + "grad_norm": 6.0446981950480145, + "learning_rate": 5e-05, + "loss": 0.1575, + "num_input_tokens_seen": 83606452, + "step": 916 + }, + { + "epoch": 3.8166666666666664, + "loss": 0.12540730834007263, + "loss_ce": 0.001551724737510085, + "loss_iou": 0.328125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 83606452, + "step": 916 + }, + { + "epoch": 3.8208333333333333, + "grad_norm": 6.168434467541618, + "learning_rate": 5e-05, + "loss": 0.1399, + "num_input_tokens_seen": 83697824, + "step": 917 + }, + { + "epoch": 3.8208333333333333, + "loss": 0.08690094202756882, + "loss_ce": 0.0008566331234760582, + "loss_iou": 0.255859375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 83697824, + "step": 917 + }, + { + "epoch": 3.825, + "grad_norm": 3.0075171810552344, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 83789212, + "step": 918 + }, + { + "epoch": 3.825, + "loss": 0.07617410272359848, + "loss_ce": 0.0011618940625339746, + "loss_iou": 0.25390625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 83789212, + "step": 918 + }, + { + "epoch": 3.8291666666666666, + "grad_norm": 10.734787557551742, + "learning_rate": 5e-05, + "loss": 0.1166, + "num_input_tokens_seen": 83879912, + "step": 919 + }, + { + "epoch": 3.8291666666666666, + "loss": 0.10854049026966095, + "loss_ce": 4.730073669634294e-06, + "loss_iou": 0.365234375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 83879912, + "step": 919 + }, + { + "epoch": 3.8333333333333335, + "grad_norm": 6.116961978654756, + "learning_rate": 5e-05, + "loss": 0.1869, + "num_input_tokens_seen": 83970948, + "step": 920 + }, + { + "epoch": 3.8333333333333335, + "loss": 0.14833904802799225, + "loss_ce": 0.0032890033908188343, + "loss_iou": 0.28515625, + "loss_num": 0.0289306640625, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 83970948, + "step": 920 + }, + { + "epoch": 3.8375, + "grad_norm": 3.20997363255731, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 84062356, + "step": 921 + }, + { + "epoch": 3.8375, + "loss": 0.1262749582529068, + "loss_ce": 0.0005120187997817993, + "loss_iou": 0.384765625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 84062356, + "step": 921 + }, + { + "epoch": 3.841666666666667, + "grad_norm": 7.310422599518437, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 84153220, + "step": 922 + }, + { + "epoch": 3.841666666666667, + "loss": 0.09646876156330109, + "loss_ce": 0.00010950541036436334, + "loss_iou": 0.181640625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 84153220, + "step": 922 + }, + { + "epoch": 3.845833333333333, + "grad_norm": 11.07096274448853, + "learning_rate": 5e-05, + "loss": 0.1529, + "num_input_tokens_seen": 84244428, + "step": 923 + }, + { + "epoch": 3.845833333333333, + "loss": 0.17023152112960815, + "loss_ce": 0.00034015910932794213, + "loss_iou": 0.353515625, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 84244428, + "step": 923 + }, + { + "epoch": 3.85, + "grad_norm": 5.051180851336462, + "learning_rate": 5e-05, + "loss": 0.129, + "num_input_tokens_seen": 84336700, + "step": 924 + }, + { + "epoch": 3.85, + "loss": 0.1427893042564392, + "loss_ce": 0.001553958049044013, + "loss_iou": 0.2470703125, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 84336700, + "step": 924 + }, + { + "epoch": 3.8541666666666665, + "grad_norm": 4.495963485971154, + "learning_rate": 5e-05, + "loss": 0.1785, + "num_input_tokens_seen": 84428020, + "step": 925 + }, + { + "epoch": 3.8541666666666665, + "loss": 0.21857711672782898, + "loss_ce": 0.002756803994998336, + "loss_iou": 0.26953125, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 84428020, + "step": 925 + }, + { + "epoch": 3.8583333333333334, + "grad_norm": 3.3145649389351153, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 84519152, + "step": 926 + }, + { + "epoch": 3.8583333333333334, + "loss": 0.08949233591556549, + "loss_ce": 0.0012354973005130887, + "loss_iou": 0.28125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 84519152, + "step": 926 + }, + { + "epoch": 3.8625, + "grad_norm": 11.110766333390965, + "learning_rate": 5e-05, + "loss": 0.1772, + "num_input_tokens_seen": 84610144, + "step": 927 + }, + { + "epoch": 3.8625, + "loss": 0.17750558257102966, + "loss_ce": 0.00021371689217630774, + "loss_iou": 0.337890625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 84610144, + "step": 927 + }, + { + "epoch": 3.8666666666666667, + "grad_norm": 9.320735724906612, + "learning_rate": 5e-05, + "loss": 0.1365, + "num_input_tokens_seen": 84702372, + "step": 928 + }, + { + "epoch": 3.8666666666666667, + "loss": 0.12013794481754303, + "loss_ce": 0.002462164033204317, + "loss_iou": 0.30078125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 84702372, + "step": 928 + }, + { + "epoch": 3.8708333333333336, + "grad_norm": 54.707623230301095, + "learning_rate": 5e-05, + "loss": 0.1142, + "num_input_tokens_seen": 84793828, + "step": 929 + }, + { + "epoch": 3.8708333333333336, + "loss": 0.09439679980278015, + "loss_ce": 0.0032865693792700768, + "loss_iou": 0.26171875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 84793828, + "step": 929 + }, + { + "epoch": 3.875, + "grad_norm": 1.5635476627662845, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 84886020, + "step": 930 + }, + { + "epoch": 3.875, + "loss": 0.10173749923706055, + "loss_ce": 0.004340652376413345, + "loss_iou": 0.259765625, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 84886020, + "step": 930 + }, + { + "epoch": 3.8791666666666664, + "grad_norm": 5.889122208098058, + "learning_rate": 5e-05, + "loss": 0.1463, + "num_input_tokens_seen": 84977480, + "step": 931 + }, + { + "epoch": 3.8791666666666664, + "loss": 0.1577497273683548, + "loss_ce": 0.00040109510882757604, + "loss_iou": 0.2470703125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 84977480, + "step": 931 + }, + { + "epoch": 3.8833333333333333, + "grad_norm": 4.224099488917448, + "learning_rate": 5e-05, + "loss": 0.1867, + "num_input_tokens_seen": 85069072, + "step": 932 + }, + { + "epoch": 3.8833333333333333, + "loss": 0.11261321604251862, + "loss_ce": 0.0005526671302504838, + "loss_iou": 0.27734375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 85069072, + "step": 932 + }, + { + "epoch": 3.8875, + "grad_norm": 11.423622253129569, + "learning_rate": 5e-05, + "loss": 0.1392, + "num_input_tokens_seen": 85160348, + "step": 933 + }, + { + "epoch": 3.8875, + "loss": 0.20320799946784973, + "loss_ce": 0.02242186665534973, + "loss_iou": 0.396484375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 85160348, + "step": 933 + }, + { + "epoch": 3.8916666666666666, + "grad_norm": 5.606403462872274, + "learning_rate": 5e-05, + "loss": 0.1506, + "num_input_tokens_seen": 85251604, + "step": 934 + }, + { + "epoch": 3.8916666666666666, + "loss": 0.15166088938713074, + "loss_ce": 0.0013923394726589322, + "loss_iou": 0.158203125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 85251604, + "step": 934 + }, + { + "epoch": 3.8958333333333335, + "grad_norm": 16.76592793234881, + "learning_rate": 5e-05, + "loss": 0.1967, + "num_input_tokens_seen": 85343376, + "step": 935 + }, + { + "epoch": 3.8958333333333335, + "loss": 0.26409974694252014, + "loss_ce": 0.002258933149278164, + "loss_iou": 0.2734375, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 85343376, + "step": 935 + }, + { + "epoch": 3.9, + "grad_norm": 7.951347957557853, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 85434500, + "step": 936 + }, + { + "epoch": 3.9, + "loss": 0.10416235029697418, + "loss_ce": 0.00015844989684410393, + "loss_iou": 0.35546875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 85434500, + "step": 936 + }, + { + "epoch": 3.904166666666667, + "grad_norm": 2.4830229805624704, + "learning_rate": 5e-05, + "loss": 0.1479, + "num_input_tokens_seen": 85525420, + "step": 937 + }, + { + "epoch": 3.904166666666667, + "loss": 0.20122557878494263, + "loss_ce": 7.928603736218065e-06, + "loss_iou": 0.4765625, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 85525420, + "step": 937 + }, + { + "epoch": 3.908333333333333, + "grad_norm": 6.260816700820158, + "learning_rate": 5e-05, + "loss": 0.1974, + "num_input_tokens_seen": 85616688, + "step": 938 + }, + { + "epoch": 3.908333333333333, + "loss": 0.26152363419532776, + "loss_ce": 0.006701848469674587, + "loss_iou": 0.296875, + "loss_num": 0.051025390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 85616688, + "step": 938 + }, + { + "epoch": 3.9125, + "grad_norm": 2.4562762107300933, + "learning_rate": 5e-05, + "loss": 0.1253, + "num_input_tokens_seen": 85707284, + "step": 939 + }, + { + "epoch": 3.9125, + "loss": 0.13999181985855103, + "loss_ce": 0.0010300282156094909, + "loss_iou": 0.291015625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 85707284, + "step": 939 + }, + { + "epoch": 3.9166666666666665, + "grad_norm": 2.8296455134094156, + "learning_rate": 5e-05, + "loss": 0.1193, + "num_input_tokens_seen": 85798936, + "step": 940 + }, + { + "epoch": 3.9166666666666665, + "loss": 0.13532821834087372, + "loss_ce": 0.00187484931666404, + "loss_iou": 0.28125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 85798936, + "step": 940 + }, + { + "epoch": 3.9208333333333334, + "grad_norm": 2.1867038989698413, + "learning_rate": 5e-05, + "loss": 0.1318, + "num_input_tokens_seen": 85890680, + "step": 941 + }, + { + "epoch": 3.9208333333333334, + "loss": 0.1194370836019516, + "loss_ce": 0.001028873841278255, + "loss_iou": 0.2578125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 85890680, + "step": 941 + }, + { + "epoch": 3.925, + "grad_norm": 9.344342386609227, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 85981620, + "step": 942 + }, + { + "epoch": 3.925, + "loss": 0.13859602808952332, + "loss_ce": 0.0033116054255515337, + "loss_iou": 0.39453125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 85981620, + "step": 942 + }, + { + "epoch": 3.9291666666666667, + "grad_norm": 3.2652150935417814, + "learning_rate": 5e-05, + "loss": 0.1738, + "num_input_tokens_seen": 86073340, + "step": 943 + }, + { + "epoch": 3.9291666666666667, + "loss": 0.16249480843544006, + "loss_ce": 0.000950021087191999, + "loss_iou": 0.2236328125, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 86073340, + "step": 943 + }, + { + "epoch": 3.9333333333333336, + "grad_norm": 6.533695399414451, + "learning_rate": 5e-05, + "loss": 0.1753, + "num_input_tokens_seen": 86164440, + "step": 944 + }, + { + "epoch": 3.9333333333333336, + "loss": 0.13617786765098572, + "loss_ce": 0.00011524726141942665, + "loss_iou": 0.4140625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 86164440, + "step": 944 + }, + { + "epoch": 3.9375, + "grad_norm": 14.80299981535188, + "learning_rate": 5e-05, + "loss": 0.1802, + "num_input_tokens_seen": 86256060, + "step": 945 + }, + { + "epoch": 3.9375, + "loss": 0.2190660834312439, + "loss_ce": 0.002177656628191471, + "loss_iou": 0.26171875, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 86256060, + "step": 945 + }, + { + "epoch": 3.9416666666666664, + "grad_norm": 7.1976167399766275, + "learning_rate": 5e-05, + "loss": 0.1259, + "num_input_tokens_seen": 86347328, + "step": 946 + }, + { + "epoch": 3.9416666666666664, + "loss": 0.17285513877868652, + "loss_ce": 4.935155811836012e-05, + "loss_iou": 0.296875, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 86347328, + "step": 946 + }, + { + "epoch": 3.9458333333333333, + "grad_norm": 11.88169149602509, + "learning_rate": 5e-05, + "loss": 0.1394, + "num_input_tokens_seen": 86438636, + "step": 947 + }, + { + "epoch": 3.9458333333333333, + "loss": 0.13551881909370422, + "loss_ce": 0.001638201531022787, + "loss_iou": 0.33984375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 86438636, + "step": 947 + }, + { + "epoch": 3.95, + "grad_norm": 4.718115266274853, + "learning_rate": 5e-05, + "loss": 0.1227, + "num_input_tokens_seen": 86530304, + "step": 948 + }, + { + "epoch": 3.95, + "loss": 0.11934144049882889, + "loss_ce": 0.002947398694232106, + "loss_iou": 0.291015625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 86530304, + "step": 948 + }, + { + "epoch": 3.9541666666666666, + "grad_norm": 2.5578899967053723, + "learning_rate": 5e-05, + "loss": 0.11, + "num_input_tokens_seen": 86621448, + "step": 949 + }, + { + "epoch": 3.9541666666666666, + "loss": 0.10856461524963379, + "loss_ce": 0.0005018667434342206, + "loss_iou": 0.23046875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 86621448, + "step": 949 + }, + { + "epoch": 3.9583333333333335, + "grad_norm": 15.126351483779652, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 86713244, + "step": 950 + }, + { + "epoch": 3.9583333333333335, + "loss": 0.13179825246334076, + "loss_ce": 0.0014882051618769765, + "loss_iou": 0.296875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 86713244, + "step": 950 + }, + { + "epoch": 3.9625, + "grad_norm": 3.3495506162086532, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 86805068, + "step": 951 + }, + { + "epoch": 3.9625, + "loss": 0.13726088404655457, + "loss_ce": 0.0015492134261876345, + "loss_iou": 0.255859375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 86805068, + "step": 951 + }, + { + "epoch": 3.966666666666667, + "grad_norm": 7.1065666758190735, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 86896444, + "step": 952 + }, + { + "epoch": 3.966666666666667, + "loss": 0.09745579212903976, + "loss_ce": 0.00024204922374337912, + "loss_iou": 0.3515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 86896444, + "step": 952 + }, + { + "epoch": 3.970833333333333, + "grad_norm": 4.579685209282489, + "learning_rate": 5e-05, + "loss": 0.1246, + "num_input_tokens_seen": 86987832, + "step": 953 + }, + { + "epoch": 3.970833333333333, + "loss": 0.0786675363779068, + "loss_ce": 0.0010003021452575922, + "loss_iou": 0.39453125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 86987832, + "step": 953 + }, + { + "epoch": 3.975, + "grad_norm": 3.83942290602781, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 87079384, + "step": 954 + }, + { + "epoch": 3.975, + "loss": 0.08803156018257141, + "loss_ce": 0.0010335702681913972, + "loss_iou": 0.205078125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 87079384, + "step": 954 + }, + { + "epoch": 3.9791666666666665, + "grad_norm": 3.6696422264372415, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 87170740, + "step": 955 + }, + { + "epoch": 3.9791666666666665, + "loss": 0.1223042756319046, + "loss_ce": 0.0009816479869186878, + "loss_iou": 0.287109375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 87170740, + "step": 955 + }, + { + "epoch": 3.9833333333333334, + "grad_norm": 12.134255707746513, + "learning_rate": 5e-05, + "loss": 0.1632, + "num_input_tokens_seen": 87261932, + "step": 956 + }, + { + "epoch": 3.9833333333333334, + "loss": 0.16107177734375, + "loss_ce": 0.0034789997152984142, + "loss_iou": 0.291015625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 87261932, + "step": 956 + }, + { + "epoch": 3.9875, + "grad_norm": 5.088105032232529, + "learning_rate": 5e-05, + "loss": 0.1208, + "num_input_tokens_seen": 87353140, + "step": 957 + }, + { + "epoch": 3.9875, + "loss": 0.15737253427505493, + "loss_ce": 0.004189538769423962, + "loss_iou": 0.283203125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 87353140, + "step": 957 + }, + { + "epoch": 3.9916666666666667, + "grad_norm": 5.805691454909013, + "learning_rate": 5e-05, + "loss": 0.1199, + "num_input_tokens_seen": 87444768, + "step": 958 + }, + { + "epoch": 3.9916666666666667, + "loss": 0.1642824411392212, + "loss_ce": 9.787664021132514e-05, + "loss_iou": 0.298828125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 87444768, + "step": 958 + }, + { + "epoch": 3.9958333333333336, + "grad_norm": 8.423182459422138, + "learning_rate": 5e-05, + "loss": 0.1529, + "num_input_tokens_seen": 87536576, + "step": 959 + }, + { + "epoch": 3.9958333333333336, + "loss": 0.2128945142030716, + "loss_ce": 0.0014076823135837913, + "loss_iou": 0.25390625, + "loss_num": 0.042236328125, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 87536576, + "step": 959 + }, + { + "epoch": 4.0, + "grad_norm": 32.96109338843066, + "learning_rate": 5e-05, + "loss": 0.1576, + "num_input_tokens_seen": 87628096, + "step": 960 + }, + { + "epoch": 4.0, + "loss": 0.07793666422367096, + "loss_ce": 0.00039149654912762344, + "loss_iou": 0.283203125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 87628096, + "step": 960 + }, + { + "epoch": 4.004166666666666, + "grad_norm": 4.443845311228221, + "learning_rate": 5e-05, + "loss": 0.1403, + "num_input_tokens_seen": 87719392, + "step": 961 + }, + { + "epoch": 4.004166666666666, + "loss": 0.1875435709953308, + "loss_ce": 0.00010460759222041816, + "loss_iou": 0.03857421875, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 87719392, + "step": 961 + }, + { + "epoch": 4.008333333333334, + "grad_norm": 4.898741991070323, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 87810456, + "step": 962 + }, + { + "epoch": 4.008333333333334, + "loss": 0.07666168361902237, + "loss_ce": 0.000520326429978013, + "loss_iou": 0.330078125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 87810456, + "step": 962 + }, + { + "epoch": 4.0125, + "grad_norm": 6.425378915500767, + "learning_rate": 5e-05, + "loss": 0.1308, + "num_input_tokens_seen": 87901412, + "step": 963 + }, + { + "epoch": 4.0125, + "loss": 0.15114706754684448, + "loss_ce": 0.00025289118639193475, + "loss_iou": 0.251953125, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 87901412, + "step": 963 + }, + { + "epoch": 4.016666666666667, + "grad_norm": 5.964979636005754, + "learning_rate": 5e-05, + "loss": 0.1322, + "num_input_tokens_seen": 87992700, + "step": 964 + }, + { + "epoch": 4.016666666666667, + "loss": 0.16243040561676025, + "loss_ce": 0.000534652906935662, + "loss_iou": 0.2734375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 87992700, + "step": 964 + }, + { + "epoch": 4.020833333333333, + "grad_norm": 8.049764208385682, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 88084624, + "step": 965 + }, + { + "epoch": 4.020833333333333, + "loss": 0.08246070146560669, + "loss_ce": 0.001886668847873807, + "loss_iou": 0.2373046875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 88084624, + "step": 965 + }, + { + "epoch": 4.025, + "grad_norm": 3.960262569801235, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 88175828, + "step": 966 + }, + { + "epoch": 4.025, + "loss": 0.13607226312160492, + "loss_ce": 0.0002995551039930433, + "loss_iou": 0.322265625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 88175828, + "step": 966 + }, + { + "epoch": 4.029166666666667, + "grad_norm": 19.844560449627313, + "learning_rate": 5e-05, + "loss": 0.1436, + "num_input_tokens_seen": 88266892, + "step": 967 + }, + { + "epoch": 4.029166666666667, + "loss": 0.13706375658512115, + "loss_ce": 0.00025344558525830507, + "loss_iou": 0.34765625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 88266892, + "step": 967 + }, + { + "epoch": 4.033333333333333, + "grad_norm": 8.680644226182876, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 88357920, + "step": 968 + }, + { + "epoch": 4.033333333333333, + "loss": 0.1243818998336792, + "loss_ce": 0.0002669144596438855, + "loss_iou": 0.396484375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 88357920, + "step": 968 + }, + { + "epoch": 4.0375, + "grad_norm": 7.204047305940804, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 88449892, + "step": 969 + }, + { + "epoch": 4.0375, + "loss": 0.1099606603384018, + "loss_ce": 0.003149131080135703, + "loss_iou": 0.3359375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 88449892, + "step": 969 + }, + { + "epoch": 4.041666666666667, + "grad_norm": 2.4152880561554833, + "learning_rate": 5e-05, + "loss": 0.1681, + "num_input_tokens_seen": 88541204, + "step": 970 + }, + { + "epoch": 4.041666666666667, + "loss": 0.11691722273826599, + "loss_ce": 0.00021800363902002573, + "loss_iou": 0.240234375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 88541204, + "step": 970 + }, + { + "epoch": 4.045833333333333, + "grad_norm": 12.140109984366983, + "learning_rate": 5e-05, + "loss": 0.1387, + "num_input_tokens_seen": 88633152, + "step": 971 + }, + { + "epoch": 4.045833333333333, + "loss": 0.10950451344251633, + "loss_ce": 0.0027540295850485563, + "loss_iou": 0.26171875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 88633152, + "step": 971 + }, + { + "epoch": 4.05, + "grad_norm": 4.865660471200905, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 88724328, + "step": 972 + }, + { + "epoch": 4.05, + "loss": 0.10865399241447449, + "loss_ce": 0.0001640023838263005, + "loss_iou": 0.388671875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 88724328, + "step": 972 + }, + { + "epoch": 4.054166666666666, + "grad_norm": 3.998968587081581, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 88815820, + "step": 973 + }, + { + "epoch": 4.054166666666666, + "loss": 0.059591155499219894, + "loss_ce": 5.5868404160719365e-06, + "loss_iou": 0.357421875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 88815820, + "step": 973 + }, + { + "epoch": 4.058333333333334, + "grad_norm": 3.5969518362843984, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 88906316, + "step": 974 + }, + { + "epoch": 4.058333333333334, + "loss": 0.10994169861078262, + "loss_ce": 0.00018522625032346696, + "loss_iou": 0.171875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 88906316, + "step": 974 + }, + { + "epoch": 4.0625, + "grad_norm": 5.148583156604257, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 88997732, + "step": 975 + }, + { + "epoch": 4.0625, + "loss": 0.06272031366825104, + "loss_ce": 0.0009985165670514107, + "loss_iou": 0.34375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 88997732, + "step": 975 + }, + { + "epoch": 4.066666666666666, + "grad_norm": 5.352702192595242, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 89089060, + "step": 976 + }, + { + "epoch": 4.066666666666666, + "loss": 0.0935366153717041, + "loss_ce": 0.0006105887005105615, + "loss_iou": 0.3984375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 89089060, + "step": 976 + }, + { + "epoch": 4.070833333333334, + "grad_norm": 3.929697031856074, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 89180636, + "step": 977 + }, + { + "epoch": 4.070833333333334, + "loss": 0.10896719247102737, + "loss_ce": 0.0013622116530314088, + "loss_iou": 0.259765625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 89180636, + "step": 977 + }, + { + "epoch": 4.075, + "grad_norm": 5.665121284659802, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 89271704, + "step": 978 + }, + { + "epoch": 4.075, + "loss": 0.14936652779579163, + "loss_ce": 0.0018903320888057351, + "loss_iou": 0.21484375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 89271704, + "step": 978 + }, + { + "epoch": 4.079166666666667, + "grad_norm": 4.147776116421283, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 89362856, + "step": 979 + }, + { + "epoch": 4.079166666666667, + "loss": 0.049897756427526474, + "loss_ce": 0.001161185442470014, + "loss_iou": 0.369140625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 89362856, + "step": 979 + }, + { + "epoch": 4.083333333333333, + "grad_norm": 3.9288760153563587, + "learning_rate": 5e-05, + "loss": 0.1276, + "num_input_tokens_seen": 89454140, + "step": 980 + }, + { + "epoch": 4.083333333333333, + "loss": 0.1733560562133789, + "loss_ce": 0.0009012245573103428, + "loss_iou": 0.2890625, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 89454140, + "step": 980 + }, + { + "epoch": 4.0875, + "grad_norm": 2.7682581278089256, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 89545052, + "step": 981 + }, + { + "epoch": 4.0875, + "loss": 0.10385777056217194, + "loss_ce": 6.45157433609711e-06, + "loss_iou": 0.328125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 89545052, + "step": 981 + }, + { + "epoch": 4.091666666666667, + "grad_norm": 1.5098268804953137, + "learning_rate": 5e-05, + "loss": 0.1338, + "num_input_tokens_seen": 89635988, + "step": 982 + }, + { + "epoch": 4.091666666666667, + "loss": 0.15624697506427765, + "loss_ce": 0.0008667304064147174, + "loss_iou": 0.2265625, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 89635988, + "step": 982 + }, + { + "epoch": 4.095833333333333, + "grad_norm": 7.695699542286283, + "learning_rate": 5e-05, + "loss": 0.1346, + "num_input_tokens_seen": 89727320, + "step": 983 + }, + { + "epoch": 4.095833333333333, + "loss": 0.17396774888038635, + "loss_ce": 0.00046768668107688427, + "loss_iou": 0.2890625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 89727320, + "step": 983 + }, + { + "epoch": 4.1, + "grad_norm": 2.949612298898544, + "learning_rate": 5e-05, + "loss": 0.1358, + "num_input_tokens_seen": 89818596, + "step": 984 + }, + { + "epoch": 4.1, + "loss": 0.09132670611143112, + "loss_ce": 0.0010557147907093167, + "loss_iou": 0.35546875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 89818596, + "step": 984 + }, + { + "epoch": 4.104166666666667, + "grad_norm": 4.07795865495468, + "learning_rate": 5e-05, + "loss": 0.1367, + "num_input_tokens_seen": 89909784, + "step": 985 + }, + { + "epoch": 4.104166666666667, + "loss": 0.17781442403793335, + "loss_ce": 0.0014838598435744643, + "loss_iou": 0.30859375, + "loss_num": 0.03515625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 89909784, + "step": 985 + }, + { + "epoch": 4.108333333333333, + "grad_norm": 2.766711540092012, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 90000876, + "step": 986 + }, + { + "epoch": 4.108333333333333, + "loss": 0.13473664224147797, + "loss_ce": 1.5432389091074583e-06, + "loss_iou": 0.54296875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 90000876, + "step": 986 + }, + { + "epoch": 4.1125, + "grad_norm": 4.713177841079944, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 90092116, + "step": 987 + }, + { + "epoch": 4.1125, + "loss": 0.13242650032043457, + "loss_ce": 0.0015823881840333343, + "loss_iou": 0.259765625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 90092116, + "step": 987 + }, + { + "epoch": 4.116666666666666, + "grad_norm": 7.2006335170937055, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 90182952, + "step": 988 + }, + { + "epoch": 4.116666666666666, + "loss": 0.13896583020687103, + "loss_ce": 0.004581678658723831, + "loss_iou": 0.2421875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 90182952, + "step": 988 + }, + { + "epoch": 4.120833333333334, + "grad_norm": 4.493654244976299, + "learning_rate": 5e-05, + "loss": 0.1583, + "num_input_tokens_seen": 90274168, + "step": 989 + }, + { + "epoch": 4.120833333333334, + "loss": 0.19936174154281616, + "loss_ce": 0.0023478814400732517, + "loss_iou": 0.333984375, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 90274168, + "step": 989 + }, + { + "epoch": 4.125, + "grad_norm": 4.991471041662424, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 90365976, + "step": 990 + }, + { + "epoch": 4.125, + "loss": 0.08014944195747375, + "loss_ce": 0.0003764900902751833, + "loss_iou": 0.251953125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 90365976, + "step": 990 + }, + { + "epoch": 4.129166666666666, + "grad_norm": 6.153980458296831, + "learning_rate": 5e-05, + "loss": 0.1423, + "num_input_tokens_seen": 90457376, + "step": 991 + }, + { + "epoch": 4.129166666666666, + "loss": 0.14803236722946167, + "loss_ce": 2.2120133507996798e-05, + "loss_iou": 0.416015625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 90457376, + "step": 991 + }, + { + "epoch": 4.133333333333334, + "grad_norm": 6.076137039796416, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 90548868, + "step": 992 + }, + { + "epoch": 4.133333333333334, + "loss": 0.08611531555652618, + "loss_ce": 4.0487215301254764e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 90548868, + "step": 992 + }, + { + "epoch": 4.1375, + "grad_norm": 3.4031906722640737, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 90640408, + "step": 993 + }, + { + "epoch": 4.1375, + "loss": 0.14683718979358673, + "loss_ce": 4.7640893171774223e-05, + "loss_iou": 0.279296875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 90640408, + "step": 993 + }, + { + "epoch": 4.141666666666667, + "grad_norm": 6.529223897966937, + "learning_rate": 5e-05, + "loss": 0.1685, + "num_input_tokens_seen": 90731516, + "step": 994 + }, + { + "epoch": 4.141666666666667, + "loss": 0.18272951245307922, + "loss_ce": 0.0006005996838212013, + "loss_iou": 0.251953125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 90731516, + "step": 994 + }, + { + "epoch": 4.145833333333333, + "grad_norm": 2.1685415747038927, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 90823400, + "step": 995 + }, + { + "epoch": 4.145833333333333, + "loss": 0.09621228277683258, + "loss_ce": 0.002431760774925351, + "loss_iou": 0.1689453125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 90823400, + "step": 995 + }, + { + "epoch": 4.15, + "grad_norm": 5.302334741694656, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 90914192, + "step": 996 + }, + { + "epoch": 4.15, + "loss": 0.08803659677505493, + "loss_ce": 0.001030988059937954, + "loss_iou": 0.349609375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 90914192, + "step": 996 + }, + { + "epoch": 4.154166666666667, + "grad_norm": 3.955132642719237, + "learning_rate": 5e-05, + "loss": 0.1567, + "num_input_tokens_seen": 91004840, + "step": 997 + }, + { + "epoch": 4.154166666666667, + "loss": 0.15012209117412567, + "loss_ce": 0.00018922274466603994, + "loss_iou": 0.326171875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 91004840, + "step": 997 + }, + { + "epoch": 4.158333333333333, + "grad_norm": 3.1198611222256947, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 91095924, + "step": 998 + }, + { + "epoch": 4.158333333333333, + "loss": 0.1430675983428955, + "loss_ce": 0.0012218987103551626, + "loss_iou": 0.302734375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 91095924, + "step": 998 + }, + { + "epoch": 4.1625, + "grad_norm": 2.630883571061764, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 91186540, + "step": 999 + }, + { + "epoch": 4.1625, + "loss": 0.12479518353939056, + "loss_ce": 3.9328693674178794e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 91186540, + "step": 999 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 6.898916642476488, + "learning_rate": 5e-05, + "loss": 0.1358, + "num_input_tokens_seen": 91278140, + "step": 1000 + }, + { + "epoch": 4.166666666666667, + "eval_seeclick_CIoU": 0.2288176789879799, + "eval_seeclick_GIoU": 0.2192443385720253, + "eval_seeclick_IoU": 0.34198732674121857, + "eval_seeclick_MAE_all": 0.10106326639652252, + "eval_seeclick_MAE_h": 0.1016768105328083, + "eval_seeclick_MAE_w": 0.18323545902967453, + "eval_seeclick_MAE_x_boxes": 0.2194460779428482, + "eval_seeclick_MAE_y_boxes": 0.10459760949015617, + "eval_seeclick_NUM_probability": 0.9999995231628418, + "eval_seeclick_inside_bbox": 0.4943181872367859, + "eval_seeclick_loss": 0.5763809680938721, + "eval_seeclick_loss_ce": 0.12347016483545303, + "eval_seeclick_loss_iou": 0.4483642578125, + "eval_seeclick_loss_num": 0.08683013916015625, + "eval_seeclick_loss_xval": 0.434326171875, + "eval_seeclick_runtime": 74.1847, + "eval_seeclick_samples_per_second": 0.58, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 91278140, + "step": 1000 + }, + { + "epoch": 4.166666666666667, + "eval_icons_CIoU": 0.41445091366767883, + "eval_icons_GIoU": 0.4151296019554138, + "eval_icons_IoU": 0.47638723254203796, + "eval_icons_MAE_all": 0.060545625165104866, + "eval_icons_MAE_h": 0.12721606343984604, + "eval_icons_MAE_w": 0.08871277421712875, + "eval_icons_MAE_x_boxes": 0.08461445942521095, + "eval_icons_MAE_y_boxes": 0.12413446977734566, + "eval_icons_NUM_probability": 0.9999995529651642, + "eval_icons_inside_bbox": 0.640625, + "eval_icons_loss": 0.2962195873260498, + "eval_icons_loss_ce": 6.803241319630615e-06, + "eval_icons_loss_iou": 0.345947265625, + "eval_icons_loss_num": 0.059906005859375, + "eval_icons_loss_xval": 0.299713134765625, + "eval_icons_runtime": 85.3334, + "eval_icons_samples_per_second": 0.586, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 91278140, + "step": 1000 + }, + { + "epoch": 4.166666666666667, + "eval_screenspot_CIoU": 0.39049918452898663, + "eval_screenspot_GIoU": 0.37226015826066333, + "eval_screenspot_IoU": 0.4547177155812581, + "eval_screenspot_MAE_all": 0.0913725992043813, + "eval_screenspot_MAE_h": 0.0856855387489001, + "eval_screenspot_MAE_w": 0.18789143363634744, + "eval_screenspot_MAE_x_boxes": 0.1800380746523539, + "eval_screenspot_MAE_y_boxes": 0.07909448444843292, + "eval_screenspot_NUM_probability": 0.9999943971633911, + "eval_screenspot_inside_bbox": 0.6833333373069763, + "eval_screenspot_loss": 0.4577127993106842, + "eval_screenspot_loss_ce": 1.8053931967187964e-06, + "eval_screenspot_loss_iou": 0.3567708333333333, + "eval_screenspot_loss_num": 0.09382120768229167, + "eval_screenspot_loss_xval": 0.46923828125, + "eval_screenspot_runtime": 154.6239, + "eval_screenspot_samples_per_second": 0.576, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 91278140, + "step": 1000 + }, + { + "epoch": 4.166666666666667, + "eval_compot_CIoU": 0.4610436111688614, + "eval_compot_GIoU": 0.451981320977211, + "eval_compot_IoU": 0.5354782938957214, + "eval_compot_MAE_all": 0.05896247178316116, + "eval_compot_MAE_h": 0.08070729672908783, + "eval_compot_MAE_w": 0.13192753866314888, + "eval_compot_MAE_x_boxes": 0.12282107770442963, + "eval_compot_MAE_y_boxes": 0.08394554629921913, + "eval_compot_NUM_probability": 0.9999991953372955, + "eval_compot_inside_bbox": 0.6892361044883728, + "eval_compot_loss": 0.31611597537994385, + "eval_compot_loss_ce": 0.013481661211699247, + "eval_compot_loss_iou": 0.3232421875, + "eval_compot_loss_num": 0.054492950439453125, + "eval_compot_loss_xval": 0.27239990234375, + "eval_compot_runtime": 86.7171, + "eval_compot_samples_per_second": 0.577, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 91278140, + "step": 1000 + }, + { + "epoch": 4.166666666666667, + "loss": 0.257731556892395, + "loss_ce": 0.012309202924370766, + "loss_iou": 0.306640625, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 91278140, + "step": 1000 + }, + { + "epoch": 4.170833333333333, + "grad_norm": 4.775300264318875, + "learning_rate": 5e-05, + "loss": 0.1354, + "num_input_tokens_seen": 91369144, + "step": 1001 + }, + { + "epoch": 4.170833333333333, + "loss": 0.184337317943573, + "loss_ce": 1.1161824659211561e-05, + "loss_iou": 0.3515625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 91369144, + "step": 1001 + }, + { + "epoch": 4.175, + "grad_norm": 4.021752324374664, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 91460416, + "step": 1002 + }, + { + "epoch": 4.175, + "loss": 0.08465791493654251, + "loss_ce": 2.1546813968598144e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 91460416, + "step": 1002 + }, + { + "epoch": 4.179166666666666, + "grad_norm": 5.366416397358166, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 91551540, + "step": 1003 + }, + { + "epoch": 4.179166666666666, + "loss": 0.10360711067914963, + "loss_ce": 0.0005187301430851221, + "loss_iou": 0.50390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 91551540, + "step": 1003 + }, + { + "epoch": 4.183333333333334, + "grad_norm": 3.0526868130769285, + "learning_rate": 5e-05, + "loss": 0.1933, + "num_input_tokens_seen": 91643256, + "step": 1004 + }, + { + "epoch": 4.183333333333334, + "loss": 0.17099416255950928, + "loss_ce": 6.520580063806847e-05, + "loss_iou": 0.376953125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 91643256, + "step": 1004 + }, + { + "epoch": 4.1875, + "grad_norm": 4.153700213808706, + "learning_rate": 5e-05, + "loss": 0.1375, + "num_input_tokens_seen": 91734420, + "step": 1005 + }, + { + "epoch": 4.1875, + "loss": 0.11866825819015503, + "loss_ce": 0.0002905750006902963, + "loss_iou": 0.1796875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 91734420, + "step": 1005 + }, + { + "epoch": 4.191666666666666, + "grad_norm": 5.938137477817666, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 91825960, + "step": 1006 + }, + { + "epoch": 4.191666666666666, + "loss": 0.12803258001804352, + "loss_ce": 0.0012625595554709435, + "loss_iou": 0.328125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 91825960, + "step": 1006 + }, + { + "epoch": 4.195833333333334, + "grad_norm": 6.701228074881766, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 91917024, + "step": 1007 + }, + { + "epoch": 4.195833333333334, + "loss": 0.13918179273605347, + "loss_ce": 0.0023714962881058455, + "loss_iou": 0.31640625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 91917024, + "step": 1007 + }, + { + "epoch": 4.2, + "grad_norm": 2.58968299528391, + "learning_rate": 5e-05, + "loss": 0.1368, + "num_input_tokens_seen": 92008396, + "step": 1008 + }, + { + "epoch": 4.2, + "loss": 0.18333737552165985, + "loss_ce": 0.004656949080526829, + "loss_iou": 0.267578125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 92008396, + "step": 1008 + }, + { + "epoch": 4.204166666666667, + "grad_norm": 4.2155909481180736, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 92099656, + "step": 1009 + }, + { + "epoch": 4.204166666666667, + "loss": 0.1417737752199173, + "loss_ce": 0.0001874682493507862, + "loss_iou": 0.2373046875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 92099656, + "step": 1009 + }, + { + "epoch": 4.208333333333333, + "grad_norm": 5.415876527285819, + "learning_rate": 5e-05, + "loss": 0.1509, + "num_input_tokens_seen": 92190484, + "step": 1010 + }, + { + "epoch": 4.208333333333333, + "loss": 0.18809230625629425, + "loss_ce": 0.0008364361710846424, + "loss_iou": 0.24609375, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 92190484, + "step": 1010 + }, + { + "epoch": 4.2125, + "grad_norm": 11.825215048838075, + "learning_rate": 5e-05, + "loss": 0.1618, + "num_input_tokens_seen": 92281520, + "step": 1011 + }, + { + "epoch": 4.2125, + "loss": 0.10141883790493011, + "loss_ce": 0.0022519633639603853, + "loss_iou": 0.30859375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 92281520, + "step": 1011 + }, + { + "epoch": 4.216666666666667, + "grad_norm": 5.040315147434173, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 92373020, + "step": 1012 + }, + { + "epoch": 4.216666666666667, + "loss": 0.1095087081193924, + "loss_ce": 0.004589277319610119, + "loss_iou": 0.138671875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 92373020, + "step": 1012 + }, + { + "epoch": 4.220833333333333, + "grad_norm": 11.145069595062028, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 92464692, + "step": 1013 + }, + { + "epoch": 4.220833333333333, + "loss": 0.11222478747367859, + "loss_ce": 0.0022241822443902493, + "loss_iou": 0.21875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 92464692, + "step": 1013 + }, + { + "epoch": 4.225, + "grad_norm": 4.100185141836065, + "learning_rate": 5e-05, + "loss": 0.11, + "num_input_tokens_seen": 92556912, + "step": 1014 + }, + { + "epoch": 4.225, + "loss": 0.0731762945652008, + "loss_ce": 0.0004223883734084666, + "loss_iou": 0.1552734375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 92556912, + "step": 1014 + }, + { + "epoch": 4.229166666666667, + "grad_norm": 11.782810710198497, + "learning_rate": 5e-05, + "loss": 0.1718, + "num_input_tokens_seen": 92648544, + "step": 1015 + }, + { + "epoch": 4.229166666666667, + "loss": 0.13355088233947754, + "loss_ce": 0.00018905679462477565, + "loss_iou": 0.10986328125, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 92648544, + "step": 1015 + }, + { + "epoch": 4.233333333333333, + "grad_norm": 3.266013508735402, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 92740228, + "step": 1016 + }, + { + "epoch": 4.233333333333333, + "loss": 0.110908642411232, + "loss_ce": 9.931866952683777e-05, + "loss_iou": 0.29296875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 92740228, + "step": 1016 + }, + { + "epoch": 4.2375, + "grad_norm": 12.214471155924263, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 92832024, + "step": 1017 + }, + { + "epoch": 4.2375, + "loss": 0.09152170270681381, + "loss_ce": 0.0006403519655577838, + "loss_iou": 0.16796875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 92832024, + "step": 1017 + }, + { + "epoch": 4.241666666666666, + "grad_norm": 3.9020094684822277, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 92922932, + "step": 1018 + }, + { + "epoch": 4.241666666666666, + "loss": 0.09350967407226562, + "loss_ce": 3.81834479412646e-06, + "loss_iou": 0.41015625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 92922932, + "step": 1018 + }, + { + "epoch": 4.245833333333334, + "grad_norm": 7.433497429425666, + "learning_rate": 5e-05, + "loss": 0.1451, + "num_input_tokens_seen": 93013296, + "step": 1019 + }, + { + "epoch": 4.245833333333334, + "loss": 0.14127758145332336, + "loss_ce": 0.0014155278913676739, + "loss_iou": 0.2734375, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 93013296, + "step": 1019 + }, + { + "epoch": 4.25, + "grad_norm": 4.855624630655086, + "learning_rate": 5e-05, + "loss": 0.1653, + "num_input_tokens_seen": 93104704, + "step": 1020 + }, + { + "epoch": 4.25, + "loss": 0.22039823234081268, + "loss_ce": 0.00012235053873155266, + "loss_iou": 0.212890625, + "loss_num": 0.0439453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 93104704, + "step": 1020 + }, + { + "epoch": 4.254166666666666, + "grad_norm": 2.869147326672204, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 93196180, + "step": 1021 + }, + { + "epoch": 4.254166666666666, + "loss": 0.09016729146242142, + "loss_ce": 0.0015747613506391644, + "loss_iou": 0.2099609375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 93196180, + "step": 1021 + }, + { + "epoch": 4.258333333333334, + "grad_norm": 5.461179477967053, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 93287704, + "step": 1022 + }, + { + "epoch": 4.258333333333334, + "loss": 0.10635490715503693, + "loss_ce": 0.008027271367609501, + "loss_iou": 0.248046875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 93287704, + "step": 1022 + }, + { + "epoch": 4.2625, + "grad_norm": 2.559847695714954, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 93379140, + "step": 1023 + }, + { + "epoch": 4.2625, + "loss": 0.10472606867551804, + "loss_ce": 0.001118888845667243, + "loss_iou": 0.287109375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 93379140, + "step": 1023 + }, + { + "epoch": 4.266666666666667, + "grad_norm": 5.012909495931835, + "learning_rate": 5e-05, + "loss": 0.1619, + "num_input_tokens_seen": 93470312, + "step": 1024 + }, + { + "epoch": 4.266666666666667, + "loss": 0.11101584136486053, + "loss_ce": 0.00026755983708426356, + "loss_iou": 0.3359375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 93470312, + "step": 1024 + }, + { + "epoch": 4.270833333333333, + "grad_norm": 4.205104507070826, + "learning_rate": 5e-05, + "loss": 0.1457, + "num_input_tokens_seen": 93561672, + "step": 1025 + }, + { + "epoch": 4.270833333333333, + "loss": 0.17691843211650848, + "loss_ce": 8.432482718490064e-05, + "loss_iou": 0.1044921875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 93561672, + "step": 1025 + }, + { + "epoch": 4.275, + "grad_norm": 3.0823987312822663, + "learning_rate": 5e-05, + "loss": 0.1219, + "num_input_tokens_seen": 93652988, + "step": 1026 + }, + { + "epoch": 4.275, + "loss": 0.11530449986457825, + "loss_ce": 0.00013116124318912625, + "loss_iou": 0.33984375, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 93652988, + "step": 1026 + }, + { + "epoch": 4.279166666666667, + "grad_norm": 41.05969313267573, + "learning_rate": 5e-05, + "loss": 0.1262, + "num_input_tokens_seen": 93743720, + "step": 1027 + }, + { + "epoch": 4.279166666666667, + "loss": 0.14659270644187927, + "loss_ce": 1.6788795619504526e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 93743720, + "step": 1027 + }, + { + "epoch": 4.283333333333333, + "grad_norm": 12.961008697360798, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 93834976, + "step": 1028 + }, + { + "epoch": 4.283333333333333, + "loss": 0.14191699028015137, + "loss_ce": 0.00031542833312414587, + "loss_iou": 0.486328125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 93834976, + "step": 1028 + }, + { + "epoch": 4.2875, + "grad_norm": 3.5652944680153937, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 93926376, + "step": 1029 + }, + { + "epoch": 4.2875, + "loss": 0.07104349136352539, + "loss_ce": 0.0011582336155697703, + "loss_iou": 0.267578125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 93926376, + "step": 1029 + }, + { + "epoch": 4.291666666666667, + "grad_norm": 6.44765876025744, + "learning_rate": 5e-05, + "loss": 0.1574, + "num_input_tokens_seen": 94017836, + "step": 1030 + }, + { + "epoch": 4.291666666666667, + "loss": 0.17174601554870605, + "loss_ce": 0.00159526988863945, + "loss_iou": 0.267578125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 94017836, + "step": 1030 + }, + { + "epoch": 4.295833333333333, + "grad_norm": 5.20718161105157, + "learning_rate": 5e-05, + "loss": 0.1019, + "num_input_tokens_seen": 94108616, + "step": 1031 + }, + { + "epoch": 4.295833333333333, + "loss": 0.10829800367355347, + "loss_ce": 0.00018948587239719927, + "loss_iou": 0.310546875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 94108616, + "step": 1031 + }, + { + "epoch": 4.3, + "grad_norm": 7.001137693637598, + "learning_rate": 5e-05, + "loss": 0.1759, + "num_input_tokens_seen": 94199668, + "step": 1032 + }, + { + "epoch": 4.3, + "loss": 0.17750316858291626, + "loss_ce": 7.396183355012909e-05, + "loss_iou": 0.390625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 94199668, + "step": 1032 + }, + { + "epoch": 4.304166666666666, + "grad_norm": 5.786106834808877, + "learning_rate": 5e-05, + "loss": 0.1418, + "num_input_tokens_seen": 94291036, + "step": 1033 + }, + { + "epoch": 4.304166666666666, + "loss": 0.1702231764793396, + "loss_ce": 0.002376505173742771, + "loss_iou": 0.13671875, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 94291036, + "step": 1033 + }, + { + "epoch": 4.308333333333334, + "grad_norm": 4.057659988135845, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 94382240, + "step": 1034 + }, + { + "epoch": 4.308333333333334, + "loss": 0.09596603363752365, + "loss_ce": 0.0020329286344349384, + "loss_iou": 0.3125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 94382240, + "step": 1034 + }, + { + "epoch": 4.3125, + "grad_norm": 60.708773659127075, + "learning_rate": 5e-05, + "loss": 0.1717, + "num_input_tokens_seen": 94473272, + "step": 1035 + }, + { + "epoch": 4.3125, + "loss": 0.15523292124271393, + "loss_ce": 0.006978522054851055, + "loss_iou": 0.279296875, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 94473272, + "step": 1035 + }, + { + "epoch": 4.316666666666666, + "grad_norm": 15.166310694757913, + "learning_rate": 5e-05, + "loss": 0.2223, + "num_input_tokens_seen": 94564500, + "step": 1036 + }, + { + "epoch": 4.316666666666666, + "loss": 0.2636827230453491, + "loss_ce": 1.0847867088159546e-05, + "loss_iou": 0.13671875, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 94564500, + "step": 1036 + }, + { + "epoch": 4.320833333333334, + "grad_norm": 4.906645673402136, + "learning_rate": 5e-05, + "loss": 0.1672, + "num_input_tokens_seen": 94655904, + "step": 1037 + }, + { + "epoch": 4.320833333333334, + "loss": 0.2126345932483673, + "loss_ce": 0.0011172639206051826, + "loss_iou": 0.298828125, + "loss_num": 0.042236328125, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 94655904, + "step": 1037 + }, + { + "epoch": 4.325, + "grad_norm": 6.5407679231960385, + "learning_rate": 5e-05, + "loss": 0.1233, + "num_input_tokens_seen": 94747468, + "step": 1038 + }, + { + "epoch": 4.325, + "loss": 0.13433226943016052, + "loss_ce": 0.001580800162628293, + "loss_iou": 0.296875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 94747468, + "step": 1038 + }, + { + "epoch": 4.329166666666667, + "grad_norm": 2.6249773090077264, + "learning_rate": 5e-05, + "loss": 0.1367, + "num_input_tokens_seen": 94838488, + "step": 1039 + }, + { + "epoch": 4.329166666666667, + "loss": 0.09026487916707993, + "loss_ce": 0.00106199795845896, + "loss_iou": 0.287109375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 94838488, + "step": 1039 + }, + { + "epoch": 4.333333333333333, + "grad_norm": 4.371515638310731, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 94929896, + "step": 1040 + }, + { + "epoch": 4.333333333333333, + "loss": 0.10758574306964874, + "loss_ce": 0.0004690401256084442, + "loss_iou": 0.353515625, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 94929896, + "step": 1040 + }, + { + "epoch": 4.3375, + "grad_norm": 4.839714934876044, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 95020620, + "step": 1041 + }, + { + "epoch": 4.3375, + "loss": 0.11356394737958908, + "loss_ce": 8.036779036046937e-06, + "loss_iou": 0.34765625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 95020620, + "step": 1041 + }, + { + "epoch": 4.341666666666667, + "grad_norm": 2.935495078974472, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 95111780, + "step": 1042 + }, + { + "epoch": 4.341666666666667, + "loss": 0.0978274717926979, + "loss_ce": 0.00215486460365355, + "loss_iou": 0.189453125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 95111780, + "step": 1042 + }, + { + "epoch": 4.345833333333333, + "grad_norm": 5.575717494937947, + "learning_rate": 5e-05, + "loss": 0.1329, + "num_input_tokens_seen": 95202328, + "step": 1043 + }, + { + "epoch": 4.345833333333333, + "loss": 0.0912865400314331, + "loss_ce": 0.00106132123619318, + "loss_iou": 0.2431640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 95202328, + "step": 1043 + }, + { + "epoch": 4.35, + "grad_norm": 38.64345492570206, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 95293976, + "step": 1044 + }, + { + "epoch": 4.35, + "loss": 0.09628809988498688, + "loss_ce": 0.0026449114084243774, + "loss_iou": 0.361328125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 95293976, + "step": 1044 + }, + { + "epoch": 4.354166666666667, + "grad_norm": 4.982650414589755, + "learning_rate": 5e-05, + "loss": 0.1407, + "num_input_tokens_seen": 95385772, + "step": 1045 + }, + { + "epoch": 4.354166666666667, + "loss": 0.08871060609817505, + "loss_ce": 0.0003011856460943818, + "loss_iou": 0.32421875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 95385772, + "step": 1045 + }, + { + "epoch": 4.358333333333333, + "grad_norm": 6.46543114351235, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 95477428, + "step": 1046 + }, + { + "epoch": 4.358333333333333, + "loss": 0.13676266372203827, + "loss_ce": 0.0015850570052862167, + "loss_iou": 0.337890625, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 95477428, + "step": 1046 + }, + { + "epoch": 4.3625, + "grad_norm": 5.489329432381536, + "learning_rate": 5e-05, + "loss": 0.1253, + "num_input_tokens_seen": 95568880, + "step": 1047 + }, + { + "epoch": 4.3625, + "loss": 0.14458361268043518, + "loss_ce": 0.0009297404321841896, + "loss_iou": 0.388671875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 95568880, + "step": 1047 + }, + { + "epoch": 4.366666666666666, + "grad_norm": 5.301059214254398, + "learning_rate": 5e-05, + "loss": 0.1523, + "num_input_tokens_seen": 95659964, + "step": 1048 + }, + { + "epoch": 4.366666666666666, + "loss": 0.18293695151805878, + "loss_ce": 0.0017235726118087769, + "loss_iou": 0.419921875, + "loss_num": 0.0361328125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 95659964, + "step": 1048 + }, + { + "epoch": 4.370833333333334, + "grad_norm": 4.682583376414269, + "learning_rate": 5e-05, + "loss": 0.1188, + "num_input_tokens_seen": 95751316, + "step": 1049 + }, + { + "epoch": 4.370833333333334, + "loss": 0.08031313121318817, + "loss_ce": 0.0013336361153051257, + "loss_iou": 0.2119140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 95751316, + "step": 1049 + }, + { + "epoch": 4.375, + "grad_norm": 2.8000950158405327, + "learning_rate": 5e-05, + "loss": 0.1377, + "num_input_tokens_seen": 95842480, + "step": 1050 + }, + { + "epoch": 4.375, + "loss": 0.11660677939653397, + "loss_ce": 9.066909842658788e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 95842480, + "step": 1050 + }, + { + "epoch": 4.379166666666666, + "grad_norm": 2.890628721811094, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 95933772, + "step": 1051 + }, + { + "epoch": 4.379166666666666, + "loss": 0.07693706452846527, + "loss_ce": 0.00021587421360891312, + "loss_iou": 0.197265625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 95933772, + "step": 1051 + }, + { + "epoch": 4.383333333333334, + "grad_norm": 6.7502043431928715, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 96025156, + "step": 1052 + }, + { + "epoch": 4.383333333333334, + "loss": 0.06723552942276001, + "loss_ce": 0.0006766942678950727, + "loss_iou": 0.31640625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 96025156, + "step": 1052 + }, + { + "epoch": 4.3875, + "grad_norm": 12.387700241419916, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 96116552, + "step": 1053 + }, + { + "epoch": 4.3875, + "loss": 0.16309259831905365, + "loss_ce": 0.0009221778018400073, + "loss_iou": 0.46875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 96116552, + "step": 1053 + }, + { + "epoch": 4.391666666666667, + "grad_norm": 2.7429843825048317, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 96208260, + "step": 1054 + }, + { + "epoch": 4.391666666666667, + "loss": 0.11623889207839966, + "loss_ce": 0.00274402042850852, + "loss_iou": 0.361328125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 96208260, + "step": 1054 + }, + { + "epoch": 4.395833333333333, + "grad_norm": 3.122685674128764, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 96299532, + "step": 1055 + }, + { + "epoch": 4.395833333333333, + "loss": 0.08110110461711884, + "loss_ce": 0.002625147346407175, + "loss_iou": 0.1640625, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 96299532, + "step": 1055 + }, + { + "epoch": 4.4, + "grad_norm": 3.2042392537005755, + "learning_rate": 5e-05, + "loss": 0.1466, + "num_input_tokens_seen": 96390992, + "step": 1056 + }, + { + "epoch": 4.4, + "loss": 0.162710040807724, + "loss_ce": 0.0016687808092683554, + "loss_iou": 0.224609375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 96390992, + "step": 1056 + }, + { + "epoch": 4.404166666666667, + "grad_norm": 12.795231372183528, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 96482032, + "step": 1057 + }, + { + "epoch": 4.404166666666667, + "loss": 0.11816906929016113, + "loss_ce": 0.003926511853933334, + "loss_iou": 0.103515625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 96482032, + "step": 1057 + }, + { + "epoch": 4.408333333333333, + "grad_norm": 6.009806761384676, + "learning_rate": 5e-05, + "loss": 0.1758, + "num_input_tokens_seen": 96573696, + "step": 1058 + }, + { + "epoch": 4.408333333333333, + "loss": 0.140816330909729, + "loss_ce": 0.004646895453333855, + "loss_iou": 0.2119140625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 96573696, + "step": 1058 + }, + { + "epoch": 4.4125, + "grad_norm": 4.539913894785967, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 96664868, + "step": 1059 + }, + { + "epoch": 4.4125, + "loss": 0.111955925822258, + "loss_ce": 0.0019400569144636393, + "loss_iou": 0.2021484375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 96664868, + "step": 1059 + }, + { + "epoch": 4.416666666666667, + "grad_norm": 8.195207688890294, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 96756356, + "step": 1060 + }, + { + "epoch": 4.416666666666667, + "loss": 0.1350308656692505, + "loss_ce": 0.0017300797626376152, + "loss_iou": 0.23828125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 96756356, + "step": 1060 + }, + { + "epoch": 4.420833333333333, + "grad_norm": 3.0907685806253427, + "learning_rate": 5e-05, + "loss": 0.137, + "num_input_tokens_seen": 96847764, + "step": 1061 + }, + { + "epoch": 4.420833333333333, + "loss": 0.13408119976520538, + "loss_ce": 0.0004905025125481188, + "loss_iou": 0.41796875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 96847764, + "step": 1061 + }, + { + "epoch": 4.425, + "grad_norm": 4.23812751420282, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 96938988, + "step": 1062 + }, + { + "epoch": 4.425, + "loss": 0.08144142478704453, + "loss_ce": 0.0015769237652420998, + "loss_iou": 0.263671875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 96938988, + "step": 1062 + }, + { + "epoch": 4.429166666666666, + "grad_norm": 4.6119967415837015, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 97029908, + "step": 1063 + }, + { + "epoch": 4.429166666666666, + "loss": 0.10544291138648987, + "loss_ce": 4.6817790462228e-06, + "loss_iou": 0.302734375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 97029908, + "step": 1063 + }, + { + "epoch": 4.433333333333334, + "grad_norm": 20.866494495602268, + "learning_rate": 5e-05, + "loss": 0.1457, + "num_input_tokens_seen": 97121228, + "step": 1064 + }, + { + "epoch": 4.433333333333334, + "loss": 0.15473097562789917, + "loss_ce": 0.00260083912871778, + "loss_iou": 0.349609375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 97121228, + "step": 1064 + }, + { + "epoch": 4.4375, + "grad_norm": 8.185767026864667, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 97212372, + "step": 1065 + }, + { + "epoch": 4.4375, + "loss": 0.08164595067501068, + "loss_ce": 0.0008964404696598649, + "loss_iou": 0.1953125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 97212372, + "step": 1065 + }, + { + "epoch": 4.441666666666666, + "grad_norm": 9.766264831533416, + "learning_rate": 5e-05, + "loss": 0.1549, + "num_input_tokens_seen": 97303324, + "step": 1066 + }, + { + "epoch": 4.441666666666666, + "loss": 0.1613626778125763, + "loss_ce": 3.1500829209107906e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 97303324, + "step": 1066 + }, + { + "epoch": 4.445833333333334, + "grad_norm": 8.703973354918965, + "learning_rate": 5e-05, + "loss": 0.1389, + "num_input_tokens_seen": 97394880, + "step": 1067 + }, + { + "epoch": 4.445833333333334, + "loss": 0.1335448920726776, + "loss_ce": 0.00012203974620206282, + "loss_iou": 0.419921875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 97394880, + "step": 1067 + }, + { + "epoch": 4.45, + "grad_norm": 23.521570844410952, + "learning_rate": 5e-05, + "loss": 0.1463, + "num_input_tokens_seen": 97486472, + "step": 1068 + }, + { + "epoch": 4.45, + "loss": 0.129866823554039, + "loss_ce": 0.0012657458428293467, + "loss_iou": 0.390625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 97486472, + "step": 1068 + }, + { + "epoch": 4.454166666666667, + "grad_norm": 5.231644248721244, + "learning_rate": 5e-05, + "loss": 0.1618, + "num_input_tokens_seen": 97578168, + "step": 1069 + }, + { + "epoch": 4.454166666666667, + "loss": 0.15577560663223267, + "loss_ce": 0.0015092582907527685, + "loss_iou": 0.2333984375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 97578168, + "step": 1069 + }, + { + "epoch": 4.458333333333333, + "grad_norm": 4.642798890431178, + "learning_rate": 5e-05, + "loss": 0.1194, + "num_input_tokens_seen": 97669944, + "step": 1070 + }, + { + "epoch": 4.458333333333333, + "loss": 0.10740844905376434, + "loss_ce": 0.0020617651753127575, + "loss_iou": 0.27734375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 97669944, + "step": 1070 + }, + { + "epoch": 4.4625, + "grad_norm": 11.450914706764102, + "learning_rate": 5e-05, + "loss": 0.1861, + "num_input_tokens_seen": 97760660, + "step": 1071 + }, + { + "epoch": 4.4625, + "loss": 0.2228844165802002, + "loss_ce": 0.0006554118008352816, + "loss_iou": 0.275390625, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 97760660, + "step": 1071 + }, + { + "epoch": 4.466666666666667, + "grad_norm": 2.8459211790179264, + "learning_rate": 5e-05, + "loss": 0.1802, + "num_input_tokens_seen": 97851884, + "step": 1072 + }, + { + "epoch": 4.466666666666667, + "loss": 0.2216033786535263, + "loss_ce": 4.576363062369637e-05, + "loss_iou": 0.28515625, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 97851884, + "step": 1072 + }, + { + "epoch": 4.470833333333333, + "grad_norm": 8.759497913734183, + "learning_rate": 5e-05, + "loss": 0.1255, + "num_input_tokens_seen": 97943872, + "step": 1073 + }, + { + "epoch": 4.470833333333333, + "loss": 0.19304654002189636, + "loss_ce": 0.00418851338326931, + "loss_iou": 0.201171875, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 97943872, + "step": 1073 + }, + { + "epoch": 4.475, + "grad_norm": 1.6902853797270938, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 98034840, + "step": 1074 + }, + { + "epoch": 4.475, + "loss": 0.1364348828792572, + "loss_ce": 0.0004638074606191367, + "loss_iou": 0.2578125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 98034840, + "step": 1074 + }, + { + "epoch": 4.479166666666667, + "grad_norm": 5.3041756834731135, + "learning_rate": 5e-05, + "loss": 0.2545, + "num_input_tokens_seen": 98126216, + "step": 1075 + }, + { + "epoch": 4.479166666666667, + "loss": 0.22662073373794556, + "loss_ce": 0.000882212829310447, + "loss_iou": 0.291015625, + "loss_num": 0.045166015625, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 98126216, + "step": 1075 + }, + { + "epoch": 4.483333333333333, + "grad_norm": 5.693385201390032, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 98217784, + "step": 1076 + }, + { + "epoch": 4.483333333333333, + "loss": 0.08883555233478546, + "loss_ce": 0.0010975135955959558, + "loss_iou": 0.3984375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 98217784, + "step": 1076 + }, + { + "epoch": 4.4875, + "grad_norm": 11.974885328346211, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 98308892, + "step": 1077 + }, + { + "epoch": 4.4875, + "loss": 0.08810891956090927, + "loss_ce": 0.0007065777899697423, + "loss_iou": 0.31640625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 98308892, + "step": 1077 + }, + { + "epoch": 4.491666666666666, + "grad_norm": 5.405136544371594, + "learning_rate": 5e-05, + "loss": 0.182, + "num_input_tokens_seen": 98400664, + "step": 1078 + }, + { + "epoch": 4.491666666666666, + "loss": 0.1772213578224182, + "loss_ce": 0.0016232000198215246, + "loss_iou": 0.27734375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 98400664, + "step": 1078 + }, + { + "epoch": 4.495833333333334, + "grad_norm": 15.460965408014841, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 98492180, + "step": 1079 + }, + { + "epoch": 4.495833333333334, + "loss": 0.11177276074886322, + "loss_ce": 0.0016958509804680943, + "loss_iou": 0.294921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 98492180, + "step": 1079 + }, + { + "epoch": 4.5, + "grad_norm": 1.8566096703105832, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 98583616, + "step": 1080 + }, + { + "epoch": 4.5, + "loss": 0.07147879153490067, + "loss_ce": 0.0007237876998260617, + "loss_iou": 0.1484375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 98583616, + "step": 1080 + }, + { + "epoch": 4.504166666666666, + "grad_norm": 4.0890400559643, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 98673480, + "step": 1081 + }, + { + "epoch": 4.504166666666666, + "loss": 0.1101948693394661, + "loss_ce": 0.0014302213676273823, + "loss_iou": 0.296875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 98673480, + "step": 1081 + }, + { + "epoch": 4.508333333333333, + "grad_norm": 3.0362109072862378, + "learning_rate": 5e-05, + "loss": 0.1142, + "num_input_tokens_seen": 98764280, + "step": 1082 + }, + { + "epoch": 4.508333333333333, + "loss": 0.1345566213130951, + "loss_ce": 9.617566684028134e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 98764280, + "step": 1082 + }, + { + "epoch": 4.5125, + "grad_norm": 2.074294944015263, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 98855312, + "step": 1083 + }, + { + "epoch": 4.5125, + "loss": 0.10959449410438538, + "loss_ce": 0.005514297168701887, + "loss_iou": 0.439453125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 98855312, + "step": 1083 + }, + { + "epoch": 4.516666666666667, + "grad_norm": 6.223919956572017, + "learning_rate": 5e-05, + "loss": 0.126, + "num_input_tokens_seen": 98946616, + "step": 1084 + }, + { + "epoch": 4.516666666666667, + "loss": 0.0991659164428711, + "loss_ce": 0.0009603450307622552, + "loss_iou": 0.2578125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 98946616, + "step": 1084 + }, + { + "epoch": 4.520833333333333, + "grad_norm": 4.230394093817779, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 99037472, + "step": 1085 + }, + { + "epoch": 4.520833333333333, + "loss": 0.0747312381863594, + "loss_ce": 0.0007413700805045664, + "loss_iou": 0.26953125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 99037472, + "step": 1085 + }, + { + "epoch": 4.525, + "grad_norm": 3.7165027957165955, + "learning_rate": 5e-05, + "loss": 0.1435, + "num_input_tokens_seen": 99128828, + "step": 1086 + }, + { + "epoch": 4.525, + "loss": 0.1280263066291809, + "loss_ce": 0.00021869037300348282, + "loss_iou": 0.455078125, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 99128828, + "step": 1086 + }, + { + "epoch": 4.529166666666667, + "grad_norm": 14.01206249592369, + "learning_rate": 5e-05, + "loss": 0.1471, + "num_input_tokens_seen": 99220736, + "step": 1087 + }, + { + "epoch": 4.529166666666667, + "loss": 0.16491644084453583, + "loss_ce": 0.0023187866900116205, + "loss_iou": 0.328125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 99220736, + "step": 1087 + }, + { + "epoch": 4.533333333333333, + "grad_norm": 2.599111310139979, + "learning_rate": 5e-05, + "loss": 0.1357, + "num_input_tokens_seen": 99311436, + "step": 1088 + }, + { + "epoch": 4.533333333333333, + "loss": 0.16733185946941376, + "loss_ce": 0.0006448504282161593, + "loss_iou": 0.220703125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 99311436, + "step": 1088 + }, + { + "epoch": 4.5375, + "grad_norm": 4.24205043514949, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 99403160, + "step": 1089 + }, + { + "epoch": 4.5375, + "loss": 0.07050883769989014, + "loss_ce": 0.001424671383574605, + "loss_iou": 0.2216796875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 99403160, + "step": 1089 + }, + { + "epoch": 4.541666666666667, + "grad_norm": 3.7078569700241553, + "learning_rate": 5e-05, + "loss": 0.1323, + "num_input_tokens_seen": 99494308, + "step": 1090 + }, + { + "epoch": 4.541666666666667, + "loss": 0.1405109465122223, + "loss_ce": 0.0011218992294743657, + "loss_iou": 0.1826171875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 99494308, + "step": 1090 + }, + { + "epoch": 4.545833333333333, + "grad_norm": 4.602171652582107, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 99586328, + "step": 1091 + }, + { + "epoch": 4.545833333333333, + "loss": 0.07130880653858185, + "loss_ce": 0.002194119617342949, + "loss_iou": 0.265625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 99586328, + "step": 1091 + }, + { + "epoch": 4.55, + "grad_norm": 5.519884250024296, + "learning_rate": 5e-05, + "loss": 0.1428, + "num_input_tokens_seen": 99677508, + "step": 1092 + }, + { + "epoch": 4.55, + "loss": 0.19089868664741516, + "loss_ce": 0.0008352050790563226, + "loss_iou": 0.37109375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 99677508, + "step": 1092 + }, + { + "epoch": 4.554166666666667, + "grad_norm": 3.085873838630324, + "learning_rate": 5e-05, + "loss": 0.1394, + "num_input_tokens_seen": 99768852, + "step": 1093 + }, + { + "epoch": 4.554166666666667, + "loss": 0.18020425736904144, + "loss_ce": 0.00039468033355660737, + "loss_iou": 0.408203125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 99768852, + "step": 1093 + }, + { + "epoch": 4.558333333333334, + "grad_norm": 7.984248544548892, + "learning_rate": 5e-05, + "loss": 0.1766, + "num_input_tokens_seen": 99860088, + "step": 1094 + }, + { + "epoch": 4.558333333333334, + "loss": 0.2613310217857361, + "loss_ce": 3.948756420868449e-05, + "loss_iou": 0.32421875, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 99860088, + "step": 1094 + }, + { + "epoch": 4.5625, + "grad_norm": 14.35350456740094, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 99951820, + "step": 1095 + }, + { + "epoch": 4.5625, + "loss": 0.1256924569606781, + "loss_ce": 0.00035675818799063563, + "loss_iou": 0.333984375, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 99951820, + "step": 1095 + }, + { + "epoch": 4.566666666666666, + "grad_norm": 15.883834122510626, + "learning_rate": 5e-05, + "loss": 0.135, + "num_input_tokens_seen": 100043104, + "step": 1096 + }, + { + "epoch": 4.566666666666666, + "loss": 0.11116228997707367, + "loss_ce": 0.0041066245175898075, + "loss_iou": 0.296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 100043104, + "step": 1096 + }, + { + "epoch": 4.570833333333333, + "grad_norm": 4.523964712573911, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 100134216, + "step": 1097 + }, + { + "epoch": 4.570833333333333, + "loss": 0.07000759243965149, + "loss_ce": 0.0021212399005889893, + "loss_iou": 0.30078125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 100134216, + "step": 1097 + }, + { + "epoch": 4.575, + "grad_norm": 3.061116023954208, + "learning_rate": 5e-05, + "loss": 0.1407, + "num_input_tokens_seen": 100225776, + "step": 1098 + }, + { + "epoch": 4.575, + "loss": 0.11010673642158508, + "loss_ce": 0.001220025704242289, + "loss_iou": 0.234375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 100225776, + "step": 1098 + }, + { + "epoch": 4.579166666666667, + "grad_norm": 5.851281585800328, + "learning_rate": 5e-05, + "loss": 0.1402, + "num_input_tokens_seen": 100316984, + "step": 1099 + }, + { + "epoch": 4.579166666666667, + "loss": 0.16490298509597778, + "loss_ce": 0.00032168958568945527, + "loss_iou": 0.294921875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 100316984, + "step": 1099 + }, + { + "epoch": 4.583333333333333, + "grad_norm": 2.198330942761651, + "learning_rate": 5e-05, + "loss": 0.1685, + "num_input_tokens_seen": 100408276, + "step": 1100 + }, + { + "epoch": 4.583333333333333, + "loss": 0.10341347754001617, + "loss_ce": 0.00046242796815931797, + "loss_iou": 0.06787109375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 100408276, + "step": 1100 + }, + { + "epoch": 4.5875, + "grad_norm": 8.432062934184843, + "learning_rate": 5e-05, + "loss": 0.149, + "num_input_tokens_seen": 100499856, + "step": 1101 + }, + { + "epoch": 4.5875, + "loss": 0.14112815260887146, + "loss_ce": 0.002944562118500471, + "loss_iou": 0.294921875, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 100499856, + "step": 1101 + }, + { + "epoch": 4.591666666666667, + "grad_norm": 9.177777201547373, + "learning_rate": 5e-05, + "loss": 0.1512, + "num_input_tokens_seen": 100591332, + "step": 1102 + }, + { + "epoch": 4.591666666666667, + "loss": 0.1141015812754631, + "loss_ce": 0.0024530200753360987, + "loss_iou": 0.37109375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 100591332, + "step": 1102 + }, + { + "epoch": 4.595833333333333, + "grad_norm": 7.531965106745693, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 100682972, + "step": 1103 + }, + { + "epoch": 4.595833333333333, + "loss": 0.09467661380767822, + "loss_ce": 0.0016590356826782227, + "loss_iou": 0.359375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 100682972, + "step": 1103 + }, + { + "epoch": 4.6, + "grad_norm": 3.007683190875576, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 100774672, + "step": 1104 + }, + { + "epoch": 4.6, + "loss": 0.14586971700191498, + "loss_ce": 0.006160246208310127, + "loss_iou": 0.25390625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 100774672, + "step": 1104 + }, + { + "epoch": 4.604166666666667, + "grad_norm": 6.5108841230106, + "learning_rate": 5e-05, + "loss": 0.1634, + "num_input_tokens_seen": 100865852, + "step": 1105 + }, + { + "epoch": 4.604166666666667, + "loss": 0.08255942910909653, + "loss_ce": 0.0003298107476439327, + "loss_iou": 0.25, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 100865852, + "step": 1105 + }, + { + "epoch": 4.608333333333333, + "grad_norm": 1.9635394555090955, + "learning_rate": 5e-05, + "loss": 0.1689, + "num_input_tokens_seen": 100956540, + "step": 1106 + }, + { + "epoch": 4.608333333333333, + "loss": 0.22228175401687622, + "loss_ce": 2.223448973381892e-05, + "loss_iou": 0.1953125, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 100956540, + "step": 1106 + }, + { + "epoch": 4.6125, + "grad_norm": 3.526058451584953, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 101047388, + "step": 1107 + }, + { + "epoch": 4.6125, + "loss": 0.0747670978307724, + "loss_ce": 0.0007314551039598882, + "loss_iou": 0.2890625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 101047388, + "step": 1107 + }, + { + "epoch": 4.616666666666667, + "grad_norm": 11.893244447837684, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 101138732, + "step": 1108 + }, + { + "epoch": 4.616666666666667, + "loss": 0.12535637617111206, + "loss_ce": 0.0013024121290072799, + "loss_iou": 0.251953125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 101138732, + "step": 1108 + }, + { + "epoch": 4.620833333333334, + "grad_norm": 5.773796067589656, + "learning_rate": 5e-05, + "loss": 0.1282, + "num_input_tokens_seen": 101230184, + "step": 1109 + }, + { + "epoch": 4.620833333333334, + "loss": 0.10866034775972366, + "loss_ce": 0.0015436523826792836, + "loss_iou": 0.21484375, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 101230184, + "step": 1109 + }, + { + "epoch": 4.625, + "grad_norm": 4.518275709839977, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 101321308, + "step": 1110 + }, + { + "epoch": 4.625, + "loss": 0.1575690656900406, + "loss_ce": 6.810591457906412e-06, + "loss_iou": 0.41796875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 101321308, + "step": 1110 + }, + { + "epoch": 4.629166666666666, + "grad_norm": 4.7529760698914485, + "learning_rate": 5e-05, + "loss": 0.1158, + "num_input_tokens_seen": 101412848, + "step": 1111 + }, + { + "epoch": 4.629166666666666, + "loss": 0.08711521327495575, + "loss_ce": 0.0008115016971714795, + "loss_iou": 0.29296875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 101412848, + "step": 1111 + }, + { + "epoch": 4.633333333333333, + "grad_norm": 3.132943307581427, + "learning_rate": 5e-05, + "loss": 0.127, + "num_input_tokens_seen": 101504576, + "step": 1112 + }, + { + "epoch": 4.633333333333333, + "loss": 0.15530680119991302, + "loss_ce": 0.0011930357431992888, + "loss_iou": 0.283203125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 101504576, + "step": 1112 + }, + { + "epoch": 4.6375, + "grad_norm": 2.886742399910248, + "learning_rate": 5e-05, + "loss": 0.1289, + "num_input_tokens_seen": 101595300, + "step": 1113 + }, + { + "epoch": 4.6375, + "loss": 0.12066149711608887, + "loss_ce": 2.550972203607671e-05, + "loss_iou": 0.404296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 101595300, + "step": 1113 + }, + { + "epoch": 4.641666666666667, + "grad_norm": 13.523813010513921, + "learning_rate": 5e-05, + "loss": 0.2184, + "num_input_tokens_seen": 101686740, + "step": 1114 + }, + { + "epoch": 4.641666666666667, + "loss": 0.29297542572021484, + "loss_ce": 0.0011052797781303525, + "loss_iou": 0.2080078125, + "loss_num": 0.058349609375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 101686740, + "step": 1114 + }, + { + "epoch": 4.645833333333333, + "grad_norm": 3.3219089109226085, + "learning_rate": 5e-05, + "loss": 0.1614, + "num_input_tokens_seen": 101778172, + "step": 1115 + }, + { + "epoch": 4.645833333333333, + "loss": 0.14340245723724365, + "loss_ce": 6.140043842606246e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 101778172, + "step": 1115 + }, + { + "epoch": 4.65, + "grad_norm": 10.960525792775918, + "learning_rate": 5e-05, + "loss": 0.1611, + "num_input_tokens_seen": 101869384, + "step": 1116 + }, + { + "epoch": 4.65, + "loss": 0.19843502342700958, + "loss_ce": 0.0028860135935246944, + "loss_iou": 0.373046875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 101869384, + "step": 1116 + }, + { + "epoch": 4.654166666666667, + "grad_norm": 4.007868167346531, + "learning_rate": 5e-05, + "loss": 0.1422, + "num_input_tokens_seen": 101960296, + "step": 1117 + }, + { + "epoch": 4.654166666666667, + "loss": 0.1601758450269699, + "loss_ce": 0.0002637325960677117, + "loss_iou": 0.310546875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 101960296, + "step": 1117 + }, + { + "epoch": 4.658333333333333, + "grad_norm": 9.30897934729109, + "learning_rate": 5e-05, + "loss": 0.1561, + "num_input_tokens_seen": 102051264, + "step": 1118 + }, + { + "epoch": 4.658333333333333, + "loss": 0.1870819479227066, + "loss_ce": 0.004830969497561455, + "loss_iou": 0.326171875, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 102051264, + "step": 1118 + }, + { + "epoch": 4.6625, + "grad_norm": 5.973730886667036, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 102142604, + "step": 1119 + }, + { + "epoch": 4.6625, + "loss": 0.1165546178817749, + "loss_ce": 0.004310957621783018, + "loss_iou": 0.37109375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 102142604, + "step": 1119 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 4.216800416560698, + "learning_rate": 5e-05, + "loss": 0.165, + "num_input_tokens_seen": 102234036, + "step": 1120 + }, + { + "epoch": 4.666666666666667, + "loss": 0.16760239005088806, + "loss_ce": 0.0006101946346461773, + "loss_iou": 0.3046875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 102234036, + "step": 1120 + }, + { + "epoch": 4.670833333333333, + "grad_norm": 10.564228271071244, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 102325188, + "step": 1121 + }, + { + "epoch": 4.670833333333333, + "loss": 0.07861147820949554, + "loss_ce": 0.00027285737451165915, + "loss_iou": 0.29296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 102325188, + "step": 1121 + }, + { + "epoch": 4.675, + "grad_norm": 5.613473524594385, + "learning_rate": 5e-05, + "loss": 0.1335, + "num_input_tokens_seen": 102415764, + "step": 1122 + }, + { + "epoch": 4.675, + "loss": 0.11580031365156174, + "loss_ce": 3.188900154782459e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 102415764, + "step": 1122 + }, + { + "epoch": 4.679166666666667, + "grad_norm": 3.4497010574149303, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 102507248, + "step": 1123 + }, + { + "epoch": 4.679166666666667, + "loss": 0.12468966841697693, + "loss_ce": 0.00011691106192301959, + "loss_iou": 0.3203125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 102507248, + "step": 1123 + }, + { + "epoch": 4.683333333333334, + "grad_norm": 2.406253546576618, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 102598672, + "step": 1124 + }, + { + "epoch": 4.683333333333334, + "loss": 0.0875929445028305, + "loss_ce": 0.001060347887687385, + "loss_iou": 0.2421875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 102598672, + "step": 1124 + }, + { + "epoch": 4.6875, + "grad_norm": 13.838199882433146, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 102689792, + "step": 1125 + }, + { + "epoch": 4.6875, + "loss": 0.10166600346565247, + "loss_ce": 2.7204778234590776e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 102689792, + "step": 1125 + }, + { + "epoch": 4.691666666666666, + "grad_norm": 3.0925421228060834, + "learning_rate": 5e-05, + "loss": 0.1917, + "num_input_tokens_seen": 102781628, + "step": 1126 + }, + { + "epoch": 4.691666666666666, + "loss": 0.17679978907108307, + "loss_ce": 0.0005302638746798038, + "loss_iou": 0.2412109375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 102781628, + "step": 1126 + }, + { + "epoch": 4.695833333333333, + "grad_norm": 4.711952688437384, + "learning_rate": 5e-05, + "loss": 0.1452, + "num_input_tokens_seen": 102873092, + "step": 1127 + }, + { + "epoch": 4.695833333333333, + "loss": 0.11696916073560715, + "loss_ce": 0.0010328851640224457, + "loss_iou": 0.365234375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 102873092, + "step": 1127 + }, + { + "epoch": 4.7, + "grad_norm": 12.698922137838567, + "learning_rate": 5e-05, + "loss": 0.1822, + "num_input_tokens_seen": 102964416, + "step": 1128 + }, + { + "epoch": 4.7, + "loss": 0.18701621890068054, + "loss_ce": 0.005451895762234926, + "loss_iou": 0.28125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 102964416, + "step": 1128 + }, + { + "epoch": 4.704166666666667, + "grad_norm": 10.40043243339498, + "learning_rate": 5e-05, + "loss": 0.1573, + "num_input_tokens_seen": 103055708, + "step": 1129 + }, + { + "epoch": 4.704166666666667, + "loss": 0.16252252459526062, + "loss_ce": 1.1628793572526774e-06, + "loss_iou": 0.46484375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 103055708, + "step": 1129 + }, + { + "epoch": 4.708333333333333, + "grad_norm": 2.184086695791655, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 103147032, + "step": 1130 + }, + { + "epoch": 4.708333333333333, + "loss": 0.13449756801128387, + "loss_ce": 0.0019291974604129791, + "loss_iou": 0.390625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 103147032, + "step": 1130 + }, + { + "epoch": 4.7125, + "grad_norm": 7.032209184530382, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 103238204, + "step": 1131 + }, + { + "epoch": 4.7125, + "loss": 0.09560714662075043, + "loss_ce": 0.0012162767816334963, + "loss_iou": 0.44140625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 103238204, + "step": 1131 + }, + { + "epoch": 4.716666666666667, + "grad_norm": 2.8403002406090687, + "learning_rate": 5e-05, + "loss": 0.1821, + "num_input_tokens_seen": 103329796, + "step": 1132 + }, + { + "epoch": 4.716666666666667, + "loss": 0.1839357614517212, + "loss_ce": 0.0002504565636627376, + "loss_iou": 0.2080078125, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 103329796, + "step": 1132 + }, + { + "epoch": 4.720833333333333, + "grad_norm": 6.8795684491862525, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 103421468, + "step": 1133 + }, + { + "epoch": 4.720833333333333, + "loss": 0.0764055848121643, + "loss_ce": 0.0008440621895715594, + "loss_iou": 0.380859375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 103421468, + "step": 1133 + }, + { + "epoch": 4.725, + "grad_norm": 10.919233351301784, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 103513172, + "step": 1134 + }, + { + "epoch": 4.725, + "loss": 0.056897103786468506, + "loss_ce": 0.0006684675463475287, + "loss_iou": 0.232421875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 103513172, + "step": 1134 + }, + { + "epoch": 4.729166666666667, + "grad_norm": 3.647598629703392, + "learning_rate": 5e-05, + "loss": 0.1303, + "num_input_tokens_seen": 103604216, + "step": 1135 + }, + { + "epoch": 4.729166666666667, + "loss": 0.11557944864034653, + "loss_ce": 0.0004213701467961073, + "loss_iou": 0.322265625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 103604216, + "step": 1135 + }, + { + "epoch": 4.733333333333333, + "grad_norm": 2.3437484820197847, + "learning_rate": 5e-05, + "loss": 0.11, + "num_input_tokens_seen": 103695572, + "step": 1136 + }, + { + "epoch": 4.733333333333333, + "loss": 0.07198523730039597, + "loss_ce": 0.0001773782423697412, + "loss_iou": 0.22265625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 103695572, + "step": 1136 + }, + { + "epoch": 4.7375, + "grad_norm": 4.234167936321321, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 103787040, + "step": 1137 + }, + { + "epoch": 4.7375, + "loss": 0.12047646939754486, + "loss_ce": 0.0014884258853271604, + "loss_iou": 0.30859375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 103787040, + "step": 1137 + }, + { + "epoch": 4.741666666666667, + "grad_norm": 16.781877619157644, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 103878008, + "step": 1138 + }, + { + "epoch": 4.741666666666667, + "loss": 0.14299070835113525, + "loss_ce": 0.003189687617123127, + "loss_iou": 0.353515625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 103878008, + "step": 1138 + }, + { + "epoch": 4.745833333333334, + "grad_norm": 4.72457382985536, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 103969652, + "step": 1139 + }, + { + "epoch": 4.745833333333334, + "loss": 0.040563084185123444, + "loss_ce": 0.000646095082629472, + "loss_iou": 0.2177734375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 103969652, + "step": 1139 + }, + { + "epoch": 4.75, + "grad_norm": 3.0250129729368194, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 104061028, + "step": 1140 + }, + { + "epoch": 4.75, + "loss": 0.07690826058387756, + "loss_ce": 0.001011039363220334, + "loss_iou": 0.18359375, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 104061028, + "step": 1140 + }, + { + "epoch": 4.754166666666666, + "grad_norm": 2.2427258239010905, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 104152516, + "step": 1141 + }, + { + "epoch": 4.754166666666666, + "loss": 0.07243698090314865, + "loss_ce": 0.00030868116300553083, + "loss_iou": 0.1845703125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 104152516, + "step": 1141 + }, + { + "epoch": 4.758333333333333, + "grad_norm": 6.676180269717597, + "learning_rate": 5e-05, + "loss": 0.1645, + "num_input_tokens_seen": 104244052, + "step": 1142 + }, + { + "epoch": 4.758333333333333, + "loss": 0.12318507581949234, + "loss_ce": 0.00019923440413549542, + "loss_iou": 0.15625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 104244052, + "step": 1142 + }, + { + "epoch": 4.7625, + "grad_norm": 7.079291954745069, + "learning_rate": 5e-05, + "loss": 0.1527, + "num_input_tokens_seen": 104333724, + "step": 1143 + }, + { + "epoch": 4.7625, + "loss": 0.19446733593940735, + "loss_ce": 0.0006807069876231253, + "loss_iou": 0.2431640625, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 104333724, + "step": 1143 + }, + { + "epoch": 4.766666666666667, + "grad_norm": 3.550546756408287, + "learning_rate": 5e-05, + "loss": 0.1273, + "num_input_tokens_seen": 104425440, + "step": 1144 + }, + { + "epoch": 4.766666666666667, + "loss": 0.12909270823001862, + "loss_ce": 0.0006137005402706563, + "loss_iou": 0.28515625, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 104425440, + "step": 1144 + }, + { + "epoch": 4.770833333333333, + "grad_norm": 2.192891962766182, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 104516736, + "step": 1145 + }, + { + "epoch": 4.770833333333333, + "loss": 0.09015144407749176, + "loss_ce": 0.00015510247612837702, + "loss_iou": 0.298828125, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 104516736, + "step": 1145 + }, + { + "epoch": 4.775, + "grad_norm": 3.1296938404560852, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 104608108, + "step": 1146 + }, + { + "epoch": 4.775, + "loss": 0.10480667650699615, + "loss_ce": 0.001443637884221971, + "loss_iou": 0.23046875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 104608108, + "step": 1146 + }, + { + "epoch": 4.779166666666667, + "grad_norm": 3.6372144037935232, + "learning_rate": 5e-05, + "loss": 0.1935, + "num_input_tokens_seen": 104699640, + "step": 1147 + }, + { + "epoch": 4.779166666666667, + "loss": 0.1913982331752777, + "loss_ce": 0.004050817806273699, + "loss_iou": 0.408203125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 104699640, + "step": 1147 + }, + { + "epoch": 4.783333333333333, + "grad_norm": 2.6064847950760606, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 104790628, + "step": 1148 + }, + { + "epoch": 4.783333333333333, + "loss": 0.054977696388959885, + "loss_ce": 3.0795123166171834e-05, + "loss_iou": 0.30078125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 104790628, + "step": 1148 + }, + { + "epoch": 4.7875, + "grad_norm": 2.9820181209674432, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 104882404, + "step": 1149 + }, + { + "epoch": 4.7875, + "loss": 0.1582833230495453, + "loss_ce": 0.001056756591424346, + "loss_iou": 0.2109375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 104882404, + "step": 1149 + }, + { + "epoch": 4.791666666666667, + "grad_norm": 3.9393256885276404, + "learning_rate": 5e-05, + "loss": 0.1714, + "num_input_tokens_seen": 104973680, + "step": 1150 + }, + { + "epoch": 4.791666666666667, + "loss": 0.16472193598747253, + "loss_ce": 0.0041842274367809296, + "loss_iou": 0.2578125, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 104973680, + "step": 1150 + }, + { + "epoch": 4.795833333333333, + "grad_norm": 5.190100418108923, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 105064840, + "step": 1151 + }, + { + "epoch": 4.795833333333333, + "loss": 0.09066504240036011, + "loss_ce": 0.002286137081682682, + "loss_iou": 0.26171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 105064840, + "step": 1151 + }, + { + "epoch": 4.8, + "grad_norm": 7.303473496948488, + "learning_rate": 5e-05, + "loss": 0.1512, + "num_input_tokens_seen": 105157112, + "step": 1152 + }, + { + "epoch": 4.8, + "loss": 0.18908609449863434, + "loss_ce": 0.002074366668239236, + "loss_iou": 0.232421875, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 105157112, + "step": 1152 + }, + { + "epoch": 4.804166666666667, + "grad_norm": 3.060594925978094, + "learning_rate": 5e-05, + "loss": 0.1526, + "num_input_tokens_seen": 105248432, + "step": 1153 + }, + { + "epoch": 4.804166666666667, + "loss": 0.10005295276641846, + "loss_ce": 0.0025035091675817966, + "loss_iou": 0.189453125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 105248432, + "step": 1153 + }, + { + "epoch": 4.808333333333334, + "grad_norm": 4.609051925692657, + "learning_rate": 5e-05, + "loss": 0.1391, + "num_input_tokens_seen": 105339660, + "step": 1154 + }, + { + "epoch": 4.808333333333334, + "loss": 0.21614710986614227, + "loss_ce": 0.0007693012012168765, + "loss_iou": 0.236328125, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 105339660, + "step": 1154 + }, + { + "epoch": 4.8125, + "grad_norm": 7.8704766368309, + "learning_rate": 5e-05, + "loss": 0.1957, + "num_input_tokens_seen": 105431824, + "step": 1155 + }, + { + "epoch": 4.8125, + "loss": 0.24834512174129486, + "loss_ce": 0.0006339406245388091, + "loss_iou": 0.2734375, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 105431824, + "step": 1155 + }, + { + "epoch": 4.816666666666666, + "grad_norm": 3.1825568683650016, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 105522832, + "step": 1156 + }, + { + "epoch": 4.816666666666666, + "loss": 0.08022044599056244, + "loss_ce": 0.0006611213320866227, + "loss_iou": 0.333984375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 105522832, + "step": 1156 + }, + { + "epoch": 4.820833333333333, + "grad_norm": 4.876646511952777, + "learning_rate": 5e-05, + "loss": 0.1709, + "num_input_tokens_seen": 105614292, + "step": 1157 + }, + { + "epoch": 4.820833333333333, + "loss": 0.13954287767410278, + "loss_ce": 0.0021374865900725126, + "loss_iou": 0.291015625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 105614292, + "step": 1157 + }, + { + "epoch": 4.825, + "grad_norm": 8.168819966830162, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 105705452, + "step": 1158 + }, + { + "epoch": 4.825, + "loss": 0.12230473011732101, + "loss_ce": 2.0796111130039208e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 105705452, + "step": 1158 + }, + { + "epoch": 4.829166666666667, + "grad_norm": 5.778774136351113, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 105796620, + "step": 1159 + }, + { + "epoch": 4.829166666666667, + "loss": 0.09682287275791168, + "loss_ce": 0.0014249193482100964, + "loss_iou": 0.32421875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 105796620, + "step": 1159 + }, + { + "epoch": 4.833333333333333, + "grad_norm": 3.618191034070861, + "learning_rate": 5e-05, + "loss": 0.1313, + "num_input_tokens_seen": 105887976, + "step": 1160 + }, + { + "epoch": 4.833333333333333, + "loss": 0.10715027898550034, + "loss_ce": 0.0015594599535688758, + "loss_iou": 0.4296875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 105887976, + "step": 1160 + }, + { + "epoch": 4.8375, + "grad_norm": 3.0934691829656247, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 105979552, + "step": 1161 + }, + { + "epoch": 4.8375, + "loss": 0.05586852878332138, + "loss_ce": 0.002844237256795168, + "loss_iou": 0.267578125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 105979552, + "step": 1161 + }, + { + "epoch": 4.841666666666667, + "grad_norm": 2.761479297129452, + "learning_rate": 5e-05, + "loss": 0.1769, + "num_input_tokens_seen": 106071192, + "step": 1162 + }, + { + "epoch": 4.841666666666667, + "loss": 0.12410786747932434, + "loss_ce": 0.002434288617223501, + "loss_iou": 0.27734375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 106071192, + "step": 1162 + }, + { + "epoch": 4.845833333333333, + "grad_norm": 7.136811974389668, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 106162500, + "step": 1163 + }, + { + "epoch": 4.845833333333333, + "loss": 0.08225230872631073, + "loss_ce": 0.0001600257819518447, + "loss_iou": 0.35546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 106162500, + "step": 1163 + }, + { + "epoch": 4.85, + "grad_norm": 7.198517497297793, + "learning_rate": 5e-05, + "loss": 0.1575, + "num_input_tokens_seen": 106254376, + "step": 1164 + }, + { + "epoch": 4.85, + "loss": 0.1532648205757141, + "loss_ce": 0.0005853786133229733, + "loss_iou": 0.34375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 106254376, + "step": 1164 + }, + { + "epoch": 4.854166666666667, + "grad_norm": 4.436200013135143, + "learning_rate": 5e-05, + "loss": 0.1463, + "num_input_tokens_seen": 106345416, + "step": 1165 + }, + { + "epoch": 4.854166666666667, + "loss": 0.13062620162963867, + "loss_ce": 0.0002245925134047866, + "loss_iou": 0.37109375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 106345416, + "step": 1165 + }, + { + "epoch": 4.858333333333333, + "grad_norm": 2.2125295380449472, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 106437532, + "step": 1166 + }, + { + "epoch": 4.858333333333333, + "loss": 0.12548190355300903, + "loss_ce": 0.0007413043058477342, + "loss_iou": 0.26171875, + "loss_num": 0.0250244140625, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 106437532, + "step": 1166 + }, + { + "epoch": 4.8625, + "grad_norm": 26.07955525963802, + "learning_rate": 5e-05, + "loss": 0.1577, + "num_input_tokens_seen": 106528440, + "step": 1167 + }, + { + "epoch": 4.8625, + "loss": 0.18524158000946045, + "loss_ce": 0.000976437411736697, + "loss_iou": 0.185546875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 106528440, + "step": 1167 + }, + { + "epoch": 4.866666666666667, + "grad_norm": 3.417899391433817, + "learning_rate": 5e-05, + "loss": 0.1398, + "num_input_tokens_seen": 106620244, + "step": 1168 + }, + { + "epoch": 4.866666666666667, + "loss": 0.16416826844215393, + "loss_ce": 0.0014638010179623961, + "loss_iou": 0.259765625, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 106620244, + "step": 1168 + }, + { + "epoch": 4.870833333333334, + "grad_norm": 2.36998391465569, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 106711812, + "step": 1169 + }, + { + "epoch": 4.870833333333334, + "loss": 0.1460573673248291, + "loss_ce": 0.0003512083785608411, + "loss_iou": 0.23828125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 106711812, + "step": 1169 + }, + { + "epoch": 4.875, + "grad_norm": 4.6140129234293985, + "learning_rate": 5e-05, + "loss": 0.1436, + "num_input_tokens_seen": 106803516, + "step": 1170 + }, + { + "epoch": 4.875, + "loss": 0.1088951975107193, + "loss_ce": 0.001198665937408805, + "loss_iou": 0.318359375, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 106803516, + "step": 1170 + }, + { + "epoch": 4.879166666666666, + "grad_norm": 4.106721008433326, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 106895244, + "step": 1171 + }, + { + "epoch": 4.879166666666666, + "loss": 0.11301624774932861, + "loss_ce": 0.0005284602520987391, + "loss_iou": 0.37890625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 106895244, + "step": 1171 + }, + { + "epoch": 4.883333333333333, + "grad_norm": 3.3142334922220362, + "learning_rate": 5e-05, + "loss": 0.1314, + "num_input_tokens_seen": 106986652, + "step": 1172 + }, + { + "epoch": 4.883333333333333, + "loss": 0.13002213835716248, + "loss_ce": 0.0013600302627310157, + "loss_iou": 0.2734375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 106986652, + "step": 1172 + }, + { + "epoch": 4.8875, + "grad_norm": 3.2706971322529648, + "learning_rate": 5e-05, + "loss": 0.1275, + "num_input_tokens_seen": 107077508, + "step": 1173 + }, + { + "epoch": 4.8875, + "loss": 0.08167026191949844, + "loss_ce": 0.0008597183041274548, + "loss_iou": 0.23046875, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 107077508, + "step": 1173 + }, + { + "epoch": 4.891666666666667, + "grad_norm": 5.044103138964741, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 107169048, + "step": 1174 + }, + { + "epoch": 4.891666666666667, + "loss": 0.09915536642074585, + "loss_ce": 0.0022925687953829765, + "loss_iou": 0.318359375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 107169048, + "step": 1174 + }, + { + "epoch": 4.895833333333333, + "grad_norm": 47.56655776407188, + "learning_rate": 5e-05, + "loss": 0.179, + "num_input_tokens_seen": 107260840, + "step": 1175 + }, + { + "epoch": 4.895833333333333, + "loss": 0.14901116490364075, + "loss_ce": 0.0027861865237355232, + "loss_iou": 0.25390625, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 107260840, + "step": 1175 + }, + { + "epoch": 4.9, + "grad_norm": 14.474975416203984, + "learning_rate": 5e-05, + "loss": 0.1438, + "num_input_tokens_seen": 107352072, + "step": 1176 + }, + { + "epoch": 4.9, + "loss": 0.12309698760509491, + "loss_ce": 1.9596440324676223e-05, + "loss_iou": 0.35546875, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 107352072, + "step": 1176 + }, + { + "epoch": 4.904166666666667, + "grad_norm": 4.597554419096973, + "learning_rate": 5e-05, + "loss": 0.1488, + "num_input_tokens_seen": 107443000, + "step": 1177 + }, + { + "epoch": 4.904166666666667, + "loss": 0.1479775458574295, + "loss_ce": 0.00024195160949602723, + "loss_iou": 0.30078125, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 107443000, + "step": 1177 + }, + { + "epoch": 4.908333333333333, + "grad_norm": 17.63616065243701, + "learning_rate": 5e-05, + "loss": 0.1815, + "num_input_tokens_seen": 107534172, + "step": 1178 + }, + { + "epoch": 4.908333333333333, + "loss": 0.16808560490608215, + "loss_ce": 5.581736331805587e-05, + "loss_iou": 0.296875, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 107534172, + "step": 1178 + }, + { + "epoch": 4.9125, + "grad_norm": 4.532270987973431, + "learning_rate": 5e-05, + "loss": 0.1264, + "num_input_tokens_seen": 107625528, + "step": 1179 + }, + { + "epoch": 4.9125, + "loss": 0.16151559352874756, + "loss_ce": 0.0037702254485338926, + "loss_iou": 0.19140625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 107625528, + "step": 1179 + }, + { + "epoch": 4.916666666666667, + "grad_norm": 29.418004624772323, + "learning_rate": 5e-05, + "loss": 0.1425, + "num_input_tokens_seen": 107717188, + "step": 1180 + }, + { + "epoch": 4.916666666666667, + "loss": 0.11543691903352737, + "loss_ce": 0.0002941017155535519, + "loss_iou": 0.283203125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 107717188, + "step": 1180 + }, + { + "epoch": 4.920833333333333, + "grad_norm": 5.9605390657692, + "learning_rate": 5e-05, + "loss": 0.1664, + "num_input_tokens_seen": 107808660, + "step": 1181 + }, + { + "epoch": 4.920833333333333, + "loss": 0.1978244185447693, + "loss_ce": 0.002908646594733, + "loss_iou": 0.404296875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 107808660, + "step": 1181 + }, + { + "epoch": 4.925, + "grad_norm": 6.279078096263478, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 107900216, + "step": 1182 + }, + { + "epoch": 4.925, + "loss": 0.10716290026903152, + "loss_ce": 0.00039715541061013937, + "loss_iou": 0.275390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 107900216, + "step": 1182 + }, + { + "epoch": 4.929166666666667, + "grad_norm": 3.9856423529113973, + "learning_rate": 5e-05, + "loss": 0.1249, + "num_input_tokens_seen": 107989820, + "step": 1183 + }, + { + "epoch": 4.929166666666667, + "loss": 0.11589328199625015, + "loss_ce": 1.8040238501271233e-05, + "loss_iou": 0.44921875, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 107989820, + "step": 1183 + }, + { + "epoch": 4.933333333333334, + "grad_norm": 4.7845682379324534, + "learning_rate": 5e-05, + "loss": 0.1555, + "num_input_tokens_seen": 108080832, + "step": 1184 + }, + { + "epoch": 4.933333333333334, + "loss": 0.12583021819591522, + "loss_ce": 6.233003659872338e-06, + "loss_iou": 0.3671875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 108080832, + "step": 1184 + }, + { + "epoch": 4.9375, + "grad_norm": 3.950777986773663, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 108172540, + "step": 1185 + }, + { + "epoch": 4.9375, + "loss": 0.10127120465040207, + "loss_ce": 0.0010362147586420178, + "loss_iou": 0.189453125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 108172540, + "step": 1185 + }, + { + "epoch": 4.941666666666666, + "grad_norm": 2.1953353587311546, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 108263896, + "step": 1186 + }, + { + "epoch": 4.941666666666666, + "loss": 0.13001351058483124, + "loss_ce": 0.0009699350339360535, + "loss_iou": 0.232421875, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 108263896, + "step": 1186 + }, + { + "epoch": 4.945833333333333, + "grad_norm": 11.310572081689427, + "learning_rate": 5e-05, + "loss": 0.1665, + "num_input_tokens_seen": 108355512, + "step": 1187 + }, + { + "epoch": 4.945833333333333, + "loss": 0.21164894104003906, + "loss_ce": 0.0024204296059906483, + "loss_iou": 0.283203125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 108355512, + "step": 1187 + }, + { + "epoch": 4.95, + "grad_norm": 12.34823239005613, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 108447188, + "step": 1188 + }, + { + "epoch": 4.95, + "loss": 0.08706867694854736, + "loss_ce": 0.002885938622057438, + "loss_iou": 0.2421875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 108447188, + "step": 1188 + }, + { + "epoch": 4.954166666666667, + "grad_norm": 4.008203850158953, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 108538840, + "step": 1189 + }, + { + "epoch": 4.954166666666667, + "loss": 0.046880945563316345, + "loss_ce": 0.0006162988720461726, + "loss_iou": 0.26171875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 108538840, + "step": 1189 + }, + { + "epoch": 4.958333333333333, + "grad_norm": 8.730872913036187, + "learning_rate": 5e-05, + "loss": 0.1671, + "num_input_tokens_seen": 108629984, + "step": 1190 + }, + { + "epoch": 4.958333333333333, + "loss": 0.14688724279403687, + "loss_ce": 3.66496060451027e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 108629984, + "step": 1190 + }, + { + "epoch": 4.9625, + "grad_norm": 6.948890913958782, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 108721364, + "step": 1191 + }, + { + "epoch": 4.9625, + "loss": 0.07616404443979263, + "loss_ce": 0.0006025217589922249, + "loss_iou": 0.306640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 108721364, + "step": 1191 + }, + { + "epoch": 4.966666666666667, + "grad_norm": 12.006868126801029, + "learning_rate": 5e-05, + "loss": 0.1201, + "num_input_tokens_seen": 108812772, + "step": 1192 + }, + { + "epoch": 4.966666666666667, + "loss": 0.12916617095470428, + "loss_ce": 0.0026250318624079227, + "loss_iou": 0.400390625, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 108812772, + "step": 1192 + }, + { + "epoch": 4.970833333333333, + "grad_norm": 11.075487431256267, + "learning_rate": 5e-05, + "loss": 0.1683, + "num_input_tokens_seen": 108903696, + "step": 1193 + }, + { + "epoch": 4.970833333333333, + "loss": 0.18713349103927612, + "loss_ce": 0.00021332701726350933, + "loss_iou": 0.37109375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 108903696, + "step": 1193 + }, + { + "epoch": 4.975, + "grad_norm": 5.9047766973597335, + "learning_rate": 5e-05, + "loss": 0.1342, + "num_input_tokens_seen": 108995264, + "step": 1194 + }, + { + "epoch": 4.975, + "loss": 0.1575443148612976, + "loss_ce": 0.021435918286442757, + "loss_iou": 0.38671875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 108995264, + "step": 1194 + }, + { + "epoch": 4.979166666666667, + "grad_norm": 7.209285987756478, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 109086616, + "step": 1195 + }, + { + "epoch": 4.979166666666667, + "loss": 0.08984746783971786, + "loss_ce": 0.008487604558467865, + "loss_iou": 0.171875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 109086616, + "step": 1195 + }, + { + "epoch": 4.983333333333333, + "grad_norm": 4.959710692273525, + "learning_rate": 5e-05, + "loss": 0.1384, + "num_input_tokens_seen": 109177672, + "step": 1196 + }, + { + "epoch": 4.983333333333333, + "loss": 0.06564254313707352, + "loss_ce": 0.0005638079019263387, + "loss_iou": 0.412109375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 109177672, + "step": 1196 + }, + { + "epoch": 4.9875, + "grad_norm": 4.4038327446744425, + "learning_rate": 5e-05, + "loss": 0.0954, + "num_input_tokens_seen": 109268552, + "step": 1197 + }, + { + "epoch": 4.9875, + "loss": 0.07659703493118286, + "loss_ce": 0.00037938207970000803, + "loss_iou": 0.287109375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 109268552, + "step": 1197 + }, + { + "epoch": 4.991666666666667, + "grad_norm": 2.5176914705181375, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 109359828, + "step": 1198 + }, + { + "epoch": 4.991666666666667, + "loss": 0.11878697574138641, + "loss_ce": 1.2563883501570672e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 109359828, + "step": 1198 + }, + { + "epoch": 4.995833333333334, + "grad_norm": 7.96050941330993, + "learning_rate": 5e-05, + "loss": 0.1744, + "num_input_tokens_seen": 109451164, + "step": 1199 + }, + { + "epoch": 4.995833333333334, + "loss": 0.10806751996278763, + "loss_ce": 4.776245532411849e-06, + "loss_iou": 0.279296875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 109451164, + "step": 1199 + }, + { + "epoch": 5.0, + "grad_norm": 20.87607456136375, + "learning_rate": 5e-05, + "loss": 0.1382, + "num_input_tokens_seen": 109542924, + "step": 1200 + }, + { + "epoch": 5.0, + "loss": 0.1367817372083664, + "loss_ce": 0.0009785225847736, + "loss_iou": 0.35546875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 109542924, + "step": 1200 + }, + { + "epoch": 5.004166666666666, + "grad_norm": 3.8787681830586753, + "learning_rate": 5e-05, + "loss": 0.1366, + "num_input_tokens_seen": 109633248, + "step": 1201 + }, + { + "epoch": 5.004166666666666, + "loss": 0.19319066405296326, + "loss_ce": 0.0005331888678483665, + "loss_iou": 0.049560546875, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 109633248, + "step": 1201 + }, + { + "epoch": 5.008333333333334, + "grad_norm": 2.8024044597653504, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 109724012, + "step": 1202 + }, + { + "epoch": 5.008333333333334, + "loss": 0.11142734438180923, + "loss_ce": 6.869970820844173e-05, + "loss_iou": 0.2109375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 109724012, + "step": 1202 + }, + { + "epoch": 5.0125, + "grad_norm": 11.504232933050812, + "learning_rate": 5e-05, + "loss": 0.1348, + "num_input_tokens_seen": 109814980, + "step": 1203 + }, + { + "epoch": 5.0125, + "loss": 0.14389806985855103, + "loss_ce": 0.00040442385943606496, + "loss_iou": 0.32421875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 109814980, + "step": 1203 + }, + { + "epoch": 5.016666666666667, + "grad_norm": 3.7370252268604296, + "learning_rate": 5e-05, + "loss": 0.1411, + "num_input_tokens_seen": 109905940, + "step": 1204 + }, + { + "epoch": 5.016666666666667, + "loss": 0.13649845123291016, + "loss_ce": 9.59103772402159e-07, + "loss_iou": 0.392578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 109905940, + "step": 1204 + }, + { + "epoch": 5.020833333333333, + "grad_norm": 4.4689807688140135, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 109997520, + "step": 1205 + }, + { + "epoch": 5.020833333333333, + "loss": 0.08278882503509521, + "loss_ce": 0.0015357693191617727, + "loss_iou": 0.380859375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 109997520, + "step": 1205 + }, + { + "epoch": 5.025, + "grad_norm": 2.4383737085153823, + "learning_rate": 5e-05, + "loss": 0.1215, + "num_input_tokens_seen": 110088568, + "step": 1206 + }, + { + "epoch": 5.025, + "loss": 0.12168996036052704, + "loss_ce": 0.0015422508586198092, + "loss_iou": 0.310546875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 110088568, + "step": 1206 + }, + { + "epoch": 5.029166666666667, + "grad_norm": 1.789251070742093, + "learning_rate": 5e-05, + "loss": 0.0993, + "num_input_tokens_seen": 110178884, + "step": 1207 + }, + { + "epoch": 5.029166666666667, + "loss": 0.0567280575633049, + "loss_ce": 0.0019642619881778955, + "loss_iou": 0.30078125, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 110178884, + "step": 1207 + }, + { + "epoch": 5.033333333333333, + "grad_norm": 4.0877162405297724, + "learning_rate": 5e-05, + "loss": 0.1192, + "num_input_tokens_seen": 110270332, + "step": 1208 + }, + { + "epoch": 5.033333333333333, + "loss": 0.14080800116062164, + "loss_ce": 0.00018300242663826793, + "loss_iou": 0.326171875, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 110270332, + "step": 1208 + }, + { + "epoch": 5.0375, + "grad_norm": 15.93284051558836, + "learning_rate": 5e-05, + "loss": 0.16, + "num_input_tokens_seen": 110361576, + "step": 1209 + }, + { + "epoch": 5.0375, + "loss": 0.167129784822464, + "loss_ce": 0.0004122618702240288, + "loss_iou": 0.32421875, + "loss_num": 0.033203125, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 110361576, + "step": 1209 + }, + { + "epoch": 5.041666666666667, + "grad_norm": 12.944063658397615, + "learning_rate": 5e-05, + "loss": 0.1346, + "num_input_tokens_seen": 110452884, + "step": 1210 + }, + { + "epoch": 5.041666666666667, + "loss": 0.10224296152591705, + "loss_ce": 0.00028373015811666846, + "loss_iou": 0.408203125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 110452884, + "step": 1210 + }, + { + "epoch": 5.045833333333333, + "grad_norm": 23.60951585784412, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 110544128, + "step": 1211 + }, + { + "epoch": 5.045833333333333, + "loss": 0.07057714462280273, + "loss_ce": 0.00034093711292371154, + "loss_iou": 0.2734375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 110544128, + "step": 1211 + }, + { + "epoch": 5.05, + "grad_norm": 3.8865840347157503, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 110634764, + "step": 1212 + }, + { + "epoch": 5.05, + "loss": 0.05204097181558609, + "loss_ce": 0.0005578203708864748, + "loss_iou": 0.35546875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 110634764, + "step": 1212 + }, + { + "epoch": 5.054166666666666, + "grad_norm": 4.8249299659894245, + "learning_rate": 5e-05, + "loss": 0.1227, + "num_input_tokens_seen": 110726256, + "step": 1213 + }, + { + "epoch": 5.054166666666666, + "loss": 0.09966389834880829, + "loss_ce": 2.4005919840419665e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 110726256, + "step": 1213 + }, + { + "epoch": 5.058333333333334, + "grad_norm": 7.718292424470949, + "learning_rate": 5e-05, + "loss": 0.11, + "num_input_tokens_seen": 110817604, + "step": 1214 + }, + { + "epoch": 5.058333333333334, + "loss": 0.06570029258728027, + "loss_ce": 0.0001027620310196653, + "loss_iou": 0.49609375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 110817604, + "step": 1214 + }, + { + "epoch": 5.0625, + "grad_norm": 5.757078059025428, + "learning_rate": 5e-05, + "loss": 0.0944, + "num_input_tokens_seen": 110908712, + "step": 1215 + }, + { + "epoch": 5.0625, + "loss": 0.10337609797716141, + "loss_ce": 0.002912231022492051, + "loss_iou": 0.279296875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 110908712, + "step": 1215 + }, + { + "epoch": 5.066666666666666, + "grad_norm": 4.350185436094405, + "learning_rate": 5e-05, + "loss": 0.1392, + "num_input_tokens_seen": 110999572, + "step": 1216 + }, + { + "epoch": 5.066666666666666, + "loss": 0.16903723776340485, + "loss_ce": 0.0002445149584673345, + "loss_iou": 0.3359375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 110999572, + "step": 1216 + }, + { + "epoch": 5.070833333333334, + "grad_norm": 2.9290239729778995, + "learning_rate": 5e-05, + "loss": 0.1382, + "num_input_tokens_seen": 111091312, + "step": 1217 + }, + { + "epoch": 5.070833333333334, + "loss": 0.14560575783252716, + "loss_ce": 0.00298185832798481, + "loss_iou": 0.2197265625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 111091312, + "step": 1217 + }, + { + "epoch": 5.075, + "grad_norm": 3.0035295725462503, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 111182248, + "step": 1218 + }, + { + "epoch": 5.075, + "loss": 0.08869168907403946, + "loss_ce": 6.864196620881557e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 111182248, + "step": 1218 + }, + { + "epoch": 5.079166666666667, + "grad_norm": 3.3324160340032756, + "learning_rate": 5e-05, + "loss": 0.1397, + "num_input_tokens_seen": 111274184, + "step": 1219 + }, + { + "epoch": 5.079166666666667, + "loss": 0.10855446010828018, + "loss_ce": 0.001727680442854762, + "loss_iou": 0.3984375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 111274184, + "step": 1219 + }, + { + "epoch": 5.083333333333333, + "grad_norm": 3.8266739457670855, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 111365224, + "step": 1220 + }, + { + "epoch": 5.083333333333333, + "loss": 0.04669380933046341, + "loss_ce": 1.9105934825347504e-06, + "loss_iou": 0.35546875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 111365224, + "step": 1220 + }, + { + "epoch": 5.0875, + "grad_norm": 4.20819565496716, + "learning_rate": 5e-05, + "loss": 0.126, + "num_input_tokens_seen": 111456556, + "step": 1221 + }, + { + "epoch": 5.0875, + "loss": 0.13568758964538574, + "loss_ce": 6.435318482544972e-06, + "loss_iou": 0.4453125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 111456556, + "step": 1221 + }, + { + "epoch": 5.091666666666667, + "grad_norm": 3.9317513583556125, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 111547324, + "step": 1222 + }, + { + "epoch": 5.091666666666667, + "loss": 0.11891846358776093, + "loss_ce": 0.0023718271404504776, + "loss_iou": 0.2216796875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 111547324, + "step": 1222 + }, + { + "epoch": 5.095833333333333, + "grad_norm": 4.900769539292761, + "learning_rate": 5e-05, + "loss": 0.1248, + "num_input_tokens_seen": 111638688, + "step": 1223 + }, + { + "epoch": 5.095833333333333, + "loss": 0.12108991295099258, + "loss_ce": 0.0013694557128474116, + "loss_iou": 0.384765625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 111638688, + "step": 1223 + }, + { + "epoch": 5.1, + "grad_norm": 6.628289053873807, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 111730696, + "step": 1224 + }, + { + "epoch": 5.1, + "loss": 0.12536606192588806, + "loss_ce": 0.0016783210448920727, + "loss_iou": 0.220703125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 111730696, + "step": 1224 + }, + { + "epoch": 5.104166666666667, + "grad_norm": 9.234334670863234, + "learning_rate": 5e-05, + "loss": 0.126, + "num_input_tokens_seen": 111822212, + "step": 1225 + }, + { + "epoch": 5.104166666666667, + "loss": 0.0658029243350029, + "loss_ce": 1.4655664926976897e-05, + "loss_iou": 0.29296875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 111822212, + "step": 1225 + }, + { + "epoch": 5.108333333333333, + "grad_norm": 2.4145462349780464, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 111913296, + "step": 1226 + }, + { + "epoch": 5.108333333333333, + "loss": 0.06669288128614426, + "loss_ce": 0.0002713756402954459, + "loss_iou": 0.2177734375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 111913296, + "step": 1226 + }, + { + "epoch": 5.1125, + "grad_norm": 11.584890739030651, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 112004356, + "step": 1227 + }, + { + "epoch": 5.1125, + "loss": 0.11644387245178223, + "loss_ce": 4.057878868479747e-06, + "loss_iou": 0.306640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 112004356, + "step": 1227 + }, + { + "epoch": 5.116666666666666, + "grad_norm": 5.636659565390342, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 112095520, + "step": 1228 + }, + { + "epoch": 5.116666666666666, + "loss": 0.1413094401359558, + "loss_ce": 0.00028771322104148567, + "loss_iou": 0.150390625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 112095520, + "step": 1228 + }, + { + "epoch": 5.120833333333334, + "grad_norm": 11.177885073008182, + "learning_rate": 5e-05, + "loss": 0.1417, + "num_input_tokens_seen": 112186296, + "step": 1229 + }, + { + "epoch": 5.120833333333334, + "loss": 0.21080881357192993, + "loss_ce": 0.018044522032141685, + "loss_iou": 0.10693359375, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 112186296, + "step": 1229 + }, + { + "epoch": 5.125, + "grad_norm": 2.7526024291809055, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 112277736, + "step": 1230 + }, + { + "epoch": 5.125, + "loss": 0.05874582752585411, + "loss_ce": 0.0004496269393712282, + "loss_iou": 0.3203125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 112277736, + "step": 1230 + }, + { + "epoch": 5.129166666666666, + "grad_norm": 2.251815283862548, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 112369748, + "step": 1231 + }, + { + "epoch": 5.129166666666666, + "loss": 0.11877487599849701, + "loss_ce": 0.003067473880946636, + "loss_iou": 0.349609375, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 112369748, + "step": 1231 + }, + { + "epoch": 5.133333333333334, + "grad_norm": 2.578317507730831, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 112461016, + "step": 1232 + }, + { + "epoch": 5.133333333333334, + "loss": 0.06011161953210831, + "loss_ce": 5.3028885304229334e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 112461016, + "step": 1232 + }, + { + "epoch": 5.1375, + "grad_norm": 1.360840339322072, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 112552472, + "step": 1233 + }, + { + "epoch": 5.1375, + "loss": 0.11113837361335754, + "loss_ce": 0.000931774964556098, + "loss_iou": 0.240234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 112552472, + "step": 1233 + }, + { + "epoch": 5.141666666666667, + "grad_norm": 1.7276905893923984, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 112643440, + "step": 1234 + }, + { + "epoch": 5.141666666666667, + "loss": 0.06363178789615631, + "loss_ce": 9.4195143901743e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 112643440, + "step": 1234 + }, + { + "epoch": 5.145833333333333, + "grad_norm": 3.4148272040861287, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 112734996, + "step": 1235 + }, + { + "epoch": 5.145833333333333, + "loss": 0.14283618330955505, + "loss_ce": 1.392368631059071e-05, + "loss_iou": 0.431640625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 112734996, + "step": 1235 + }, + { + "epoch": 5.15, + "grad_norm": 3.4329962925971365, + "learning_rate": 5e-05, + "loss": 0.1615, + "num_input_tokens_seen": 112826176, + "step": 1236 + }, + { + "epoch": 5.15, + "loss": 0.1847839653491974, + "loss_ce": 0.0009460803703404963, + "loss_iou": 0.2421875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 112826176, + "step": 1236 + }, + { + "epoch": 5.154166666666667, + "grad_norm": 4.912752549896548, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 112917236, + "step": 1237 + }, + { + "epoch": 5.154166666666667, + "loss": 0.10758166015148163, + "loss_ce": 7.199274023150792e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 112917236, + "step": 1237 + }, + { + "epoch": 5.158333333333333, + "grad_norm": 3.057128424442674, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 113008216, + "step": 1238 + }, + { + "epoch": 5.158333333333333, + "loss": 0.08614519983530045, + "loss_ce": 0.0007417544256895781, + "loss_iou": 0.298828125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 113008216, + "step": 1238 + }, + { + "epoch": 5.1625, + "grad_norm": 3.202517671389756, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 113099600, + "step": 1239 + }, + { + "epoch": 5.1625, + "loss": 0.10517486184835434, + "loss_ce": 0.00429900549352169, + "loss_iou": 0.2451171875, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 113099600, + "step": 1239 + }, + { + "epoch": 5.166666666666667, + "grad_norm": 8.732261105764604, + "learning_rate": 5e-05, + "loss": 0.1477, + "num_input_tokens_seen": 113190864, + "step": 1240 + }, + { + "epoch": 5.166666666666667, + "loss": 0.16263793408870697, + "loss_ce": 0.00010131551243830472, + "loss_iou": 0.4453125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 113190864, + "step": 1240 + }, + { + "epoch": 5.170833333333333, + "grad_norm": 1.9516385622557737, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 113281976, + "step": 1241 + }, + { + "epoch": 5.170833333333333, + "loss": 0.10548153519630432, + "loss_ce": 0.000592618715018034, + "loss_iou": 0.2890625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 113281976, + "step": 1241 + }, + { + "epoch": 5.175, + "grad_norm": 1.6857417263901389, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 113373120, + "step": 1242 + }, + { + "epoch": 5.175, + "loss": 0.08540096133947372, + "loss_ce": 0.0014318418689072132, + "loss_iou": 0.2255859375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 113373120, + "step": 1242 + }, + { + "epoch": 5.179166666666666, + "grad_norm": 3.5131015850856047, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 113464092, + "step": 1243 + }, + { + "epoch": 5.179166666666666, + "loss": 0.08137943595647812, + "loss_ce": 0.001087690470740199, + "loss_iou": 0.2099609375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 113464092, + "step": 1243 + }, + { + "epoch": 5.183333333333334, + "grad_norm": 2.7252061609552136, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 113555832, + "step": 1244 + }, + { + "epoch": 5.183333333333334, + "loss": 0.07820607721805573, + "loss_ce": 0.0004015131271444261, + "loss_iou": 0.28125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 113555832, + "step": 1244 + }, + { + "epoch": 5.1875, + "grad_norm": 9.974603644228377, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 113647152, + "step": 1245 + }, + { + "epoch": 5.1875, + "loss": 0.05682176351547241, + "loss_ce": 0.0007914903690107167, + "loss_iou": 0.3671875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 113647152, + "step": 1245 + }, + { + "epoch": 5.191666666666666, + "grad_norm": 3.461158509656596, + "learning_rate": 5e-05, + "loss": 0.1451, + "num_input_tokens_seen": 113738720, + "step": 1246 + }, + { + "epoch": 5.191666666666666, + "loss": 0.21678170561790466, + "loss_ce": 0.001754840137436986, + "loss_iou": 0.263671875, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 113738720, + "step": 1246 + }, + { + "epoch": 5.195833333333334, + "grad_norm": 3.8199671948919143, + "learning_rate": 5e-05, + "loss": 0.121, + "num_input_tokens_seen": 113830052, + "step": 1247 + }, + { + "epoch": 5.195833333333334, + "loss": 0.16566026210784912, + "loss_ce": 0.0017808763077482581, + "loss_iou": 0.361328125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 113830052, + "step": 1247 + }, + { + "epoch": 5.2, + "grad_norm": 11.496281960766611, + "learning_rate": 5e-05, + "loss": 0.134, + "num_input_tokens_seen": 113919636, + "step": 1248 + }, + { + "epoch": 5.2, + "loss": 0.15728822350502014, + "loss_ce": 6.198486062203301e-07, + "loss_iou": 0.34765625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 113919636, + "step": 1248 + }, + { + "epoch": 5.204166666666667, + "grad_norm": 4.603555009446634, + "learning_rate": 5e-05, + "loss": 0.1175, + "num_input_tokens_seen": 114010788, + "step": 1249 + }, + { + "epoch": 5.204166666666667, + "loss": 0.14147533476352692, + "loss_ce": 0.0010486957617104053, + "loss_iou": 0.333984375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 114010788, + "step": 1249 + }, + { + "epoch": 5.208333333333333, + "grad_norm": 3.3103145760410957, + "learning_rate": 5e-05, + "loss": 0.1269, + "num_input_tokens_seen": 114102008, + "step": 1250 + }, + { + "epoch": 5.208333333333333, + "eval_seeclick_CIoU": 0.19745288416743279, + "eval_seeclick_GIoU": 0.1804250180721283, + "eval_seeclick_IoU": 0.3141836002469063, + "eval_seeclick_MAE_all": 0.10292381420731544, + "eval_seeclick_MAE_h": 0.07238547503948212, + "eval_seeclick_MAE_w": 0.22456881403923035, + "eval_seeclick_MAE_x_boxes": 0.24006878584623337, + "eval_seeclick_MAE_y_boxes": 0.07802290096879005, + "eval_seeclick_NUM_probability": 0.999999463558197, + "eval_seeclick_inside_bbox": 0.4630681872367859, + "eval_seeclick_loss": 0.5826772451400757, + "eval_seeclick_loss_ce": 0.13239652663469315, + "eval_seeclick_loss_iou": 0.36871337890625, + "eval_seeclick_loss_num": 0.0897979736328125, + "eval_seeclick_loss_xval": 0.4488525390625, + "eval_seeclick_runtime": 73.2552, + "eval_seeclick_samples_per_second": 0.587, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 114102008, + "step": 1250 + }, + { + "epoch": 5.208333333333333, + "eval_icons_CIoU": 0.3539682552218437, + "eval_icons_GIoU": 0.3729694336652756, + "eval_icons_IoU": 0.43295738101005554, + "eval_icons_MAE_all": 0.06616230122745037, + "eval_icons_MAE_h": 0.14156471192836761, + "eval_icons_MAE_w": 0.09367327392101288, + "eval_icons_MAE_x_boxes": 0.09583292528986931, + "eval_icons_MAE_y_boxes": 0.14096488058567047, + "eval_icons_NUM_probability": 0.9999997913837433, + "eval_icons_inside_bbox": 0.6006944477558136, + "eval_icons_loss": 0.3076817989349365, + "eval_icons_loss_ce": 2.005231726798229e-05, + "eval_icons_loss_iou": 0.34320068359375, + "eval_icons_loss_num": 0.06174468994140625, + "eval_icons_loss_xval": 0.30865478515625, + "eval_icons_runtime": 86.1107, + "eval_icons_samples_per_second": 0.581, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 114102008, + "step": 1250 + }, + { + "epoch": 5.208333333333333, + "eval_screenspot_CIoU": 0.3847437302271525, + "eval_screenspot_GIoU": 0.3658294876416524, + "eval_screenspot_IoU": 0.4502400855223338, + "eval_screenspot_MAE_all": 0.09436274568239848, + "eval_screenspot_MAE_h": 0.08610273400942485, + "eval_screenspot_MAE_w": 0.1881504605213801, + "eval_screenspot_MAE_x_boxes": 0.17523721357186636, + "eval_screenspot_MAE_y_boxes": 0.07575235267480214, + "eval_screenspot_NUM_probability": 0.9999992450078329, + "eval_screenspot_inside_bbox": 0.7116666634877523, + "eval_screenspot_loss": 0.468685507774353, + "eval_screenspot_loss_ce": 1.2692903320991415e-06, + "eval_screenspot_loss_iou": 0.406005859375, + "eval_screenspot_loss_num": 0.09428914388020833, + "eval_screenspot_loss_xval": 0.47119140625, + "eval_screenspot_runtime": 146.7117, + "eval_screenspot_samples_per_second": 0.607, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 114102008, + "step": 1250 + }, + { + "epoch": 5.208333333333333, + "eval_compot_CIoU": 0.49966710805892944, + "eval_compot_GIoU": 0.48504847288131714, + "eval_compot_IoU": 0.5533573031425476, + "eval_compot_MAE_all": 0.0510424617677927, + "eval_compot_MAE_h": 0.05651161074638367, + "eval_compot_MAE_w": 0.12595685943961143, + "eval_compot_MAE_x_boxes": 0.12238426506519318, + "eval_compot_MAE_y_boxes": 0.05694563314318657, + "eval_compot_NUM_probability": 0.9999992847442627, + "eval_compot_inside_bbox": 0.8072916567325592, + "eval_compot_loss": 0.2954633831977844, + "eval_compot_loss_ce": 0.023688997142016888, + "eval_compot_loss_iou": 0.37640380859375, + "eval_compot_loss_num": 0.048618316650390625, + "eval_compot_loss_xval": 0.2430572509765625, + "eval_compot_runtime": 85.198, + "eval_compot_samples_per_second": 0.587, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 114102008, + "step": 1250 + }, + { + "epoch": 5.208333333333333, + "loss": 0.2156691700220108, + "loss_ce": 0.026307594031095505, + "loss_iou": 0.37890625, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 114102008, + "step": 1250 + }, + { + "epoch": 5.2125, + "grad_norm": 2.4482462710704627, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 114193780, + "step": 1251 + }, + { + "epoch": 5.2125, + "loss": 0.0852910578250885, + "loss_ce": 0.0006963338819332421, + "loss_iou": 0.296875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 114193780, + "step": 1251 + }, + { + "epoch": 5.216666666666667, + "grad_norm": 2.7907731585969113, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 114285468, + "step": 1252 + }, + { + "epoch": 5.216666666666667, + "loss": 0.12171787023544312, + "loss_ce": 0.0014786121901124716, + "loss_iou": 0.26953125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 114285468, + "step": 1252 + }, + { + "epoch": 5.220833333333333, + "grad_norm": 3.1560111385127017, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 114377144, + "step": 1253 + }, + { + "epoch": 5.220833333333333, + "loss": 0.0772915780544281, + "loss_ce": 0.0006314230267889798, + "loss_iou": 0.205078125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 114377144, + "step": 1253 + }, + { + "epoch": 5.225, + "grad_norm": 4.287537330574922, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 114468928, + "step": 1254 + }, + { + "epoch": 5.225, + "loss": 0.15133850276470184, + "loss_ce": 9.337875235360116e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 114468928, + "step": 1254 + }, + { + "epoch": 5.229166666666667, + "grad_norm": 4.213109821782446, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 114560320, + "step": 1255 + }, + { + "epoch": 5.229166666666667, + "loss": 0.05044609308242798, + "loss_ce": 0.0013280524872243404, + "loss_iou": 0.3046875, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 114560320, + "step": 1255 + }, + { + "epoch": 5.233333333333333, + "grad_norm": 4.437664875584358, + "learning_rate": 5e-05, + "loss": 0.1562, + "num_input_tokens_seen": 114651584, + "step": 1256 + }, + { + "epoch": 5.233333333333333, + "loss": 0.07865123450756073, + "loss_ce": 0.0002515834057703614, + "loss_iou": 0.392578125, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 114651584, + "step": 1256 + }, + { + "epoch": 5.2375, + "grad_norm": 5.187559708651044, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 114743208, + "step": 1257 + }, + { + "epoch": 5.2375, + "loss": 0.055650509893894196, + "loss_ce": 0.0018937942804768682, + "loss_iou": 0.10888671875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 114743208, + "step": 1257 + }, + { + "epoch": 5.241666666666666, + "grad_norm": 5.861157534137002, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 114834960, + "step": 1258 + }, + { + "epoch": 5.241666666666666, + "loss": 0.08185291290283203, + "loss_ce": 0.0008287417003884912, + "loss_iou": 0.27734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 114834960, + "step": 1258 + }, + { + "epoch": 5.245833333333334, + "grad_norm": 3.0543935312914456, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 114926684, + "step": 1259 + }, + { + "epoch": 5.245833333333334, + "loss": 0.07390153408050537, + "loss_ce": 0.001361246220767498, + "loss_iou": 0.267578125, + "loss_num": 0.01446533203125, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 114926684, + "step": 1259 + }, + { + "epoch": 5.25, + "grad_norm": 3.741079964142253, + "learning_rate": 5e-05, + "loss": 0.1447, + "num_input_tokens_seen": 115017888, + "step": 1260 + }, + { + "epoch": 5.25, + "loss": 0.1192452535033226, + "loss_ce": 0.0007760171429254115, + "loss_iou": 0.2216796875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 115017888, + "step": 1260 + }, + { + "epoch": 5.254166666666666, + "grad_norm": 11.260506478568418, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 115108324, + "step": 1261 + }, + { + "epoch": 5.254166666666666, + "loss": 0.06887489557266235, + "loss_ce": 1.197768870042637e-05, + "loss_iou": 0.2890625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 115108324, + "step": 1261 + }, + { + "epoch": 5.258333333333334, + "grad_norm": 5.874110416519585, + "learning_rate": 5e-05, + "loss": 0.1323, + "num_input_tokens_seen": 115200368, + "step": 1262 + }, + { + "epoch": 5.258333333333334, + "loss": 0.19069834053516388, + "loss_ce": 0.002992353169247508, + "loss_iou": 0.177734375, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 115200368, + "step": 1262 + }, + { + "epoch": 5.2625, + "grad_norm": 2.3060382178106633, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 115291888, + "step": 1263 + }, + { + "epoch": 5.2625, + "loss": 0.0695885717868805, + "loss_ce": 0.0011071269400417805, + "loss_iou": 0.34765625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 115291888, + "step": 1263 + }, + { + "epoch": 5.266666666666667, + "grad_norm": 4.184750546982303, + "learning_rate": 5e-05, + "loss": 0.169, + "num_input_tokens_seen": 115383440, + "step": 1264 + }, + { + "epoch": 5.266666666666667, + "loss": 0.14805006980895996, + "loss_ce": 0.0004365481436252594, + "loss_iou": 0.2373046875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 115383440, + "step": 1264 + }, + { + "epoch": 5.270833333333333, + "grad_norm": 5.722882309669123, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 115474844, + "step": 1265 + }, + { + "epoch": 5.270833333333333, + "loss": 0.11676155775785446, + "loss_ce": 0.00027596583822742105, + "loss_iou": 0.15625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 115474844, + "step": 1265 + }, + { + "epoch": 5.275, + "grad_norm": 2.1351602820682096, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 115565996, + "step": 1266 + }, + { + "epoch": 5.275, + "loss": 0.061716049909591675, + "loss_ce": 0.00014683924382552505, + "loss_iou": 0.318359375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 115565996, + "step": 1266 + }, + { + "epoch": 5.279166666666667, + "grad_norm": 2.9318578257220147, + "learning_rate": 5e-05, + "loss": 0.1345, + "num_input_tokens_seen": 115657736, + "step": 1267 + }, + { + "epoch": 5.279166666666667, + "loss": 0.12814414501190186, + "loss_ce": 0.0016793095273897052, + "loss_iou": 0.2060546875, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 115657736, + "step": 1267 + }, + { + "epoch": 5.283333333333333, + "grad_norm": 5.993788064184745, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 115749500, + "step": 1268 + }, + { + "epoch": 5.283333333333333, + "loss": 0.1011967882514, + "loss_ce": 0.001953623490408063, + "loss_iou": 0.201171875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 115749500, + "step": 1268 + }, + { + "epoch": 5.2875, + "grad_norm": 1.805664621328046, + "learning_rate": 5e-05, + "loss": 0.1322, + "num_input_tokens_seen": 115839948, + "step": 1269 + }, + { + "epoch": 5.2875, + "loss": 0.0894617959856987, + "loss_ce": 3.003023266501259e-05, + "loss_iou": 0.32421875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 115839948, + "step": 1269 + }, + { + "epoch": 5.291666666666667, + "grad_norm": 4.496194953161926, + "learning_rate": 5e-05, + "loss": 0.1543, + "num_input_tokens_seen": 115930852, + "step": 1270 + }, + { + "epoch": 5.291666666666667, + "loss": 0.1023472398519516, + "loss_ce": 0.00047956418711692095, + "loss_iou": 0.1982421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 115930852, + "step": 1270 + }, + { + "epoch": 5.295833333333333, + "grad_norm": 7.9884847357892195, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 116022452, + "step": 1271 + }, + { + "epoch": 5.295833333333333, + "loss": 0.18090632557868958, + "loss_ce": 0.0018902214942499995, + "loss_iou": 0.42578125, + "loss_num": 0.035888671875, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 116022452, + "step": 1271 + }, + { + "epoch": 5.3, + "grad_norm": 4.888515317396117, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 116114104, + "step": 1272 + }, + { + "epoch": 5.3, + "loss": 0.09846118092536926, + "loss_ce": 0.0011406235862523317, + "loss_iou": 0.251953125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 116114104, + "step": 1272 + }, + { + "epoch": 5.304166666666666, + "grad_norm": 13.270427957057752, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 116205948, + "step": 1273 + }, + { + "epoch": 5.304166666666666, + "loss": 0.07233616709709167, + "loss_ce": 0.0012302087852731347, + "loss_iou": 0.32421875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 116205948, + "step": 1273 + }, + { + "epoch": 5.308333333333334, + "grad_norm": 11.445246012507008, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 116296720, + "step": 1274 + }, + { + "epoch": 5.308333333333334, + "loss": 0.10334327071905136, + "loss_ce": 0.0006210966967046261, + "loss_iou": 0.265625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 116296720, + "step": 1274 + }, + { + "epoch": 5.3125, + "grad_norm": 3.9569976986556195, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 116388268, + "step": 1275 + }, + { + "epoch": 5.3125, + "loss": 0.08903685212135315, + "loss_ce": 0.0008868263103067875, + "loss_iou": 0.228515625, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 116388268, + "step": 1275 + }, + { + "epoch": 5.316666666666666, + "grad_norm": 3.683810789503949, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 116479860, + "step": 1276 + }, + { + "epoch": 5.316666666666666, + "loss": 0.07193230837583542, + "loss_ce": 3.289574306108989e-05, + "loss_iou": 0.32421875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 116479860, + "step": 1276 + }, + { + "epoch": 5.320833333333334, + "grad_norm": 4.154684216190813, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 116571556, + "step": 1277 + }, + { + "epoch": 5.320833333333334, + "loss": 0.11861756443977356, + "loss_ce": 0.0005298026371747255, + "loss_iou": 0.474609375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 116571556, + "step": 1277 + }, + { + "epoch": 5.325, + "grad_norm": 3.5111047164996703, + "learning_rate": 5e-05, + "loss": 0.1377, + "num_input_tokens_seen": 116662712, + "step": 1278 + }, + { + "epoch": 5.325, + "loss": 0.16763438284397125, + "loss_ce": 0.001313579734414816, + "loss_iou": 0.279296875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 116662712, + "step": 1278 + }, + { + "epoch": 5.329166666666667, + "grad_norm": 2.4882705059093624, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 116754268, + "step": 1279 + }, + { + "epoch": 5.329166666666667, + "loss": 0.1483645886182785, + "loss_ce": 0.002421897603198886, + "loss_iou": 0.1328125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 116754268, + "step": 1279 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 5.29046572456457, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 116845356, + "step": 1280 + }, + { + "epoch": 5.333333333333333, + "loss": 0.08341895788908005, + "loss_ce": 0.00021278511849232018, + "loss_iou": 0.244140625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 116845356, + "step": 1280 + }, + { + "epoch": 5.3375, + "grad_norm": 9.806375446060992, + "learning_rate": 5e-05, + "loss": 0.1046, + "num_input_tokens_seen": 116936416, + "step": 1281 + }, + { + "epoch": 5.3375, + "loss": 0.09760526567697525, + "loss_ce": 0.0004372965486254543, + "loss_iou": 0.28515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 116936416, + "step": 1281 + }, + { + "epoch": 5.341666666666667, + "grad_norm": 3.2928939554918055, + "learning_rate": 5e-05, + "loss": 0.1307, + "num_input_tokens_seen": 117027856, + "step": 1282 + }, + { + "epoch": 5.341666666666667, + "loss": 0.1211928129196167, + "loss_ce": 0.0005873381742276251, + "loss_iou": 0.333984375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 117027856, + "step": 1282 + }, + { + "epoch": 5.345833333333333, + "grad_norm": 5.52683288897043, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 117118588, + "step": 1283 + }, + { + "epoch": 5.345833333333333, + "loss": 0.10166649520397186, + "loss_ce": 1.2437561963452026e-05, + "loss_iou": 0.3203125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 117118588, + "step": 1283 + }, + { + "epoch": 5.35, + "grad_norm": 2.816179778442025, + "learning_rate": 5e-05, + "loss": 0.1203, + "num_input_tokens_seen": 117209744, + "step": 1284 + }, + { + "epoch": 5.35, + "loss": 0.08381817489862442, + "loss_ce": 0.000688286847434938, + "loss_iou": 0.2099609375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 117209744, + "step": 1284 + }, + { + "epoch": 5.354166666666667, + "grad_norm": 7.291796604136836, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 117301596, + "step": 1285 + }, + { + "epoch": 5.354166666666667, + "loss": 0.11099517345428467, + "loss_ce": 0.0013302592560648918, + "loss_iou": 0.2138671875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 117301596, + "step": 1285 + }, + { + "epoch": 5.358333333333333, + "grad_norm": 3.210137899677224, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 117393500, + "step": 1286 + }, + { + "epoch": 5.358333333333333, + "loss": 0.10765822231769562, + "loss_ce": 0.0013960100477561355, + "loss_iou": 0.291015625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 117393500, + "step": 1286 + }, + { + "epoch": 5.3625, + "grad_norm": 8.119859245564339, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 117484348, + "step": 1287 + }, + { + "epoch": 5.3625, + "loss": 0.07025125622749329, + "loss_ce": 0.00021341571118682623, + "loss_iou": 0.3359375, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 117484348, + "step": 1287 + }, + { + "epoch": 5.366666666666666, + "grad_norm": 8.109501712065667, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 117575352, + "step": 1288 + }, + { + "epoch": 5.366666666666666, + "loss": 0.08108506351709366, + "loss_ce": 0.0003202911466360092, + "loss_iou": 0.294921875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 117575352, + "step": 1288 + }, + { + "epoch": 5.370833333333334, + "grad_norm": 9.906258072505857, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 117666916, + "step": 1289 + }, + { + "epoch": 5.370833333333334, + "loss": 0.09506266564130783, + "loss_ce": 0.0005954969674348831, + "loss_iou": 0.318359375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 117666916, + "step": 1289 + }, + { + "epoch": 5.375, + "grad_norm": 4.899701709085185, + "learning_rate": 5e-05, + "loss": 0.1321, + "num_input_tokens_seen": 117758368, + "step": 1290 + }, + { + "epoch": 5.375, + "loss": 0.13716095685958862, + "loss_ce": 0.0009304783889092505, + "loss_iou": 0.283203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 117758368, + "step": 1290 + }, + { + "epoch": 5.379166666666666, + "grad_norm": 3.6447385189588575, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 117849536, + "step": 1291 + }, + { + "epoch": 5.379166666666666, + "loss": 0.08016116917133331, + "loss_ce": 0.0001440807245671749, + "loss_iou": 0.353515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 117849536, + "step": 1291 + }, + { + "epoch": 5.383333333333334, + "grad_norm": 9.279877811881972, + "learning_rate": 5e-05, + "loss": 0.1479, + "num_input_tokens_seen": 117941236, + "step": 1292 + }, + { + "epoch": 5.383333333333334, + "loss": 0.15879233181476593, + "loss_ce": 0.0013216282241046429, + "loss_iou": 0.33203125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 117941236, + "step": 1292 + }, + { + "epoch": 5.3875, + "grad_norm": 1.947697669110079, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 118032712, + "step": 1293 + }, + { + "epoch": 5.3875, + "loss": 0.10585369169712067, + "loss_ce": 0.0014683237532153726, + "loss_iou": 0.4296875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 118032712, + "step": 1293 + }, + { + "epoch": 5.391666666666667, + "grad_norm": 3.4046967806826474, + "learning_rate": 5e-05, + "loss": 0.1321, + "num_input_tokens_seen": 118123684, + "step": 1294 + }, + { + "epoch": 5.391666666666667, + "loss": 0.12018641829490662, + "loss_ce": 8.188269930542447e-06, + "loss_iou": 0.37890625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 118123684, + "step": 1294 + }, + { + "epoch": 5.395833333333333, + "grad_norm": 4.557620525229239, + "learning_rate": 5e-05, + "loss": 0.1296, + "num_input_tokens_seen": 118214100, + "step": 1295 + }, + { + "epoch": 5.395833333333333, + "loss": 0.1263495683670044, + "loss_ce": 0.00028145581018179655, + "loss_iou": 0.16015625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 118214100, + "step": 1295 + }, + { + "epoch": 5.4, + "grad_norm": 5.160410407604447, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 118305364, + "step": 1296 + }, + { + "epoch": 5.4, + "loss": 0.09206394851207733, + "loss_ce": 0.0007553547620773315, + "loss_iou": 0.345703125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 118305364, + "step": 1296 + }, + { + "epoch": 5.404166666666667, + "grad_norm": 3.2523841062609904, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 118396576, + "step": 1297 + }, + { + "epoch": 5.404166666666667, + "loss": 0.10512672364711761, + "loss_ce": 0.002694476395845413, + "loss_iou": 0.1748046875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 118396576, + "step": 1297 + }, + { + "epoch": 5.408333333333333, + "grad_norm": 3.821223046739121, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 118487648, + "step": 1298 + }, + { + "epoch": 5.408333333333333, + "loss": 0.10508648306131363, + "loss_ce": 0.0013572323368862271, + "loss_iou": 0.2578125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 118487648, + "step": 1298 + }, + { + "epoch": 5.4125, + "grad_norm": 2.9015572422467995, + "learning_rate": 5e-05, + "loss": 0.1482, + "num_input_tokens_seen": 118578664, + "step": 1299 + }, + { + "epoch": 5.4125, + "loss": 0.1781851053237915, + "loss_ce": 0.0006948804948478937, + "loss_iou": 0.26171875, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 118578664, + "step": 1299 + }, + { + "epoch": 5.416666666666667, + "grad_norm": 2.1261865505838276, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 118668848, + "step": 1300 + }, + { + "epoch": 5.416666666666667, + "loss": 0.07364241033792496, + "loss_ce": 3.492744099276024e-06, + "loss_iou": 0.25390625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 118668848, + "step": 1300 + }, + { + "epoch": 5.420833333333333, + "grad_norm": 10.712939128925962, + "learning_rate": 5e-05, + "loss": 0.1183, + "num_input_tokens_seen": 118759692, + "step": 1301 + }, + { + "epoch": 5.420833333333333, + "loss": 0.1656116545200348, + "loss_ce": 0.0014881201786920428, + "loss_iou": 0.2138671875, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 118759692, + "step": 1301 + }, + { + "epoch": 5.425, + "grad_norm": 4.153906106819758, + "learning_rate": 5e-05, + "loss": 0.1514, + "num_input_tokens_seen": 118851164, + "step": 1302 + }, + { + "epoch": 5.425, + "loss": 0.12954509258270264, + "loss_ce": 0.0023173135705292225, + "loss_iou": 0.388671875, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 118851164, + "step": 1302 + }, + { + "epoch": 5.429166666666666, + "grad_norm": 18.213877327103127, + "learning_rate": 5e-05, + "loss": 0.1049, + "num_input_tokens_seen": 118942860, + "step": 1303 + }, + { + "epoch": 5.429166666666666, + "loss": 0.08647345751523972, + "loss_ce": 0.0007343197357840836, + "loss_iou": 0.283203125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 118942860, + "step": 1303 + }, + { + "epoch": 5.433333333333334, + "grad_norm": 3.8800971543706892, + "learning_rate": 5e-05, + "loss": 0.1463, + "num_input_tokens_seen": 119034176, + "step": 1304 + }, + { + "epoch": 5.433333333333334, + "loss": 0.14520961046218872, + "loss_ce": 0.0018990678945556283, + "loss_iou": 0.263671875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 119034176, + "step": 1304 + }, + { + "epoch": 5.4375, + "grad_norm": 3.602987749501208, + "learning_rate": 5e-05, + "loss": 0.1382, + "num_input_tokens_seen": 119125428, + "step": 1305 + }, + { + "epoch": 5.4375, + "loss": 0.1354275494813919, + "loss_ce": 0.0009976228466257453, + "loss_iou": 0.2734375, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 119125428, + "step": 1305 + }, + { + "epoch": 5.441666666666666, + "grad_norm": 4.353646821760044, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 119216000, + "step": 1306 + }, + { + "epoch": 5.441666666666666, + "loss": 0.06598338484764099, + "loss_ce": 4.378542598715285e-06, + "loss_iou": 0.3828125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 119216000, + "step": 1306 + }, + { + "epoch": 5.445833333333334, + "grad_norm": 4.755370196621049, + "learning_rate": 5e-05, + "loss": 0.141, + "num_input_tokens_seen": 119307532, + "step": 1307 + }, + { + "epoch": 5.445833333333334, + "loss": 0.17292292416095734, + "loss_ce": 1.032604995998554e-05, + "loss_iou": 0.498046875, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 119307532, + "step": 1307 + }, + { + "epoch": 5.45, + "grad_norm": 2.3095424606443493, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 119399076, + "step": 1308 + }, + { + "epoch": 5.45, + "loss": 0.10023734718561172, + "loss_ce": 9.391548519488424e-05, + "loss_iou": 0.33203125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 119399076, + "step": 1308 + }, + { + "epoch": 5.454166666666667, + "grad_norm": 5.580918206205799, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 119489568, + "step": 1309 + }, + { + "epoch": 5.454166666666667, + "loss": 0.10514950007200241, + "loss_ce": 3.170152194797993e-05, + "loss_iou": 0.283203125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 119489568, + "step": 1309 + }, + { + "epoch": 5.458333333333333, + "grad_norm": 3.5980309370300323, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 119579068, + "step": 1310 + }, + { + "epoch": 5.458333333333333, + "loss": 0.1334247887134552, + "loss_ce": 0.000612288189586252, + "loss_iou": 0.2275390625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 119579068, + "step": 1310 + }, + { + "epoch": 5.4625, + "grad_norm": 2.3323074278017764, + "learning_rate": 5e-05, + "loss": 0.126, + "num_input_tokens_seen": 119669940, + "step": 1311 + }, + { + "epoch": 5.4625, + "loss": 0.12619194388389587, + "loss_ce": 0.0022600588854402304, + "loss_iou": 0.2041015625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 119669940, + "step": 1311 + }, + { + "epoch": 5.466666666666667, + "grad_norm": 9.77799078595796, + "learning_rate": 5e-05, + "loss": 0.1282, + "num_input_tokens_seen": 119760884, + "step": 1312 + }, + { + "epoch": 5.466666666666667, + "loss": 0.12309600412845612, + "loss_ce": 0.0017733740387484431, + "loss_iou": 0.248046875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 119760884, + "step": 1312 + }, + { + "epoch": 5.470833333333333, + "grad_norm": 29.41909642915102, + "learning_rate": 5e-05, + "loss": 0.1175, + "num_input_tokens_seen": 119852328, + "step": 1313 + }, + { + "epoch": 5.470833333333333, + "loss": 0.10807901620864868, + "loss_ce": 0.001252235728316009, + "loss_iou": 0.1904296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 119852328, + "step": 1313 + }, + { + "epoch": 5.475, + "grad_norm": 2.809070152447759, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 119943524, + "step": 1314 + }, + { + "epoch": 5.475, + "loss": 0.05476412922143936, + "loss_ce": 0.00011477663065306842, + "loss_iou": 0.11669921875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 119943524, + "step": 1314 + }, + { + "epoch": 5.479166666666667, + "grad_norm": 4.520838147968077, + "learning_rate": 5e-05, + "loss": 0.1286, + "num_input_tokens_seen": 120034676, + "step": 1315 + }, + { + "epoch": 5.479166666666667, + "loss": 0.11469803005456924, + "loss_ce": 0.0011726388474926353, + "loss_iou": 0.3984375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 120034676, + "step": 1315 + }, + { + "epoch": 5.483333333333333, + "grad_norm": 3.9476202456045812, + "learning_rate": 5e-05, + "loss": 0.1512, + "num_input_tokens_seen": 120126436, + "step": 1316 + }, + { + "epoch": 5.483333333333333, + "loss": 0.16531720757484436, + "loss_ce": 0.003909740597009659, + "loss_iou": 0.2421875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 120126436, + "step": 1316 + }, + { + "epoch": 5.4875, + "grad_norm": 5.073469551742587, + "learning_rate": 5e-05, + "loss": 0.1701, + "num_input_tokens_seen": 120218940, + "step": 1317 + }, + { + "epoch": 5.4875, + "loss": 0.16629727184772491, + "loss_ce": 0.0010369562078267336, + "loss_iou": 0.173828125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 120218940, + "step": 1317 + }, + { + "epoch": 5.491666666666666, + "grad_norm": 14.568072862075656, + "learning_rate": 5e-05, + "loss": 0.1424, + "num_input_tokens_seen": 120310016, + "step": 1318 + }, + { + "epoch": 5.491666666666666, + "loss": 0.1581239104270935, + "loss_ce": 0.00014966003072913736, + "loss_iou": 0.259765625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 120310016, + "step": 1318 + }, + { + "epoch": 5.495833333333334, + "grad_norm": 4.039395395304905, + "learning_rate": 5e-05, + "loss": 0.132, + "num_input_tokens_seen": 120401500, + "step": 1319 + }, + { + "epoch": 5.495833333333334, + "loss": 0.07492822408676147, + "loss_ce": 0.00043481256579980254, + "loss_iou": 0.2412109375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 120401500, + "step": 1319 + }, + { + "epoch": 5.5, + "grad_norm": 4.834606208319437, + "learning_rate": 5e-05, + "loss": 0.1196, + "num_input_tokens_seen": 120493428, + "step": 1320 + }, + { + "epoch": 5.5, + "loss": 0.10158014297485352, + "loss_ce": 0.00023125787265598774, + "loss_iou": 0.443359375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 120493428, + "step": 1320 + }, + { + "epoch": 5.504166666666666, + "grad_norm": 3.745125674672733, + "learning_rate": 5e-05, + "loss": 0.1399, + "num_input_tokens_seen": 120584860, + "step": 1321 + }, + { + "epoch": 5.504166666666666, + "loss": 0.14215955138206482, + "loss_ce": 0.00031384555040858686, + "loss_iou": 0.296875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 120584860, + "step": 1321 + }, + { + "epoch": 5.508333333333333, + "grad_norm": 4.699836077217276, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 120676340, + "step": 1322 + }, + { + "epoch": 5.508333333333333, + "loss": 0.09844175726175308, + "loss_ce": 0.0008465431164950132, + "loss_iou": 0.25, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 120676340, + "step": 1322 + }, + { + "epoch": 5.5125, + "grad_norm": 9.791064961882169, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 120767704, + "step": 1323 + }, + { + "epoch": 5.5125, + "loss": 0.10201099514961243, + "loss_ce": 0.00017383776139467955, + "loss_iou": 0.392578125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 120767704, + "step": 1323 + }, + { + "epoch": 5.516666666666667, + "grad_norm": 2.11375573286059, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 120858992, + "step": 1324 + }, + { + "epoch": 5.516666666666667, + "loss": 0.08306519687175751, + "loss_ce": 0.000332036754116416, + "loss_iou": 0.2734375, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 120858992, + "step": 1324 + }, + { + "epoch": 5.520833333333333, + "grad_norm": 3.817167031522216, + "learning_rate": 5e-05, + "loss": 0.1287, + "num_input_tokens_seen": 120949912, + "step": 1325 + }, + { + "epoch": 5.520833333333333, + "loss": 0.16249600052833557, + "loss_ce": 0.0034231427125632763, + "loss_iou": 0.140625, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 120949912, + "step": 1325 + }, + { + "epoch": 5.525, + "grad_norm": 2.8508465367847236, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 121041228, + "step": 1326 + }, + { + "epoch": 5.525, + "loss": 0.11468237638473511, + "loss_ce": 0.0015842285938560963, + "loss_iou": 0.2578125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 121041228, + "step": 1326 + }, + { + "epoch": 5.529166666666667, + "grad_norm": 3.405898668083595, + "learning_rate": 5e-05, + "loss": 0.1379, + "num_input_tokens_seen": 121133032, + "step": 1327 + }, + { + "epoch": 5.529166666666667, + "loss": 0.16581670939922333, + "loss_ce": 0.001906805788166821, + "loss_iou": 0.2578125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 121133032, + "step": 1327 + }, + { + "epoch": 5.533333333333333, + "grad_norm": 14.186518262591795, + "learning_rate": 5e-05, + "loss": 0.137, + "num_input_tokens_seen": 121223960, + "step": 1328 + }, + { + "epoch": 5.533333333333333, + "loss": 0.18396437168121338, + "loss_ce": 4.412940143083688e-06, + "loss_iou": 0.171875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 121223960, + "step": 1328 + }, + { + "epoch": 5.5375, + "grad_norm": 3.8342764663678093, + "learning_rate": 5e-05, + "loss": 0.1694, + "num_input_tokens_seen": 121315624, + "step": 1329 + }, + { + "epoch": 5.5375, + "loss": 0.2375406175851822, + "loss_ce": 0.00963535811752081, + "loss_iou": 0.236328125, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 121315624, + "step": 1329 + }, + { + "epoch": 5.541666666666667, + "grad_norm": 7.0100846416572, + "learning_rate": 5e-05, + "loss": 0.1295, + "num_input_tokens_seen": 121406800, + "step": 1330 + }, + { + "epoch": 5.541666666666667, + "loss": 0.1506330966949463, + "loss_ce": 0.0028669715393334627, + "loss_iou": 0.26171875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 121406800, + "step": 1330 + }, + { + "epoch": 5.545833333333333, + "grad_norm": 6.519639379928086, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 121498668, + "step": 1331 + }, + { + "epoch": 5.545833333333333, + "loss": 0.11283191293478012, + "loss_ce": 0.0007713669911026955, + "loss_iou": 0.42578125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 121498668, + "step": 1331 + }, + { + "epoch": 5.55, + "grad_norm": 3.8741606092338543, + "learning_rate": 5e-05, + "loss": 0.0984, + "num_input_tokens_seen": 121590040, + "step": 1332 + }, + { + "epoch": 5.55, + "loss": 0.09328826516866684, + "loss_ce": 0.000362245220458135, + "loss_iou": 0.25390625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 121590040, + "step": 1332 + }, + { + "epoch": 5.554166666666667, + "grad_norm": 3.3976582814650373, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 121681272, + "step": 1333 + }, + { + "epoch": 5.554166666666667, + "loss": 0.09696759283542633, + "loss_ce": 0.00013531387958209962, + "loss_iou": 0.408203125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 121681272, + "step": 1333 + }, + { + "epoch": 5.558333333333334, + "grad_norm": 5.950849140468419, + "learning_rate": 5e-05, + "loss": 0.1367, + "num_input_tokens_seen": 121771680, + "step": 1334 + }, + { + "epoch": 5.558333333333334, + "loss": 0.1399446427822113, + "loss_ce": 0.0008150078938342631, + "loss_iou": 0.40234375, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 121771680, + "step": 1334 + }, + { + "epoch": 5.5625, + "grad_norm": 4.336659565505447, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 121862724, + "step": 1335 + }, + { + "epoch": 5.5625, + "loss": 0.08848069608211517, + "loss_ce": 1.0237114111077972e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 121862724, + "step": 1335 + }, + { + "epoch": 5.566666666666666, + "grad_norm": 3.9108249628072014, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 121954404, + "step": 1336 + }, + { + "epoch": 5.566666666666666, + "loss": 0.0976998582482338, + "loss_ce": 0.00010464760998729616, + "loss_iou": 0.2470703125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 121954404, + "step": 1336 + }, + { + "epoch": 5.570833333333333, + "grad_norm": 2.2674158318428037, + "learning_rate": 5e-05, + "loss": 0.1293, + "num_input_tokens_seen": 122044452, + "step": 1337 + }, + { + "epoch": 5.570833333333333, + "loss": 0.18020781874656677, + "loss_ce": 0.0006118802120909095, + "loss_iou": 0.08984375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 122044452, + "step": 1337 + }, + { + "epoch": 5.575, + "grad_norm": 3.6274714851944436, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 122135744, + "step": 1338 + }, + { + "epoch": 5.575, + "loss": 0.09790819138288498, + "loss_ce": 7.802974323567469e-06, + "loss_iou": 0.240234375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 122135744, + "step": 1338 + }, + { + "epoch": 5.579166666666667, + "grad_norm": 6.199970319443036, + "learning_rate": 5e-05, + "loss": 0.1686, + "num_input_tokens_seen": 122226308, + "step": 1339 + }, + { + "epoch": 5.579166666666667, + "loss": 0.232774555683136, + "loss_ce": 0.0005968232871964574, + "loss_iou": 0.35546875, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 122226308, + "step": 1339 + }, + { + "epoch": 5.583333333333333, + "grad_norm": 3.0077795954194295, + "learning_rate": 5e-05, + "loss": 0.1475, + "num_input_tokens_seen": 122317768, + "step": 1340 + }, + { + "epoch": 5.583333333333333, + "loss": 0.1635025143623352, + "loss_ce": 5.0357546570012346e-05, + "loss_iou": 0.279296875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 122317768, + "step": 1340 + }, + { + "epoch": 5.5875, + "grad_norm": 3.7756357130247578, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 122409612, + "step": 1341 + }, + { + "epoch": 5.5875, + "loss": 0.12036249041557312, + "loss_ce": 7.745936454739422e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 122409612, + "step": 1341 + }, + { + "epoch": 5.591666666666667, + "grad_norm": 3.0500457168651973, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 122500640, + "step": 1342 + }, + { + "epoch": 5.591666666666667, + "loss": 0.06776072829961777, + "loss_ce": 1.1704881217156071e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 122500640, + "step": 1342 + }, + { + "epoch": 5.595833333333333, + "grad_norm": 2.9487248334348326, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 122591824, + "step": 1343 + }, + { + "epoch": 5.595833333333333, + "loss": 0.13929034769535065, + "loss_ce": 0.0026021194644272327, + "loss_iou": 0.2421875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 122591824, + "step": 1343 + }, + { + "epoch": 5.6, + "grad_norm": 9.735746598486841, + "learning_rate": 5e-05, + "loss": 0.1138, + "num_input_tokens_seen": 122683248, + "step": 1344 + }, + { + "epoch": 5.6, + "loss": 0.12929841876029968, + "loss_ce": 0.00048372356104664505, + "loss_iou": 0.365234375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 122683248, + "step": 1344 + }, + { + "epoch": 5.604166666666667, + "grad_norm": 6.662715988668031, + "learning_rate": 5e-05, + "loss": 0.1206, + "num_input_tokens_seen": 122774692, + "step": 1345 + }, + { + "epoch": 5.604166666666667, + "loss": 0.16683092713356018, + "loss_ce": 0.001517201540991664, + "loss_iou": 0.41796875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 122774692, + "step": 1345 + }, + { + "epoch": 5.608333333333333, + "grad_norm": 13.980924043996321, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 122865452, + "step": 1346 + }, + { + "epoch": 5.608333333333333, + "loss": 0.04694174975156784, + "loss_ce": 0.00043295894283801317, + "loss_iou": 0.173828125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 122865452, + "step": 1346 + }, + { + "epoch": 5.6125, + "grad_norm": 2.274423403196373, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 122956992, + "step": 1347 + }, + { + "epoch": 5.6125, + "loss": 0.08316200226545334, + "loss_ce": 0.0005814348114654422, + "loss_iou": 0.30078125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 122956992, + "step": 1347 + }, + { + "epoch": 5.616666666666667, + "grad_norm": 1.7727502604459684, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 123048252, + "step": 1348 + }, + { + "epoch": 5.616666666666667, + "loss": 0.07918908447027206, + "loss_ce": 0.00014855354675091803, + "loss_iou": 0.271484375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 123048252, + "step": 1348 + }, + { + "epoch": 5.620833333333334, + "grad_norm": 6.512267768507208, + "learning_rate": 5e-05, + "loss": 0.1754, + "num_input_tokens_seen": 123139824, + "step": 1349 + }, + { + "epoch": 5.620833333333334, + "loss": 0.16340044140815735, + "loss_ce": 0.001107958611100912, + "loss_iou": 0.32421875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 123139824, + "step": 1349 + }, + { + "epoch": 5.625, + "grad_norm": 2.7742319380800087, + "learning_rate": 5e-05, + "loss": 0.114, + "num_input_tokens_seen": 123231200, + "step": 1350 + }, + { + "epoch": 5.625, + "loss": 0.13310708105564117, + "loss_ce": 0.0017289024544879794, + "loss_iou": 0.3203125, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 123231200, + "step": 1350 + }, + { + "epoch": 5.629166666666666, + "grad_norm": 2.7421581868421154, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 123322200, + "step": 1351 + }, + { + "epoch": 5.629166666666666, + "loss": 0.08086127042770386, + "loss_ce": 0.0007373740081675351, + "loss_iou": 0.224609375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 123322200, + "step": 1351 + }, + { + "epoch": 5.633333333333333, + "grad_norm": 12.181092060389654, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 123413700, + "step": 1352 + }, + { + "epoch": 5.633333333333333, + "loss": 0.1216948851943016, + "loss_ce": 2.892697011702694e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 123413700, + "step": 1352 + }, + { + "epoch": 5.6375, + "grad_norm": 2.804866864367334, + "learning_rate": 5e-05, + "loss": 0.1766, + "num_input_tokens_seen": 123505476, + "step": 1353 + }, + { + "epoch": 5.6375, + "loss": 0.19010448455810547, + "loss_ce": 0.0001630748447496444, + "loss_iou": 0.203125, + "loss_num": 0.0380859375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 123505476, + "step": 1353 + }, + { + "epoch": 5.641666666666667, + "grad_norm": 2.970795203086947, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 123597304, + "step": 1354 + }, + { + "epoch": 5.641666666666667, + "loss": 0.10757936537265778, + "loss_ce": 0.001179831917397678, + "loss_iou": 0.234375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 123597304, + "step": 1354 + }, + { + "epoch": 5.645833333333333, + "grad_norm": 4.862489334267388, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 123688472, + "step": 1355 + }, + { + "epoch": 5.645833333333333, + "loss": 0.08013699948787689, + "loss_ce": 2.8353737434372306e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 123688472, + "step": 1355 + }, + { + "epoch": 5.65, + "grad_norm": 6.869654964868086, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 123779376, + "step": 1356 + }, + { + "epoch": 5.65, + "loss": 0.10132080316543579, + "loss_ce": 4.822035043616779e-05, + "loss_iou": 0.15234375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 123779376, + "step": 1356 + }, + { + "epoch": 5.654166666666667, + "grad_norm": 2.983921317394553, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 123870148, + "step": 1357 + }, + { + "epoch": 5.654166666666667, + "loss": 0.09474675357341766, + "loss_ce": 0.000355890195351094, + "loss_iou": 0.37109375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 123870148, + "step": 1357 + }, + { + "epoch": 5.658333333333333, + "grad_norm": 5.16639456092189, + "learning_rate": 5e-05, + "loss": 0.1203, + "num_input_tokens_seen": 123961500, + "step": 1358 + }, + { + "epoch": 5.658333333333333, + "loss": 0.15206214785575867, + "loss_ce": 0.00020666493219323456, + "loss_iou": 0.40234375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 123961500, + "step": 1358 + }, + { + "epoch": 5.6625, + "grad_norm": 2.583455558358417, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 124052488, + "step": 1359 + }, + { + "epoch": 5.6625, + "loss": 0.15860290825366974, + "loss_ce": 3.058829406654695e-06, + "loss_iou": 0.359375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 124052488, + "step": 1359 + }, + { + "epoch": 5.666666666666667, + "grad_norm": 5.218075262894594, + "learning_rate": 5e-05, + "loss": 0.119, + "num_input_tokens_seen": 124142904, + "step": 1360 + }, + { + "epoch": 5.666666666666667, + "loss": 0.13689467310905457, + "loss_ce": 8.085868103080429e-06, + "loss_iou": 0.21484375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 124142904, + "step": 1360 + }, + { + "epoch": 5.670833333333333, + "grad_norm": 4.067380297859276, + "learning_rate": 5e-05, + "loss": 0.1156, + "num_input_tokens_seen": 124234728, + "step": 1361 + }, + { + "epoch": 5.670833333333333, + "loss": 0.1417345404624939, + "loss_ce": 0.0017504148418083787, + "loss_iou": 0.361328125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 124234728, + "step": 1361 + }, + { + "epoch": 5.675, + "grad_norm": 5.526560302845275, + "learning_rate": 5e-05, + "loss": 0.0803, + "num_input_tokens_seen": 124325384, + "step": 1362 + }, + { + "epoch": 5.675, + "loss": 0.05790679529309273, + "loss_ce": 0.0008694432908669114, + "loss_iou": 0.3203125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 124325384, + "step": 1362 + }, + { + "epoch": 5.679166666666667, + "grad_norm": 4.522456855446742, + "learning_rate": 5e-05, + "loss": 0.134, + "num_input_tokens_seen": 124417208, + "step": 1363 + }, + { + "epoch": 5.679166666666667, + "loss": 0.14377330243587494, + "loss_ce": 0.00235484610311687, + "loss_iou": 0.06103515625, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 124417208, + "step": 1363 + }, + { + "epoch": 5.683333333333334, + "grad_norm": 5.136114884030797, + "learning_rate": 5e-05, + "loss": 0.2137, + "num_input_tokens_seen": 124508272, + "step": 1364 + }, + { + "epoch": 5.683333333333334, + "loss": 0.3197259306907654, + "loss_ce": 0.0006646495312452316, + "loss_iou": 0.2421875, + "loss_num": 0.06396484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 124508272, + "step": 1364 + }, + { + "epoch": 5.6875, + "grad_norm": 4.363438357339952, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 124599716, + "step": 1365 + }, + { + "epoch": 5.6875, + "loss": 0.07296618819236755, + "loss_ce": 0.0005632347892969847, + "loss_iou": 0.333984375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 124599716, + "step": 1365 + }, + { + "epoch": 5.691666666666666, + "grad_norm": 5.8186678053413114, + "learning_rate": 5e-05, + "loss": 0.1762, + "num_input_tokens_seen": 124691168, + "step": 1366 + }, + { + "epoch": 5.691666666666666, + "loss": 0.1794736683368683, + "loss_ce": 0.00012184677325421944, + "loss_iou": 0.365234375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 124691168, + "step": 1366 + }, + { + "epoch": 5.695833333333333, + "grad_norm": 5.472687379691303, + "learning_rate": 5e-05, + "loss": 0.1672, + "num_input_tokens_seen": 124782024, + "step": 1367 + }, + { + "epoch": 5.695833333333333, + "loss": 0.20980660617351532, + "loss_ce": 5.9296260587871075e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 124782024, + "step": 1367 + }, + { + "epoch": 5.7, + "grad_norm": 6.9917419649008785, + "learning_rate": 5e-05, + "loss": 0.1356, + "num_input_tokens_seen": 124873252, + "step": 1368 + }, + { + "epoch": 5.7, + "loss": 0.12677843868732452, + "loss_ce": 0.002281972672790289, + "loss_iou": 0.26953125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 124873252, + "step": 1368 + }, + { + "epoch": 5.704166666666667, + "grad_norm": 5.814616735385409, + "learning_rate": 5e-05, + "loss": 0.1259, + "num_input_tokens_seen": 124964664, + "step": 1369 + }, + { + "epoch": 5.704166666666667, + "loss": 0.09749096632003784, + "loss_ce": 0.007006343454122543, + "loss_iou": 0.2890625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 124964664, + "step": 1369 + }, + { + "epoch": 5.708333333333333, + "grad_norm": 3.0660984423798148, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 125055940, + "step": 1370 + }, + { + "epoch": 5.708333333333333, + "loss": 0.06529150158166885, + "loss_ce": 0.001647092285566032, + "loss_iou": 0.259765625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 125055940, + "step": 1370 + }, + { + "epoch": 5.7125, + "grad_norm": 6.218628239573136, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 125146680, + "step": 1371 + }, + { + "epoch": 5.7125, + "loss": 0.09907136857509613, + "loss_ce": 0.0005148549098521471, + "loss_iou": 0.32421875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 125146680, + "step": 1371 + }, + { + "epoch": 5.716666666666667, + "grad_norm": 3.549855326087419, + "learning_rate": 5e-05, + "loss": 0.1178, + "num_input_tokens_seen": 125237192, + "step": 1372 + }, + { + "epoch": 5.716666666666667, + "loss": 0.147294819355011, + "loss_ce": 0.0002000824606511742, + "loss_iou": 0.3359375, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 125237192, + "step": 1372 + }, + { + "epoch": 5.720833333333333, + "grad_norm": 3.438669447358349, + "learning_rate": 5e-05, + "loss": 0.1373, + "num_input_tokens_seen": 125328156, + "step": 1373 + }, + { + "epoch": 5.720833333333333, + "loss": 0.1804531067609787, + "loss_ce": 4.8438068915857e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 125328156, + "step": 1373 + }, + { + "epoch": 5.725, + "grad_norm": 4.258688165607683, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 125419616, + "step": 1374 + }, + { + "epoch": 5.725, + "loss": 0.1245311051607132, + "loss_ce": 4.9905396735994145e-05, + "loss_iou": 0.365234375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 125419616, + "step": 1374 + }, + { + "epoch": 5.729166666666667, + "grad_norm": 11.280003735303778, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 125511372, + "step": 1375 + }, + { + "epoch": 5.729166666666667, + "loss": 0.08632014691829681, + "loss_ce": 9.273333853343502e-05, + "loss_iou": 0.375, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 125511372, + "step": 1375 + }, + { + "epoch": 5.733333333333333, + "grad_norm": 4.239119005263746, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 125602504, + "step": 1376 + }, + { + "epoch": 5.733333333333333, + "loss": 0.07424075156450272, + "loss_ce": 3.726166323758662e-05, + "loss_iou": 0.283203125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 125602504, + "step": 1376 + }, + { + "epoch": 5.7375, + "grad_norm": 10.669632714655261, + "learning_rate": 5e-05, + "loss": 0.1424, + "num_input_tokens_seen": 125693512, + "step": 1377 + }, + { + "epoch": 5.7375, + "loss": 0.14643409848213196, + "loss_ce": 0.0008347236434929073, + "loss_iou": 0.1611328125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 125693512, + "step": 1377 + }, + { + "epoch": 5.741666666666667, + "grad_norm": 1.2401994437594337, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 125784776, + "step": 1378 + }, + { + "epoch": 5.741666666666667, + "loss": 0.12819138169288635, + "loss_ce": 0.00024643377400934696, + "loss_iou": 0.09814453125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 125784776, + "step": 1378 + }, + { + "epoch": 5.745833333333334, + "grad_norm": 6.072454227835264, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 125875244, + "step": 1379 + }, + { + "epoch": 5.745833333333334, + "loss": 0.10392003506422043, + "loss_ce": 7.674920198041946e-06, + "loss_iou": 0.279296875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 125875244, + "step": 1379 + }, + { + "epoch": 5.75, + "grad_norm": 4.779781538803755, + "learning_rate": 5e-05, + "loss": 0.1221, + "num_input_tokens_seen": 125966560, + "step": 1380 + }, + { + "epoch": 5.75, + "loss": 0.13277241587638855, + "loss_ce": 0.006856895983219147, + "loss_iou": 0.26953125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 125966560, + "step": 1380 + }, + { + "epoch": 5.754166666666666, + "grad_norm": 3.803003348193671, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 126058024, + "step": 1381 + }, + { + "epoch": 5.754166666666666, + "loss": 0.08865412324666977, + "loss_ce": 0.000580394989810884, + "loss_iou": 0.322265625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 126058024, + "step": 1381 + }, + { + "epoch": 5.758333333333333, + "grad_norm": 7.029219413765709, + "learning_rate": 5e-05, + "loss": 0.1119, + "num_input_tokens_seen": 126149752, + "step": 1382 + }, + { + "epoch": 5.758333333333333, + "loss": 0.11876152455806732, + "loss_ce": 0.0031609362922608852, + "loss_iou": 0.345703125, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 126149752, + "step": 1382 + }, + { + "epoch": 5.7625, + "grad_norm": 4.803804465593587, + "learning_rate": 5e-05, + "loss": 0.1242, + "num_input_tokens_seen": 126240872, + "step": 1383 + }, + { + "epoch": 5.7625, + "loss": 0.14519238471984863, + "loss_ce": 2.025286630669143e-05, + "loss_iou": 0.41015625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 126240872, + "step": 1383 + }, + { + "epoch": 5.766666666666667, + "grad_norm": 3.753139448773272, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 126331408, + "step": 1384 + }, + { + "epoch": 5.766666666666667, + "loss": 0.08953894674777985, + "loss_ce": 0.0019229742465540767, + "loss_iou": 0.271484375, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 126331408, + "step": 1384 + }, + { + "epoch": 5.770833333333333, + "grad_norm": 6.9389629332481215, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 126422480, + "step": 1385 + }, + { + "epoch": 5.770833333333333, + "loss": 0.10029729455709457, + "loss_ce": 0.0013287862529978156, + "loss_iou": 0.2421875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 126422480, + "step": 1385 + }, + { + "epoch": 5.775, + "grad_norm": 2.935179544076038, + "learning_rate": 5e-05, + "loss": 0.1264, + "num_input_tokens_seen": 126514056, + "step": 1386 + }, + { + "epoch": 5.775, + "loss": 0.10370725393295288, + "loss_ce": 8.521106792613864e-06, + "loss_iou": 0.2734375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 126514056, + "step": 1386 + }, + { + "epoch": 5.779166666666667, + "grad_norm": 3.8311792865692285, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 126605832, + "step": 1387 + }, + { + "epoch": 5.779166666666667, + "loss": 0.08688151091337204, + "loss_ce": 0.0016306517645716667, + "loss_iou": 0.259765625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 126605832, + "step": 1387 + }, + { + "epoch": 5.783333333333333, + "grad_norm": 5.511775875742109, + "learning_rate": 5e-05, + "loss": 0.1236, + "num_input_tokens_seen": 126697332, + "step": 1388 + }, + { + "epoch": 5.783333333333333, + "loss": 0.13793256878852844, + "loss_ce": 0.0012443342711776495, + "loss_iou": 0.294921875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 126697332, + "step": 1388 + }, + { + "epoch": 5.7875, + "grad_norm": 3.1588293585554132, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 126788452, + "step": 1389 + }, + { + "epoch": 5.7875, + "loss": 0.05696623772382736, + "loss_ce": 2.0436800696188584e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 126788452, + "step": 1389 + }, + { + "epoch": 5.791666666666667, + "grad_norm": 1.7719242686496692, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 126879952, + "step": 1390 + }, + { + "epoch": 5.791666666666667, + "loss": 0.08999593555927277, + "loss_ce": 0.0005336473695933819, + "loss_iou": 0.30859375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 126879952, + "step": 1390 + }, + { + "epoch": 5.795833333333333, + "grad_norm": 4.821880653146912, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 126970880, + "step": 1391 + }, + { + "epoch": 5.795833333333333, + "loss": 0.09289488196372986, + "loss_ce": 0.00030455196974799037, + "loss_iou": 0.31640625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 126970880, + "step": 1391 + }, + { + "epoch": 5.8, + "grad_norm": 19.571271712860046, + "learning_rate": 5e-05, + "loss": 0.1938, + "num_input_tokens_seen": 127062744, + "step": 1392 + }, + { + "epoch": 5.8, + "loss": 0.207724928855896, + "loss_ce": 0.000693686306476593, + "loss_iou": 0.298828125, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 127062744, + "step": 1392 + }, + { + "epoch": 5.804166666666667, + "grad_norm": 9.288593008201802, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 127154420, + "step": 1393 + }, + { + "epoch": 5.804166666666667, + "loss": 0.11515213549137115, + "loss_ce": 0.0022065802477300167, + "loss_iou": 0.3125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 127154420, + "step": 1393 + }, + { + "epoch": 5.808333333333334, + "grad_norm": 4.990670848828062, + "learning_rate": 5e-05, + "loss": 0.152, + "num_input_tokens_seen": 127246160, + "step": 1394 + }, + { + "epoch": 5.808333333333334, + "loss": 0.13062453269958496, + "loss_ce": 0.001825102255679667, + "loss_iou": 0.3359375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 127246160, + "step": 1394 + }, + { + "epoch": 5.8125, + "grad_norm": 9.611045071208759, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 127337672, + "step": 1395 + }, + { + "epoch": 5.8125, + "loss": 0.13898390531539917, + "loss_ce": 6.788775499444455e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 127337672, + "step": 1395 + }, + { + "epoch": 5.816666666666666, + "grad_norm": 6.6446252308609655, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 127428936, + "step": 1396 + }, + { + "epoch": 5.816666666666666, + "loss": 0.09065217524766922, + "loss_ce": 0.0022732634097337723, + "loss_iou": 0.21484375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 127428936, + "step": 1396 + }, + { + "epoch": 5.820833333333333, + "grad_norm": 5.360873250355205, + "learning_rate": 5e-05, + "loss": 0.161, + "num_input_tokens_seen": 127520300, + "step": 1397 + }, + { + "epoch": 5.820833333333333, + "loss": 0.16661198437213898, + "loss_ce": 0.0024426807649433613, + "loss_iou": 0.392578125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 127520300, + "step": 1397 + }, + { + "epoch": 5.825, + "grad_norm": 3.458595941170449, + "learning_rate": 5e-05, + "loss": 0.1549, + "num_input_tokens_seen": 127611440, + "step": 1398 + }, + { + "epoch": 5.825, + "loss": 0.10185273736715317, + "loss_ce": 0.00010713595838751644, + "loss_iou": 0.298828125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 127611440, + "step": 1398 + }, + { + "epoch": 5.829166666666667, + "grad_norm": 5.753849664087916, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 127702876, + "step": 1399 + }, + { + "epoch": 5.829166666666667, + "loss": 0.12810850143432617, + "loss_ce": 0.00014066360017750412, + "loss_iou": 0.265625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 127702876, + "step": 1399 + }, + { + "epoch": 5.833333333333333, + "grad_norm": 7.649382509419218, + "learning_rate": 5e-05, + "loss": 0.136, + "num_input_tokens_seen": 127794236, + "step": 1400 + }, + { + "epoch": 5.833333333333333, + "loss": 0.1401953399181366, + "loss_ce": 0.0010351943783462048, + "loss_iou": 0.23046875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 127794236, + "step": 1400 + }, + { + "epoch": 5.8375, + "grad_norm": 3.7614483806142522, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 127885452, + "step": 1401 + }, + { + "epoch": 5.8375, + "loss": 0.08002236485481262, + "loss_ce": 0.00020363648945931345, + "loss_iou": 0.26171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 127885452, + "step": 1401 + }, + { + "epoch": 5.841666666666667, + "grad_norm": 4.4975016940302615, + "learning_rate": 5e-05, + "loss": 0.2282, + "num_input_tokens_seen": 127976184, + "step": 1402 + }, + { + "epoch": 5.841666666666667, + "loss": 0.15244199335575104, + "loss_ce": 0.0026617220137268305, + "loss_iou": 0.310546875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 127976184, + "step": 1402 + }, + { + "epoch": 5.845833333333333, + "grad_norm": 2.8847976439767526, + "learning_rate": 5e-05, + "loss": 0.1286, + "num_input_tokens_seen": 128067436, + "step": 1403 + }, + { + "epoch": 5.845833333333333, + "loss": 0.08370509743690491, + "loss_ce": 0.010707048699259758, + "loss_iou": 0.267578125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 128067436, + "step": 1403 + }, + { + "epoch": 5.85, + "grad_norm": 3.791157871723593, + "learning_rate": 5e-05, + "loss": 0.1328, + "num_input_tokens_seen": 128158428, + "step": 1404 + }, + { + "epoch": 5.85, + "loss": 0.1484401822090149, + "loss_ce": 0.0034969523549079895, + "loss_iou": 0.1865234375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 128158428, + "step": 1404 + }, + { + "epoch": 5.854166666666667, + "grad_norm": 3.90694991816708, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 128249980, + "step": 1405 + }, + { + "epoch": 5.854166666666667, + "loss": 0.080207958817482, + "loss_ce": 0.0010301051661372185, + "loss_iou": 0.224609375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 128249980, + "step": 1405 + }, + { + "epoch": 5.858333333333333, + "grad_norm": 8.05090517798076, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 128341360, + "step": 1406 + }, + { + "epoch": 5.858333333333333, + "loss": 0.09763146936893463, + "loss_ce": 0.0026912896428257227, + "loss_iou": 0.259765625, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 128341360, + "step": 1406 + }, + { + "epoch": 5.8625, + "grad_norm": 5.301622433216056, + "learning_rate": 5e-05, + "loss": 0.1137, + "num_input_tokens_seen": 128432648, + "step": 1407 + }, + { + "epoch": 5.8625, + "loss": 0.15396666526794434, + "loss_ce": 0.0016381873283535242, + "loss_iou": 0.28125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 128432648, + "step": 1407 + }, + { + "epoch": 5.866666666666667, + "grad_norm": 4.992514994151027, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 128524052, + "step": 1408 + }, + { + "epoch": 5.866666666666667, + "loss": 0.09603699296712875, + "loss_ce": 0.0002728329855017364, + "loss_iou": 0.40234375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 128524052, + "step": 1408 + }, + { + "epoch": 5.870833333333334, + "grad_norm": 4.992700817112639, + "learning_rate": 5e-05, + "loss": 0.157, + "num_input_tokens_seen": 128615208, + "step": 1409 + }, + { + "epoch": 5.870833333333334, + "loss": 0.14660075306892395, + "loss_ce": 0.00158121925778687, + "loss_iou": 0.3359375, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 128615208, + "step": 1409 + }, + { + "epoch": 5.875, + "grad_norm": 2.618801455878482, + "learning_rate": 5e-05, + "loss": 0.1197, + "num_input_tokens_seen": 128706640, + "step": 1410 + }, + { + "epoch": 5.875, + "loss": 0.09924958646297455, + "loss_ce": 0.0009219488129019737, + "loss_iou": 0.376953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 128706640, + "step": 1410 + }, + { + "epoch": 5.879166666666666, + "grad_norm": 2.0525356578434777, + "learning_rate": 5e-05, + "loss": 0.1074, + "num_input_tokens_seen": 128798064, + "step": 1411 + }, + { + "epoch": 5.879166666666666, + "loss": 0.12654848396778107, + "loss_ce": 0.0030896144453436136, + "loss_iou": 0.28125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 128798064, + "step": 1411 + }, + { + "epoch": 5.883333333333333, + "grad_norm": 4.246914326681091, + "learning_rate": 5e-05, + "loss": 0.1115, + "num_input_tokens_seen": 128889052, + "step": 1412 + }, + { + "epoch": 5.883333333333333, + "loss": 0.10726828873157501, + "loss_ce": 9.055174450622872e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 128889052, + "step": 1412 + }, + { + "epoch": 5.8875, + "grad_norm": 10.112082124521148, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 128980372, + "step": 1413 + }, + { + "epoch": 5.8875, + "loss": 0.1580641269683838, + "loss_ce": 0.0011732680723071098, + "loss_iou": 0.2412109375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 128980372, + "step": 1413 + }, + { + "epoch": 5.891666666666667, + "grad_norm": 2.956973856175846, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 129071964, + "step": 1414 + }, + { + "epoch": 5.891666666666667, + "loss": 0.09260989725589752, + "loss_ce": 0.0021863128058612347, + "loss_iou": 0.138671875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 129071964, + "step": 1414 + }, + { + "epoch": 5.895833333333333, + "grad_norm": 7.539293561220471, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 129162712, + "step": 1415 + }, + { + "epoch": 5.895833333333333, + "loss": 0.11663861572742462, + "loss_ce": 0.0017780864145606756, + "loss_iou": 0.234375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 129162712, + "step": 1415 + }, + { + "epoch": 5.9, + "grad_norm": 2.354926479183944, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 129254180, + "step": 1416 + }, + { + "epoch": 5.9, + "loss": 0.14426201581954956, + "loss_ce": 0.00046319590182974935, + "loss_iou": 0.369140625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 129254180, + "step": 1416 + }, + { + "epoch": 5.904166666666667, + "grad_norm": 3.0284601028360214, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 129345164, + "step": 1417 + }, + { + "epoch": 5.904166666666667, + "loss": 0.048389360308647156, + "loss_ce": 0.0005377948982641101, + "loss_iou": 0.3046875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 129345164, + "step": 1417 + }, + { + "epoch": 5.908333333333333, + "grad_norm": 11.571107906423846, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 129436412, + "step": 1418 + }, + { + "epoch": 5.908333333333333, + "loss": 0.08516918122768402, + "loss_ce": 2.514522202545777e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 129436412, + "step": 1418 + }, + { + "epoch": 5.9125, + "grad_norm": 3.321284229778174, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 129528112, + "step": 1419 + }, + { + "epoch": 5.9125, + "loss": 0.07697541266679764, + "loss_ce": 0.0007577605429105461, + "loss_iou": 0.32421875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 129528112, + "step": 1419 + }, + { + "epoch": 5.916666666666667, + "grad_norm": 2.8526600238132858, + "learning_rate": 5e-05, + "loss": 0.1374, + "num_input_tokens_seen": 129618984, + "step": 1420 + }, + { + "epoch": 5.916666666666667, + "loss": 0.13690005242824554, + "loss_ce": 0.0009137189481407404, + "loss_iou": 0.29296875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 129618984, + "step": 1420 + }, + { + "epoch": 5.920833333333333, + "grad_norm": 3.1659884378062655, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 129710432, + "step": 1421 + }, + { + "epoch": 5.920833333333333, + "loss": 0.07005725800991058, + "loss_ce": 0.0004619219107553363, + "loss_iou": 0.26171875, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 129710432, + "step": 1421 + }, + { + "epoch": 5.925, + "grad_norm": 4.322000714896495, + "learning_rate": 5e-05, + "loss": 0.1226, + "num_input_tokens_seen": 129801128, + "step": 1422 + }, + { + "epoch": 5.925, + "loss": 0.13157625496387482, + "loss_ce": 1.4974492842156906e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 129801128, + "step": 1422 + }, + { + "epoch": 5.929166666666667, + "grad_norm": 4.908923880940535, + "learning_rate": 5e-05, + "loss": 0.1494, + "num_input_tokens_seen": 129892840, + "step": 1423 + }, + { + "epoch": 5.929166666666667, + "loss": 0.17185944318771362, + "loss_ce": 0.002425836632028222, + "loss_iou": 0.369140625, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 129892840, + "step": 1423 + }, + { + "epoch": 5.933333333333334, + "grad_norm": 3.4893970706080477, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 129983844, + "step": 1424 + }, + { + "epoch": 5.933333333333334, + "loss": 0.11811276525259018, + "loss_ce": 0.0005285373190417886, + "loss_iou": 0.365234375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 129983844, + "step": 1424 + }, + { + "epoch": 5.9375, + "grad_norm": 2.676893704577933, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 130075308, + "step": 1425 + }, + { + "epoch": 5.9375, + "loss": 0.09480118751525879, + "loss_ce": 0.006071331910789013, + "loss_iou": 0.341796875, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 130075308, + "step": 1425 + }, + { + "epoch": 5.941666666666666, + "grad_norm": 8.114316288839373, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 130166500, + "step": 1426 + }, + { + "epoch": 5.941666666666666, + "loss": 0.08116979897022247, + "loss_ce": 0.0008933134377002716, + "loss_iou": 0.208984375, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 130166500, + "step": 1426 + }, + { + "epoch": 5.945833333333333, + "grad_norm": 4.24149996907433, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 130257872, + "step": 1427 + }, + { + "epoch": 5.945833333333333, + "loss": 0.09025895595550537, + "loss_ce": 7.9511315561831e-05, + "loss_iou": 0.2890625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 130257872, + "step": 1427 + }, + { + "epoch": 5.95, + "grad_norm": 4.695269924066833, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 130349204, + "step": 1428 + }, + { + "epoch": 5.95, + "loss": 0.09523400664329529, + "loss_ce": 0.000553212477825582, + "loss_iou": 0.228515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 130349204, + "step": 1428 + }, + { + "epoch": 5.954166666666667, + "grad_norm": 7.200325980740724, + "learning_rate": 5e-05, + "loss": 0.1506, + "num_input_tokens_seen": 130440384, + "step": 1429 + }, + { + "epoch": 5.954166666666667, + "loss": 0.15087425708770752, + "loss_ce": 0.0002699988253880292, + "loss_iou": 0.3046875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 130440384, + "step": 1429 + }, + { + "epoch": 5.958333333333333, + "grad_norm": 2.0693370843787076, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 130531700, + "step": 1430 + }, + { + "epoch": 5.958333333333333, + "loss": 0.062041543424129486, + "loss_ce": 0.0005791393341496587, + "loss_iou": 0.291015625, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 130531700, + "step": 1430 + }, + { + "epoch": 5.9625, + "grad_norm": 3.6766062364680234, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 130623984, + "step": 1431 + }, + { + "epoch": 5.9625, + "loss": 0.06569721549749374, + "loss_ce": 0.0022969457786530256, + "loss_iou": 0.44921875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 130623984, + "step": 1431 + }, + { + "epoch": 5.966666666666667, + "grad_norm": 4.2509085286253905, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 130715068, + "step": 1432 + }, + { + "epoch": 5.966666666666667, + "loss": 0.11964157968759537, + "loss_ce": 1.26781924336683e-05, + "loss_iou": 0.3046875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 130715068, + "step": 1432 + }, + { + "epoch": 5.970833333333333, + "grad_norm": 5.827467890808661, + "learning_rate": 5e-05, + "loss": 0.1208, + "num_input_tokens_seen": 130806308, + "step": 1433 + }, + { + "epoch": 5.970833333333333, + "loss": 0.14763817191123962, + "loss_ce": 0.0007112891180440784, + "loss_iou": 0.142578125, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 130806308, + "step": 1433 + }, + { + "epoch": 5.975, + "grad_norm": 5.04126021723349, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 130897584, + "step": 1434 + }, + { + "epoch": 5.975, + "loss": 0.0726742222905159, + "loss_ce": 0.004558989778161049, + "loss_iou": 0.2109375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 130897584, + "step": 1434 + }, + { + "epoch": 5.979166666666667, + "grad_norm": 19.47772705817985, + "learning_rate": 5e-05, + "loss": 0.1388, + "num_input_tokens_seen": 130989156, + "step": 1435 + }, + { + "epoch": 5.979166666666667, + "loss": 0.12499289214611053, + "loss_ce": 0.0004506511613726616, + "loss_iou": 0.298828125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 130989156, + "step": 1435 + }, + { + "epoch": 5.983333333333333, + "grad_norm": 5.331301563357194, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 131080592, + "step": 1436 + }, + { + "epoch": 5.983333333333333, + "loss": 0.07300704717636108, + "loss_ce": 0.0007414190331473947, + "loss_iou": 0.2119140625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 131080592, + "step": 1436 + }, + { + "epoch": 5.9875, + "grad_norm": 3.104241540677376, + "learning_rate": 5e-05, + "loss": 0.1283, + "num_input_tokens_seen": 131171984, + "step": 1437 + }, + { + "epoch": 5.9875, + "loss": 0.057797543704509735, + "loss_ce": 5.828931898577139e-05, + "loss_iou": 0.380859375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 131171984, + "step": 1437 + }, + { + "epoch": 5.991666666666667, + "grad_norm": 57.36883699022541, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 131263136, + "step": 1438 + }, + { + "epoch": 5.991666666666667, + "loss": 0.09304441511631012, + "loss_ce": 0.0006066667847335339, + "loss_iou": 0.28125, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 131263136, + "step": 1438 + }, + { + "epoch": 5.995833333333334, + "grad_norm": 3.85331435872134, + "learning_rate": 5e-05, + "loss": 0.2113, + "num_input_tokens_seen": 131355128, + "step": 1439 + }, + { + "epoch": 5.995833333333334, + "loss": 0.20464988052845, + "loss_ce": 0.0018910972867161036, + "loss_iou": 0.427734375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 131355128, + "step": 1439 + }, + { + "epoch": 6.0, + "grad_norm": 4.926323185216086, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 131446204, + "step": 1440 + }, + { + "epoch": 6.0, + "loss": 0.06529416143894196, + "loss_ce": 0.0008562928414903581, + "loss_iou": 0.169921875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 131446204, + "step": 1440 + }, + { + "epoch": 6.004166666666666, + "grad_norm": 7.437614331691922, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 131536320, + "step": 1441 + }, + { + "epoch": 6.004166666666666, + "loss": 0.0987076535820961, + "loss_ce": 0.000807262200396508, + "loss_iou": 0.31640625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 131536320, + "step": 1441 + }, + { + "epoch": 6.008333333333334, + "grad_norm": 6.646119688252158, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 131626852, + "step": 1442 + }, + { + "epoch": 6.008333333333334, + "loss": 0.13375121355056763, + "loss_ce": 2.317709368071519e-05, + "loss_iou": 0.357421875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 131626852, + "step": 1442 + }, + { + "epoch": 6.0125, + "grad_norm": 3.237665159225561, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 131717908, + "step": 1443 + }, + { + "epoch": 6.0125, + "loss": 0.06890007853507996, + "loss_ce": 0.0015630427515134215, + "loss_iou": 0.46875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 131717908, + "step": 1443 + }, + { + "epoch": 6.016666666666667, + "grad_norm": 6.813543578042571, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 131809160, + "step": 1444 + }, + { + "epoch": 6.016666666666667, + "loss": 0.07589545100927353, + "loss_ce": 0.00024237479374278337, + "loss_iou": 0.390625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 131809160, + "step": 1444 + }, + { + "epoch": 6.020833333333333, + "grad_norm": 3.849074999437511, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 131900668, + "step": 1445 + }, + { + "epoch": 6.020833333333333, + "loss": 0.07924774289131165, + "loss_ce": 0.00011566528701223433, + "loss_iou": 0.35546875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 131900668, + "step": 1445 + }, + { + "epoch": 6.025, + "grad_norm": 10.093369529116636, + "learning_rate": 5e-05, + "loss": 0.1565, + "num_input_tokens_seen": 131991300, + "step": 1446 + }, + { + "epoch": 6.025, + "loss": 0.11482731252908707, + "loss_ce": 0.0002643281768541783, + "loss_iou": 0.2021484375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 131991300, + "step": 1446 + }, + { + "epoch": 6.029166666666667, + "grad_norm": 7.328155770517095, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 132082604, + "step": 1447 + }, + { + "epoch": 6.029166666666667, + "loss": 0.14458438754081726, + "loss_ce": 0.0007397783920168877, + "loss_iou": 0.3046875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 132082604, + "step": 1447 + }, + { + "epoch": 6.033333333333333, + "grad_norm": 2.1194428935112812, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 132173368, + "step": 1448 + }, + { + "epoch": 6.033333333333333, + "loss": 0.10437037795782089, + "loss_ce": 0.008209485560655594, + "loss_iou": 0.328125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 132173368, + "step": 1448 + }, + { + "epoch": 6.0375, + "grad_norm": 2.977298408978728, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 132265116, + "step": 1449 + }, + { + "epoch": 6.0375, + "loss": 0.15406504273414612, + "loss_ce": 0.0004853248246945441, + "loss_iou": 0.255859375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 132265116, + "step": 1449 + }, + { + "epoch": 6.041666666666667, + "grad_norm": 1.7783545168507573, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 132356112, + "step": 1450 + }, + { + "epoch": 6.041666666666667, + "loss": 0.07427071034908295, + "loss_ce": 0.0002350689610466361, + "loss_iou": 0.3515625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 132356112, + "step": 1450 + }, + { + "epoch": 6.045833333333333, + "grad_norm": 5.857286320553287, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 132447788, + "step": 1451 + }, + { + "epoch": 6.045833333333333, + "loss": 0.09844372421503067, + "loss_ce": 0.0008790281717665493, + "loss_iou": 0.275390625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 132447788, + "step": 1451 + }, + { + "epoch": 6.05, + "grad_norm": 3.9468331314917138, + "learning_rate": 5e-05, + "loss": 0.0986, + "num_input_tokens_seen": 132539164, + "step": 1452 + }, + { + "epoch": 6.05, + "loss": 0.0783202052116394, + "loss_ce": 0.0020415245089679956, + "loss_iou": 0.3515625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 132539164, + "step": 1452 + }, + { + "epoch": 6.054166666666666, + "grad_norm": 2.213922720704317, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 132628796, + "step": 1453 + }, + { + "epoch": 6.054166666666666, + "loss": 0.1065921038389206, + "loss_ce": 0.000924985040910542, + "loss_iou": 0.333984375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 132628796, + "step": 1453 + }, + { + "epoch": 6.058333333333334, + "grad_norm": 5.687537479030628, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 132719588, + "step": 1454 + }, + { + "epoch": 6.058333333333334, + "loss": 0.11358708888292313, + "loss_ce": 0.0002753234875854105, + "loss_iou": 0.0888671875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 132719588, + "step": 1454 + }, + { + "epoch": 6.0625, + "grad_norm": 2.127818009920121, + "learning_rate": 5e-05, + "loss": 0.1102, + "num_input_tokens_seen": 132810988, + "step": 1455 + }, + { + "epoch": 6.0625, + "loss": 0.08428998291492462, + "loss_ce": 0.0008091489435173571, + "loss_iou": 0.251953125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 132810988, + "step": 1455 + }, + { + "epoch": 6.066666666666666, + "grad_norm": 7.095893761527112, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 132902488, + "step": 1456 + }, + { + "epoch": 6.066666666666666, + "loss": 0.09274320304393768, + "loss_ce": 0.027572914958000183, + "loss_iou": 0.240234375, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 132902488, + "step": 1456 + }, + { + "epoch": 6.070833333333334, + "grad_norm": 46.1430369354311, + "learning_rate": 5e-05, + "loss": 0.1321, + "num_input_tokens_seen": 132993932, + "step": 1457 + }, + { + "epoch": 6.070833333333334, + "loss": 0.11721807718276978, + "loss_ce": 0.0012512854300439358, + "loss_iou": 0.314453125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 132993932, + "step": 1457 + }, + { + "epoch": 6.075, + "grad_norm": 2.7616679834886724, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 133085476, + "step": 1458 + }, + { + "epoch": 6.075, + "loss": 0.050556816160678864, + "loss_ce": 0.0011488578747957945, + "loss_iou": 0.17578125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 133085476, + "step": 1458 + }, + { + "epoch": 6.079166666666667, + "grad_norm": 5.207176340008163, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 133177676, + "step": 1459 + }, + { + "epoch": 6.079166666666667, + "loss": 0.104974165558815, + "loss_ce": 0.001992600504308939, + "loss_iou": 0.173828125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 133177676, + "step": 1459 + }, + { + "epoch": 6.083333333333333, + "grad_norm": 3.054466049365312, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 133269072, + "step": 1460 + }, + { + "epoch": 6.083333333333333, + "loss": 0.04987429827451706, + "loss_ce": 5.4354113672161475e-05, + "loss_iou": 0.333984375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 133269072, + "step": 1460 + }, + { + "epoch": 6.0875, + "grad_norm": 2.9583717627145028, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 133360928, + "step": 1461 + }, + { + "epoch": 6.0875, + "loss": 0.06315495073795319, + "loss_ce": 0.003157388884574175, + "loss_iou": 0.28515625, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 133360928, + "step": 1461 + }, + { + "epoch": 6.091666666666667, + "grad_norm": 3.9025604867301147, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 133452228, + "step": 1462 + }, + { + "epoch": 6.091666666666667, + "loss": 0.13133090734481812, + "loss_ce": 0.0007767053321003914, + "loss_iou": 0.34375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 133452228, + "step": 1462 + }, + { + "epoch": 6.095833333333333, + "grad_norm": 2.801843021578155, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 133544256, + "step": 1463 + }, + { + "epoch": 6.095833333333333, + "loss": 0.09132051467895508, + "loss_ce": 0.0009732232429087162, + "loss_iou": 0.298828125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 133544256, + "step": 1463 + }, + { + "epoch": 6.1, + "grad_norm": 2.8677230220587684, + "learning_rate": 5e-05, + "loss": 0.1237, + "num_input_tokens_seen": 133635944, + "step": 1464 + }, + { + "epoch": 6.1, + "loss": 0.13180416822433472, + "loss_ce": 0.000120820157462731, + "loss_iou": 0.34375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 133635944, + "step": 1464 + }, + { + "epoch": 6.104166666666667, + "grad_norm": 1.7808807963535567, + "learning_rate": 5e-05, + "loss": 0.1234, + "num_input_tokens_seen": 133726988, + "step": 1465 + }, + { + "epoch": 6.104166666666667, + "loss": 0.1303086131811142, + "loss_ce": 0.001814344897866249, + "loss_iou": 0.2001953125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 133726988, + "step": 1465 + }, + { + "epoch": 6.108333333333333, + "grad_norm": 5.937209116611709, + "learning_rate": 5e-05, + "loss": 0.113, + "num_input_tokens_seen": 133818424, + "step": 1466 + }, + { + "epoch": 6.108333333333333, + "loss": 0.116533562541008, + "loss_ce": 0.0006964726489968598, + "loss_iou": 0.2392578125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 133818424, + "step": 1466 + }, + { + "epoch": 6.1125, + "grad_norm": 3.39792518178655, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 133909628, + "step": 1467 + }, + { + "epoch": 6.1125, + "loss": 0.0704282820224762, + "loss_ce": 0.00011578691191971302, + "loss_iou": 0.41796875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 133909628, + "step": 1467 + }, + { + "epoch": 6.116666666666666, + "grad_norm": 5.0612334717481176, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 134001744, + "step": 1468 + }, + { + "epoch": 6.116666666666666, + "loss": 0.04524346441030502, + "loss_ce": 0.0009471987141296268, + "loss_iou": 0.30859375, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 134001744, + "step": 1468 + }, + { + "epoch": 6.120833333333334, + "grad_norm": 3.492075358097084, + "learning_rate": 5e-05, + "loss": 0.1234, + "num_input_tokens_seen": 134092876, + "step": 1469 + }, + { + "epoch": 6.120833333333334, + "loss": 0.11776579916477203, + "loss_ce": 0.00024259783094748855, + "loss_iou": 0.376953125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 134092876, + "step": 1469 + }, + { + "epoch": 6.125, + "grad_norm": 3.2972687382843784, + "learning_rate": 5e-05, + "loss": 0.1241, + "num_input_tokens_seen": 134183496, + "step": 1470 + }, + { + "epoch": 6.125, + "loss": 0.15864822268486023, + "loss_ce": 0.00035355405998416245, + "loss_iou": 0.40625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 134183496, + "step": 1470 + }, + { + "epoch": 6.129166666666666, + "grad_norm": 5.518848294310319, + "learning_rate": 5e-05, + "loss": 0.1456, + "num_input_tokens_seen": 134274676, + "step": 1471 + }, + { + "epoch": 6.129166666666666, + "loss": 0.1667410433292389, + "loss_ce": 5.404305920819752e-05, + "loss_iou": 0.349609375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 134274676, + "step": 1471 + }, + { + "epoch": 6.133333333333334, + "grad_norm": 2.9549253104072615, + "learning_rate": 5e-05, + "loss": 0.1211, + "num_input_tokens_seen": 134366344, + "step": 1472 + }, + { + "epoch": 6.133333333333334, + "loss": 0.15231193602085114, + "loss_ce": 2.9224675017758273e-05, + "loss_iou": 0.283203125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 134366344, + "step": 1472 + }, + { + "epoch": 6.1375, + "grad_norm": 8.080211795996055, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 134457720, + "step": 1473 + }, + { + "epoch": 6.1375, + "loss": 0.10112521052360535, + "loss_ce": 0.001439541345462203, + "loss_iou": 0.314453125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 134457720, + "step": 1473 + }, + { + "epoch": 6.141666666666667, + "grad_norm": 3.3321246765884918, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 134549376, + "step": 1474 + }, + { + "epoch": 6.141666666666667, + "loss": 0.045930132269859314, + "loss_ce": 0.0023357742466032505, + "loss_iou": 0.322265625, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 134549376, + "step": 1474 + }, + { + "epoch": 6.145833333333333, + "grad_norm": 2.323448607746615, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 134641320, + "step": 1475 + }, + { + "epoch": 6.145833333333333, + "loss": 0.14385700225830078, + "loss_ce": 0.0012483518803492188, + "loss_iou": 0.2021484375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 134641320, + "step": 1475 + }, + { + "epoch": 6.15, + "grad_norm": 9.099784493689713, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 134732684, + "step": 1476 + }, + { + "epoch": 6.15, + "loss": 0.09532591700553894, + "loss_ce": 0.0016216891817748547, + "loss_iou": 0.341796875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 134732684, + "step": 1476 + }, + { + "epoch": 6.154166666666667, + "grad_norm": 2.0798782768136723, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 134824232, + "step": 1477 + }, + { + "epoch": 6.154166666666667, + "loss": 0.08923730254173279, + "loss_ce": 0.00021752758766524494, + "loss_iou": 0.33984375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 134824232, + "step": 1477 + }, + { + "epoch": 6.158333333333333, + "grad_norm": 1.7335927622566298, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 134915328, + "step": 1478 + }, + { + "epoch": 6.158333333333333, + "loss": 0.08593946695327759, + "loss_ce": 0.0001240387064171955, + "loss_iou": 0.2734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 134915328, + "step": 1478 + }, + { + "epoch": 6.1625, + "grad_norm": 2.8402593123658186, + "learning_rate": 5e-05, + "loss": 0.1228, + "num_input_tokens_seen": 135006756, + "step": 1479 + }, + { + "epoch": 6.1625, + "loss": 0.13833099603652954, + "loss_ce": 0.0007653862703591585, + "loss_iou": 0.322265625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 135006756, + "step": 1479 + }, + { + "epoch": 6.166666666666667, + "grad_norm": 4.202233004494281, + "learning_rate": 5e-05, + "loss": 0.1646, + "num_input_tokens_seen": 135098072, + "step": 1480 + }, + { + "epoch": 6.166666666666667, + "loss": 0.11366622895002365, + "loss_ce": 0.0014530995395034552, + "loss_iou": 0.2890625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 135098072, + "step": 1480 + }, + { + "epoch": 6.170833333333333, + "grad_norm": 4.031083413757542, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 135189284, + "step": 1481 + }, + { + "epoch": 6.170833333333333, + "loss": 0.07110023498535156, + "loss_ce": 0.0002841942186933011, + "loss_iou": 0.451171875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 135189284, + "step": 1481 + }, + { + "epoch": 6.175, + "grad_norm": 5.6421420925258365, + "learning_rate": 5e-05, + "loss": 0.1378, + "num_input_tokens_seen": 135280224, + "step": 1482 + }, + { + "epoch": 6.175, + "loss": 0.15990953147411346, + "loss_ce": 0.0003636321926023811, + "loss_iou": 0.35546875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 135280224, + "step": 1482 + }, + { + "epoch": 6.179166666666666, + "grad_norm": 18.276343621342225, + "learning_rate": 5e-05, + "loss": 0.1245, + "num_input_tokens_seen": 135371488, + "step": 1483 + }, + { + "epoch": 6.179166666666666, + "loss": 0.1595810353755951, + "loss_ce": 9.616982424631715e-05, + "loss_iou": 0.39453125, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 135371488, + "step": 1483 + }, + { + "epoch": 6.183333333333334, + "grad_norm": 2.4957749905650672, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 135463448, + "step": 1484 + }, + { + "epoch": 6.183333333333334, + "loss": 0.08113342523574829, + "loss_ce": 3.2958269002847373e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 135463448, + "step": 1484 + }, + { + "epoch": 6.1875, + "grad_norm": 2.7456250298887594, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 135554848, + "step": 1485 + }, + { + "epoch": 6.1875, + "loss": 0.08282457292079926, + "loss_ce": 4.5643879275303334e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 135554848, + "step": 1485 + }, + { + "epoch": 6.191666666666666, + "grad_norm": 4.149341669310884, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 135646256, + "step": 1486 + }, + { + "epoch": 6.191666666666666, + "loss": 0.1378132700920105, + "loss_ce": 1.114791666623205e-05, + "loss_iou": 0.0986328125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 135646256, + "step": 1486 + }, + { + "epoch": 6.195833333333334, + "grad_norm": 1.3396171777343122, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 135737080, + "step": 1487 + }, + { + "epoch": 6.195833333333334, + "loss": 0.06343643367290497, + "loss_ce": 5.651055289490614e-06, + "loss_iou": 0.11376953125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 135737080, + "step": 1487 + }, + { + "epoch": 6.2, + "grad_norm": 1.9860821156088158, + "learning_rate": 5e-05, + "loss": 0.1024, + "num_input_tokens_seen": 135828560, + "step": 1488 + }, + { + "epoch": 6.2, + "loss": 0.15105964243412018, + "loss_ce": 0.0005469413590617478, + "loss_iou": 0.1591796875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 135828560, + "step": 1488 + }, + { + "epoch": 6.204166666666667, + "grad_norm": 6.458528606026268, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 135920016, + "step": 1489 + }, + { + "epoch": 6.204166666666667, + "loss": 0.12129916250705719, + "loss_ce": 6.808717444073409e-05, + "loss_iou": 0.27734375, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 135920016, + "step": 1489 + }, + { + "epoch": 6.208333333333333, + "grad_norm": 2.15464906974121, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 136011944, + "step": 1490 + }, + { + "epoch": 6.208333333333333, + "loss": 0.07406759262084961, + "loss_ce": 0.00021505873883143067, + "loss_iou": 0.294921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 136011944, + "step": 1490 + }, + { + "epoch": 6.2125, + "grad_norm": 3.2448957136092105, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 136103984, + "step": 1491 + }, + { + "epoch": 6.2125, + "loss": 0.09109637886285782, + "loss_ce": 0.0012373746139928699, + "loss_iou": 0.220703125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 136103984, + "step": 1491 + }, + { + "epoch": 6.216666666666667, + "grad_norm": 3.997611756884178, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 136195588, + "step": 1492 + }, + { + "epoch": 6.216666666666667, + "loss": 0.06629104912281036, + "loss_ce": 0.0008918737876228988, + "loss_iou": 0.314453125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 136195588, + "step": 1492 + }, + { + "epoch": 6.220833333333333, + "grad_norm": 2.595286086541442, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 136286792, + "step": 1493 + }, + { + "epoch": 6.220833333333333, + "loss": 0.08234380930662155, + "loss_ce": 6.84204715071246e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 136286792, + "step": 1493 + }, + { + "epoch": 6.225, + "grad_norm": 3.270012287666817, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 136378704, + "step": 1494 + }, + { + "epoch": 6.225, + "loss": 0.09001626074314117, + "loss_ce": 0.0010651469929143786, + "loss_iou": 0.34765625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 136378704, + "step": 1494 + }, + { + "epoch": 6.229166666666667, + "grad_norm": 3.2462930501077816, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 136470544, + "step": 1495 + }, + { + "epoch": 6.229166666666667, + "loss": 0.05531272664666176, + "loss_ce": 0.001144025707617402, + "loss_iou": 0.30859375, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 136470544, + "step": 1495 + }, + { + "epoch": 6.233333333333333, + "grad_norm": 2.739395620419567, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 136562108, + "step": 1496 + }, + { + "epoch": 6.233333333333333, + "loss": 0.08684214949607849, + "loss_ce": 0.00047740069567225873, + "loss_iou": 0.27734375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 136562108, + "step": 1496 + }, + { + "epoch": 6.2375, + "grad_norm": 4.301696296347918, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 136653324, + "step": 1497 + }, + { + "epoch": 6.2375, + "loss": 0.09686444699764252, + "loss_ce": 4.7431603888981044e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 136653324, + "step": 1497 + }, + { + "epoch": 6.241666666666666, + "grad_norm": 7.418185168253085, + "learning_rate": 5e-05, + "loss": 0.1631, + "num_input_tokens_seen": 136745004, + "step": 1498 + }, + { + "epoch": 6.241666666666666, + "loss": 0.1644340306520462, + "loss_ce": 0.001714312587864697, + "loss_iou": 0.154296875, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 136745004, + "step": 1498 + }, + { + "epoch": 6.245833333333334, + "grad_norm": 4.203904690296846, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 136835992, + "step": 1499 + }, + { + "epoch": 6.245833333333334, + "loss": 0.09441401809453964, + "loss_ce": 7.891281711636111e-06, + "loss_iou": 0.32421875, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 136835992, + "step": 1499 + }, + { + "epoch": 6.25, + "grad_norm": 4.399785995865877, + "learning_rate": 5e-05, + "loss": 0.1119, + "num_input_tokens_seen": 136927000, + "step": 1500 + }, + { + "epoch": 6.25, + "eval_seeclick_CIoU": 0.20020649209618568, + "eval_seeclick_GIoU": 0.18264785408973694, + "eval_seeclick_IoU": 0.3048545867204666, + "eval_seeclick_MAE_all": 0.09458190575242043, + "eval_seeclick_MAE_h": 0.07541835866868496, + "eval_seeclick_MAE_w": 0.2035619094967842, + "eval_seeclick_MAE_x_boxes": 0.2103462964296341, + "eval_seeclick_MAE_y_boxes": 0.0807495042681694, + "eval_seeclick_NUM_probability": 0.9999992549419403, + "eval_seeclick_inside_bbox": 0.4332386404275894, + "eval_seeclick_loss": 0.5469575524330139, + "eval_seeclick_loss_ce": 0.13234156370162964, + "eval_seeclick_loss_iou": 0.3863525390625, + "eval_seeclick_loss_num": 0.08294677734375, + "eval_seeclick_loss_xval": 0.414794921875, + "eval_seeclick_runtime": 74.5888, + "eval_seeclick_samples_per_second": 0.576, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 136927000, + "step": 1500 + }, + { + "epoch": 6.25, + "eval_icons_CIoU": 0.3565501272678375, + "eval_icons_GIoU": 0.37757113575935364, + "eval_icons_IoU": 0.438765212893486, + "eval_icons_MAE_all": 0.06771966256201267, + "eval_icons_MAE_h": 0.13854750245809555, + "eval_icons_MAE_w": 0.10688314586877823, + "eval_icons_MAE_x_boxes": 0.10696740448474884, + "eval_icons_MAE_y_boxes": 0.13959594815969467, + "eval_icons_NUM_probability": 0.999999612569809, + "eval_icons_inside_bbox": 0.6371527910232544, + "eval_icons_loss": 0.3279392719268799, + "eval_icons_loss_ce": 7.772192816446477e-06, + "eval_icons_loss_iou": 0.2781982421875, + "eval_icons_loss_num": 0.06714630126953125, + "eval_icons_loss_xval": 0.335784912109375, + "eval_icons_runtime": 84.0066, + "eval_icons_samples_per_second": 0.595, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 136927000, + "step": 1500 + }, + { + "epoch": 6.25, + "eval_screenspot_CIoU": 0.3894497851530711, + "eval_screenspot_GIoU": 0.38316018382708233, + "eval_screenspot_IoU": 0.4590388039747874, + "eval_screenspot_MAE_all": 0.09581841280062993, + "eval_screenspot_MAE_h": 0.08313464000821114, + "eval_screenspot_MAE_w": 0.19596777856349945, + "eval_screenspot_MAE_x_boxes": 0.18529337644577026, + "eval_screenspot_MAE_y_boxes": 0.07308414205908775, + "eval_screenspot_NUM_probability": 0.9999992251396179, + "eval_screenspot_inside_bbox": 0.725000003973643, + "eval_screenspot_loss": 0.4786304831504822, + "eval_screenspot_loss_ce": 2.510744145448977e-05, + "eval_screenspot_loss_iou": 0.407958984375, + "eval_screenspot_loss_num": 0.0963592529296875, + "eval_screenspot_loss_xval": 0.4816487630208333, + "eval_screenspot_runtime": 159.7863, + "eval_screenspot_samples_per_second": 0.557, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 136927000, + "step": 1500 + }, + { + "epoch": 6.25, + "eval_compot_CIoU": 0.4208727031946182, + "eval_compot_GIoU": 0.41121308505535126, + "eval_compot_IoU": 0.49092237651348114, + "eval_compot_MAE_all": 0.06092522293329239, + "eval_compot_MAE_h": 0.06699452549219131, + "eval_compot_MAE_w": 0.15196169167757034, + "eval_compot_MAE_x_boxes": 0.14676962792873383, + "eval_compot_MAE_y_boxes": 0.06717049330472946, + "eval_compot_NUM_probability": 0.9999985098838806, + "eval_compot_inside_bbox": 0.7204861044883728, + "eval_compot_loss": 0.3193596601486206, + "eval_compot_loss_ce": 0.011697435285896063, + "eval_compot_loss_iou": 0.32623291015625, + "eval_compot_loss_num": 0.0563812255859375, + "eval_compot_loss_xval": 0.2819061279296875, + "eval_compot_runtime": 88.4549, + "eval_compot_samples_per_second": 0.565, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 136927000, + "step": 1500 + }, + { + "epoch": 6.25, + "loss": 0.25956547260284424, + "loss_ce": 0.012830821797251701, + "loss_iou": 0.326171875, + "loss_num": 0.04931640625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 136927000, + "step": 1500 + }, + { + "epoch": 6.254166666666666, + "grad_norm": 2.1614253857529997, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 137018200, + "step": 1501 + }, + { + "epoch": 6.254166666666666, + "loss": 0.11686230450868607, + "loss_ce": 4.101651211385615e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 137018200, + "step": 1501 + }, + { + "epoch": 6.258333333333334, + "grad_norm": 2.7009496825179626, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 137109356, + "step": 1502 + }, + { + "epoch": 6.258333333333334, + "loss": 0.11811286211013794, + "loss_ce": 0.0006506989011541009, + "loss_iou": 0.384765625, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 137109356, + "step": 1502 + }, + { + "epoch": 6.2625, + "grad_norm": 7.285209682589673, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 137200692, + "step": 1503 + }, + { + "epoch": 6.2625, + "loss": 0.05951286107301712, + "loss_ce": 0.00018668676784727722, + "loss_iou": 0.40625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 137200692, + "step": 1503 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 4.393759772485003, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 137292620, + "step": 1504 + }, + { + "epoch": 6.266666666666667, + "loss": 0.09740344434976578, + "loss_ce": 0.002066532615572214, + "loss_iou": 0.330078125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 137292620, + "step": 1504 + }, + { + "epoch": 6.270833333333333, + "grad_norm": 4.201436243065331, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 137384388, + "step": 1505 + }, + { + "epoch": 6.270833333333333, + "loss": 0.0596717894077301, + "loss_ce": 0.002146154874935746, + "loss_iou": 0.2373046875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 137384388, + "step": 1505 + }, + { + "epoch": 6.275, + "grad_norm": 5.428190265142759, + "learning_rate": 5e-05, + "loss": 0.0977, + "num_input_tokens_seen": 137475952, + "step": 1506 + }, + { + "epoch": 6.275, + "loss": 0.11430226266384125, + "loss_ce": 0.0012346402509137988, + "loss_iou": 0.154296875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 137475952, + "step": 1506 + }, + { + "epoch": 6.279166666666667, + "grad_norm": 8.940473330611962, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 137567536, + "step": 1507 + }, + { + "epoch": 6.279166666666667, + "loss": 0.08325809240341187, + "loss_ce": 0.0006470083026215434, + "loss_iou": 0.1708984375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 137567536, + "step": 1507 + }, + { + "epoch": 6.283333333333333, + "grad_norm": 6.633974688331686, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 137659404, + "step": 1508 + }, + { + "epoch": 6.283333333333333, + "loss": 0.08943825960159302, + "loss_ce": 0.0007160389795899391, + "loss_iou": 0.259765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 137659404, + "step": 1508 + }, + { + "epoch": 6.2875, + "grad_norm": 2.7176989734291284, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 137750828, + "step": 1509 + }, + { + "epoch": 6.2875, + "loss": 0.08843083679676056, + "loss_ce": 0.0017304003704339266, + "loss_iou": 0.181640625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 137750828, + "step": 1509 + }, + { + "epoch": 6.291666666666667, + "grad_norm": 2.0135651794519402, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 137842976, + "step": 1510 + }, + { + "epoch": 6.291666666666667, + "loss": 0.047538742423057556, + "loss_ce": 0.0022048787213861942, + "loss_iou": 0.2177734375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 137842976, + "step": 1510 + }, + { + "epoch": 6.295833333333333, + "grad_norm": 5.309436376013892, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 137934964, + "step": 1511 + }, + { + "epoch": 6.295833333333333, + "loss": 0.05960085242986679, + "loss_ce": 0.0008850316517055035, + "loss_iou": 0.384765625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 137934964, + "step": 1511 + }, + { + "epoch": 6.3, + "grad_norm": 4.1509108275575715, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 138026256, + "step": 1512 + }, + { + "epoch": 6.3, + "loss": 0.10616110265254974, + "loss_ce": 0.0001735452242428437, + "loss_iou": 0.36328125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 138026256, + "step": 1512 + }, + { + "epoch": 6.304166666666666, + "grad_norm": 2.621120171436987, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 138117368, + "step": 1513 + }, + { + "epoch": 6.304166666666666, + "loss": 0.14343056082725525, + "loss_ce": 0.0007303733727894723, + "loss_iou": 0.28515625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 138117368, + "step": 1513 + }, + { + "epoch": 6.308333333333334, + "grad_norm": 2.7964839161004944, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 138208852, + "step": 1514 + }, + { + "epoch": 6.308333333333334, + "loss": 0.10081670433282852, + "loss_ce": 0.0007343016914092004, + "loss_iou": 0.2275390625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 138208852, + "step": 1514 + }, + { + "epoch": 6.3125, + "grad_norm": 2.282470761627585, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 138299940, + "step": 1515 + }, + { + "epoch": 6.3125, + "loss": 0.05543072521686554, + "loss_ce": 0.0010483998339623213, + "loss_iou": 0.189453125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 138299940, + "step": 1515 + }, + { + "epoch": 6.316666666666666, + "grad_norm": 4.131042125302842, + "learning_rate": 5e-05, + "loss": 0.073, + "num_input_tokens_seen": 138391328, + "step": 1516 + }, + { + "epoch": 6.316666666666666, + "loss": 0.08035748451948166, + "loss_ce": 0.00046246696729213, + "loss_iou": 0.275390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 138391328, + "step": 1516 + }, + { + "epoch": 6.320833333333334, + "grad_norm": 8.27696528656622, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 138482532, + "step": 1517 + }, + { + "epoch": 6.320833333333334, + "loss": 0.07589618861675262, + "loss_ce": 0.000273626996204257, + "loss_iou": 0.26953125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 138482532, + "step": 1517 + }, + { + "epoch": 6.325, + "grad_norm": 3.7231851864538137, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 138573620, + "step": 1518 + }, + { + "epoch": 6.325, + "loss": 0.09571607410907745, + "loss_ce": 0.0011115849483758211, + "loss_iou": 0.35546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 138573620, + "step": 1518 + }, + { + "epoch": 6.329166666666667, + "grad_norm": 8.090219286591468, + "learning_rate": 5e-05, + "loss": 0.1327, + "num_input_tokens_seen": 138664500, + "step": 1519 + }, + { + "epoch": 6.329166666666667, + "loss": 0.12188278883695602, + "loss_ce": 0.000499117944855243, + "loss_iou": 0.30859375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 138664500, + "step": 1519 + }, + { + "epoch": 6.333333333333333, + "grad_norm": 4.691978282317901, + "learning_rate": 5e-05, + "loss": 0.1281, + "num_input_tokens_seen": 138755820, + "step": 1520 + }, + { + "epoch": 6.333333333333333, + "loss": 0.08768007159233093, + "loss_ce": 0.0009185929084196687, + "loss_iou": 0.46484375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 138755820, + "step": 1520 + }, + { + "epoch": 6.3375, + "grad_norm": 4.05389583485167, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 138846992, + "step": 1521 + }, + { + "epoch": 6.3375, + "loss": 0.15876007080078125, + "loss_ce": 0.00052641675574705, + "loss_iou": 0.205078125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 138846992, + "step": 1521 + }, + { + "epoch": 6.341666666666667, + "grad_norm": 9.50735611222299, + "learning_rate": 5e-05, + "loss": 0.1257, + "num_input_tokens_seen": 138938144, + "step": 1522 + }, + { + "epoch": 6.341666666666667, + "loss": 0.14819855988025665, + "loss_ce": 5.187587703403551e-06, + "loss_iou": 0.455078125, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 138938144, + "step": 1522 + }, + { + "epoch": 6.345833333333333, + "grad_norm": 4.860180651361698, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 139029296, + "step": 1523 + }, + { + "epoch": 6.345833333333333, + "loss": 0.07304719090461731, + "loss_ce": 0.0006594919832423329, + "loss_iou": 0.1923828125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 139029296, + "step": 1523 + }, + { + "epoch": 6.35, + "grad_norm": 6.304762865561375, + "learning_rate": 5e-05, + "loss": 0.1134, + "num_input_tokens_seen": 139120612, + "step": 1524 + }, + { + "epoch": 6.35, + "loss": 0.163400799036026, + "loss_ce": 0.00010123385436600074, + "loss_iou": 0.318359375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 139120612, + "step": 1524 + }, + { + "epoch": 6.354166666666667, + "grad_norm": 4.101944550969832, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 139211944, + "step": 1525 + }, + { + "epoch": 6.354166666666667, + "loss": 0.10763823986053467, + "loss_ce": 2.7328803753334796e-06, + "loss_iou": 0.306640625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 139211944, + "step": 1525 + }, + { + "epoch": 6.358333333333333, + "grad_norm": 2.8743059740589767, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 139302760, + "step": 1526 + }, + { + "epoch": 6.358333333333333, + "loss": 0.109195277094841, + "loss_ce": 6.442190351663157e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 139302760, + "step": 1526 + }, + { + "epoch": 6.3625, + "grad_norm": 5.708628465847872, + "learning_rate": 5e-05, + "loss": 0.1256, + "num_input_tokens_seen": 139394120, + "step": 1527 + }, + { + "epoch": 6.3625, + "loss": 0.15449079871177673, + "loss_ce": 0.006007526069879532, + "loss_iou": 0.2890625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 139394120, + "step": 1527 + }, + { + "epoch": 6.366666666666666, + "grad_norm": 7.874856125203204, + "learning_rate": 5e-05, + "loss": 0.1415, + "num_input_tokens_seen": 139485488, + "step": 1528 + }, + { + "epoch": 6.366666666666666, + "loss": 0.19475838541984558, + "loss_ce": 0.00019355639233253896, + "loss_iou": 0.388671875, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 139485488, + "step": 1528 + }, + { + "epoch": 6.370833333333334, + "grad_norm": 2.231030623695389, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 139577164, + "step": 1529 + }, + { + "epoch": 6.370833333333334, + "loss": 0.0933036208152771, + "loss_ce": 0.0006522503099404275, + "loss_iou": 0.345703125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 139577164, + "step": 1529 + }, + { + "epoch": 6.375, + "grad_norm": 1.9998978914688346, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 139668980, + "step": 1530 + }, + { + "epoch": 6.375, + "loss": 0.07884591817855835, + "loss_ce": 0.0002936720848083496, + "loss_iou": 0.2890625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 139668980, + "step": 1530 + }, + { + "epoch": 6.379166666666666, + "grad_norm": 3.496869670948756, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 139760436, + "step": 1531 + }, + { + "epoch": 6.379166666666666, + "loss": 0.11092659831047058, + "loss_ce": 8.675569551996887e-05, + "loss_iou": 0.298828125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 139760436, + "step": 1531 + }, + { + "epoch": 6.383333333333334, + "grad_norm": 3.3589500487803674, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 139851524, + "step": 1532 + }, + { + "epoch": 6.383333333333334, + "loss": 0.1541489064693451, + "loss_ce": 0.0009506536880508065, + "loss_iou": 0.259765625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 139851524, + "step": 1532 + }, + { + "epoch": 6.3875, + "grad_norm": 2.828243613484087, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 139942808, + "step": 1533 + }, + { + "epoch": 6.3875, + "loss": 0.10458563268184662, + "loss_ce": 1.7153741282527335e-05, + "loss_iou": 0.328125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 139942808, + "step": 1533 + }, + { + "epoch": 6.391666666666667, + "grad_norm": 3.5751189494029765, + "learning_rate": 5e-05, + "loss": 0.1509, + "num_input_tokens_seen": 140033636, + "step": 1534 + }, + { + "epoch": 6.391666666666667, + "loss": 0.1473105400800705, + "loss_ce": 0.0007346185739152133, + "loss_iou": 0.353515625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 140033636, + "step": 1534 + }, + { + "epoch": 6.395833333333333, + "grad_norm": 3.7131141432185877, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 140124936, + "step": 1535 + }, + { + "epoch": 6.395833333333333, + "loss": 0.06990425288677216, + "loss_ce": 0.00038521114038303494, + "loss_iou": 0.3203125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 140124936, + "step": 1535 + }, + { + "epoch": 6.4, + "grad_norm": 5.4413194288201145, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 140216540, + "step": 1536 + }, + { + "epoch": 6.4, + "loss": 0.04465536028146744, + "loss_ce": 0.00011495660874061286, + "loss_iou": 0.30859375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 140216540, + "step": 1536 + }, + { + "epoch": 6.404166666666667, + "grad_norm": 3.0270632390471923, + "learning_rate": 5e-05, + "loss": 0.1218, + "num_input_tokens_seen": 140308108, + "step": 1537 + }, + { + "epoch": 6.404166666666667, + "loss": 0.14716391265392303, + "loss_ce": 0.001472989795729518, + "loss_iou": 0.1640625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 140308108, + "step": 1537 + }, + { + "epoch": 6.408333333333333, + "grad_norm": 12.993363647613782, + "learning_rate": 5e-05, + "loss": 0.1193, + "num_input_tokens_seen": 140399288, + "step": 1538 + }, + { + "epoch": 6.408333333333333, + "loss": 0.14310534298419952, + "loss_ce": 6.94481932441704e-05, + "loss_iou": 0.2578125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 140399288, + "step": 1538 + }, + { + "epoch": 6.4125, + "grad_norm": 3.263224080350857, + "learning_rate": 5e-05, + "loss": 0.1301, + "num_input_tokens_seen": 140490692, + "step": 1539 + }, + { + "epoch": 6.4125, + "loss": 0.15512920916080475, + "loss_ce": 8.361228537978604e-06, + "loss_iou": 0.2109375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 140490692, + "step": 1539 + }, + { + "epoch": 6.416666666666667, + "grad_norm": 5.526571946364959, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 140580992, + "step": 1540 + }, + { + "epoch": 6.416666666666667, + "loss": 0.08357784152030945, + "loss_ce": 0.00021907762857154012, + "loss_iou": 0.15234375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 140580992, + "step": 1540 + }, + { + "epoch": 6.420833333333333, + "grad_norm": 2.6590068483528353, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 140671864, + "step": 1541 + }, + { + "epoch": 6.420833333333333, + "loss": 0.06562237441539764, + "loss_ce": 0.0007114805048331618, + "loss_iou": 0.228515625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 140671864, + "step": 1541 + }, + { + "epoch": 6.425, + "grad_norm": 5.086586938084599, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 140762204, + "step": 1542 + }, + { + "epoch": 6.425, + "loss": 0.061930038034915924, + "loss_ce": 2.5129629648290575e-05, + "loss_iou": 0.328125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 140762204, + "step": 1542 + }, + { + "epoch": 6.429166666666666, + "grad_norm": 8.287373152003422, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 140853496, + "step": 1543 + }, + { + "epoch": 6.429166666666666, + "loss": 0.11346882581710815, + "loss_ce": 0.0021559547167271376, + "loss_iou": 0.275390625, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 140853496, + "step": 1543 + }, + { + "epoch": 6.433333333333334, + "grad_norm": 2.6481493953194724, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 140944676, + "step": 1544 + }, + { + "epoch": 6.433333333333334, + "loss": 0.11181750893592834, + "loss_ce": 0.0027782029937952757, + "loss_iou": 0.212890625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 140944676, + "step": 1544 + }, + { + "epoch": 6.4375, + "grad_norm": 2.5476793630601224, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 141036008, + "step": 1545 + }, + { + "epoch": 6.4375, + "loss": 0.0740530788898468, + "loss_ce": 0.0023215145338326693, + "loss_iou": 0.205078125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 141036008, + "step": 1545 + }, + { + "epoch": 6.441666666666666, + "grad_norm": 3.222784811184737, + "learning_rate": 5e-05, + "loss": 0.1079, + "num_input_tokens_seen": 141127444, + "step": 1546 + }, + { + "epoch": 6.441666666666666, + "loss": 0.09326840192079544, + "loss_ce": 0.00015926752530504018, + "loss_iou": 0.275390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 141127444, + "step": 1546 + }, + { + "epoch": 6.445833333333334, + "grad_norm": 9.30775884504721, + "learning_rate": 5e-05, + "loss": 0.1288, + "num_input_tokens_seen": 141218368, + "step": 1547 + }, + { + "epoch": 6.445833333333334, + "loss": 0.1136021539568901, + "loss_ce": 0.000778665067628026, + "loss_iou": 0.33203125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 141218368, + "step": 1547 + }, + { + "epoch": 6.45, + "grad_norm": 2.857794250190675, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 141309712, + "step": 1548 + }, + { + "epoch": 6.45, + "loss": 0.07592833787202835, + "loss_ce": 3.111540718236938e-05, + "loss_iou": 0.265625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 141309712, + "step": 1548 + }, + { + "epoch": 6.454166666666667, + "grad_norm": 10.41193222880002, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 141401432, + "step": 1549 + }, + { + "epoch": 6.454166666666667, + "loss": 0.08329534530639648, + "loss_ce": 0.0031943346839398146, + "loss_iou": 0.244140625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 141401432, + "step": 1549 + }, + { + "epoch": 6.458333333333333, + "grad_norm": 3.074911801642584, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 141492608, + "step": 1550 + }, + { + "epoch": 6.458333333333333, + "loss": 0.0982789471745491, + "loss_ce": 0.00019544607494026423, + "loss_iou": 0.41796875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 141492608, + "step": 1550 + }, + { + "epoch": 6.4625, + "grad_norm": 10.409750197190485, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 141584332, + "step": 1551 + }, + { + "epoch": 6.4625, + "loss": 0.10331732034683228, + "loss_ce": 0.0012970553943887353, + "loss_iou": 0.337890625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 141584332, + "step": 1551 + }, + { + "epoch": 6.466666666666667, + "grad_norm": 2.2759078001399535, + "learning_rate": 5e-05, + "loss": 0.1277, + "num_input_tokens_seen": 141676000, + "step": 1552 + }, + { + "epoch": 6.466666666666667, + "loss": 0.1590229868888855, + "loss_ce": 0.008357708342373371, + "loss_iou": 0.314453125, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 141676000, + "step": 1552 + }, + { + "epoch": 6.470833333333333, + "grad_norm": 2.7347982728298326, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 141767732, + "step": 1553 + }, + { + "epoch": 6.470833333333333, + "loss": 0.10795509815216064, + "loss_ce": 0.0003195986500941217, + "loss_iou": 0.205078125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 141767732, + "step": 1553 + }, + { + "epoch": 6.475, + "grad_norm": 3.010863972284371, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 141859912, + "step": 1554 + }, + { + "epoch": 6.475, + "loss": 0.1464884877204895, + "loss_ce": 0.0006449909415096045, + "loss_iou": 0.2890625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 141859912, + "step": 1554 + }, + { + "epoch": 6.479166666666667, + "grad_norm": 4.290786568194312, + "learning_rate": 5e-05, + "loss": 0.1159, + "num_input_tokens_seen": 141951128, + "step": 1555 + }, + { + "epoch": 6.479166666666667, + "loss": 0.12644046545028687, + "loss_ce": 0.0007080405484884977, + "loss_iou": 0.212890625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 141951128, + "step": 1555 + }, + { + "epoch": 6.483333333333333, + "grad_norm": 5.329031237918446, + "learning_rate": 5e-05, + "loss": 0.1194, + "num_input_tokens_seen": 142042252, + "step": 1556 + }, + { + "epoch": 6.483333333333333, + "loss": 0.09762811660766602, + "loss_ce": 2.3853920083638513e-06, + "loss_iou": 0.32421875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 142042252, + "step": 1556 + }, + { + "epoch": 6.4875, + "grad_norm": 3.0036119988830863, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 142133724, + "step": 1557 + }, + { + "epoch": 6.4875, + "loss": 0.08220556378364563, + "loss_ce": 0.0009372499189339578, + "loss_iou": 0.40234375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 142133724, + "step": 1557 + }, + { + "epoch": 6.491666666666666, + "grad_norm": 5.490478725111228, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 142225104, + "step": 1558 + }, + { + "epoch": 6.491666666666666, + "loss": 0.08201521635055542, + "loss_ce": 0.0021812329068779945, + "loss_iou": 0.4296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 142225104, + "step": 1558 + }, + { + "epoch": 6.495833333333334, + "grad_norm": 13.400139381111213, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 142316760, + "step": 1559 + }, + { + "epoch": 6.495833333333334, + "loss": 0.06825940310955048, + "loss_ce": 6.787155871279538e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 142316760, + "step": 1559 + }, + { + "epoch": 6.5, + "grad_norm": 3.718230018374104, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 142407900, + "step": 1560 + }, + { + "epoch": 6.5, + "loss": 0.12160571664571762, + "loss_ce": 0.001763185835443437, + "loss_iou": 0.263671875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 142407900, + "step": 1560 + }, + { + "epoch": 6.504166666666666, + "grad_norm": 3.332714913964349, + "learning_rate": 5e-05, + "loss": 0.1115, + "num_input_tokens_seen": 142498844, + "step": 1561 + }, + { + "epoch": 6.504166666666666, + "loss": 0.09362047910690308, + "loss_ce": 0.0001756606507115066, + "loss_iou": 0.306640625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 142498844, + "step": 1561 + }, + { + "epoch": 6.508333333333333, + "grad_norm": 3.329570004581266, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 142590136, + "step": 1562 + }, + { + "epoch": 6.508333333333333, + "loss": 0.10767680406570435, + "loss_ce": 0.00013285694876685739, + "loss_iou": 0.44140625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 142590136, + "step": 1562 + }, + { + "epoch": 6.5125, + "grad_norm": 3.5592758832194806, + "learning_rate": 5e-05, + "loss": 0.1144, + "num_input_tokens_seen": 142681180, + "step": 1563 + }, + { + "epoch": 6.5125, + "loss": 0.06250756978988647, + "loss_ce": 0.0017089198809117079, + "loss_iou": 0.24609375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 142681180, + "step": 1563 + }, + { + "epoch": 6.516666666666667, + "grad_norm": 2.79429299910581, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 142772400, + "step": 1564 + }, + { + "epoch": 6.516666666666667, + "loss": 0.04886992275714874, + "loss_ce": 0.0014303472125902772, + "loss_iou": 0.240234375, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 142772400, + "step": 1564 + }, + { + "epoch": 6.520833333333333, + "grad_norm": 4.021842943865755, + "learning_rate": 5e-05, + "loss": 0.1259, + "num_input_tokens_seen": 142863396, + "step": 1565 + }, + { + "epoch": 6.520833333333333, + "loss": 0.14970502257347107, + "loss_ce": 0.0012675110483542085, + "loss_iou": 0.3125, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 142863396, + "step": 1565 + }, + { + "epoch": 6.525, + "grad_norm": 3.9084038734956557, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 142954776, + "step": 1566 + }, + { + "epoch": 6.525, + "loss": 0.10808855295181274, + "loss_ce": 0.0004988283035345376, + "loss_iou": 0.2470703125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 142954776, + "step": 1566 + }, + { + "epoch": 6.529166666666667, + "grad_norm": 1.9237307426450732, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 143045836, + "step": 1567 + }, + { + "epoch": 6.529166666666667, + "loss": 0.06775303930044174, + "loss_ce": 1.927517951116897e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 143045836, + "step": 1567 + }, + { + "epoch": 6.533333333333333, + "grad_norm": 4.421940334695053, + "learning_rate": 5e-05, + "loss": 0.1091, + "num_input_tokens_seen": 143137316, + "step": 1568 + }, + { + "epoch": 6.533333333333333, + "loss": 0.12645383179187775, + "loss_ce": 0.00047726318007335067, + "loss_iou": 0.279296875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 143137316, + "step": 1568 + }, + { + "epoch": 6.5375, + "grad_norm": 4.5291712901952295, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 143228708, + "step": 1569 + }, + { + "epoch": 6.5375, + "loss": 0.08038702607154846, + "loss_ce": 0.0004767438513226807, + "loss_iou": 0.2421875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 143228708, + "step": 1569 + }, + { + "epoch": 6.541666666666667, + "grad_norm": 7.921331568022988, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 143319700, + "step": 1570 + }, + { + "epoch": 6.541666666666667, + "loss": 0.0716252326965332, + "loss_ce": 4.6252054744400084e-05, + "loss_iou": 0.3203125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 143319700, + "step": 1570 + }, + { + "epoch": 6.545833333333333, + "grad_norm": 2.6572462351600374, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 143410636, + "step": 1571 + }, + { + "epoch": 6.545833333333333, + "loss": 0.12413694709539413, + "loss_ce": 2.195342858613003e-05, + "loss_iou": 0.314453125, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 143410636, + "step": 1571 + }, + { + "epoch": 6.55, + "grad_norm": 3.452842329680069, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 143502496, + "step": 1572 + }, + { + "epoch": 6.55, + "loss": 0.09016988426446915, + "loss_ce": 6.673274037893862e-05, + "loss_iou": 0.330078125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 143502496, + "step": 1572 + }, + { + "epoch": 6.554166666666667, + "grad_norm": 2.4604739050610465, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 143594092, + "step": 1573 + }, + { + "epoch": 6.554166666666667, + "loss": 0.08416140079498291, + "loss_ce": 7.021539204288274e-05, + "loss_iou": 0.36328125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 143594092, + "step": 1573 + }, + { + "epoch": 6.558333333333334, + "grad_norm": 3.4583027620478486, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 143684768, + "step": 1574 + }, + { + "epoch": 6.558333333333334, + "loss": 0.18618930876255035, + "loss_ce": 1.5710368188592838e-06, + "loss_iou": 0.22265625, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 143684768, + "step": 1574 + }, + { + "epoch": 6.5625, + "grad_norm": 4.276077174262582, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 143776528, + "step": 1575 + }, + { + "epoch": 6.5625, + "loss": 0.15433287620544434, + "loss_ce": 0.00028013830888085067, + "loss_iou": 0.28125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 143776528, + "step": 1575 + }, + { + "epoch": 6.566666666666666, + "grad_norm": 3.6015378520829078, + "learning_rate": 5e-05, + "loss": 0.1529, + "num_input_tokens_seen": 143867688, + "step": 1576 + }, + { + "epoch": 6.566666666666666, + "loss": 0.1484794020652771, + "loss_ce": 0.0014762284699827433, + "loss_iou": 0.298828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 143867688, + "step": 1576 + }, + { + "epoch": 6.570833333333333, + "grad_norm": 2.562262945602401, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 143959236, + "step": 1577 + }, + { + "epoch": 6.570833333333333, + "loss": 0.09467719495296478, + "loss_ce": 0.001583316596224904, + "loss_iou": 0.328125, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 143959236, + "step": 1577 + }, + { + "epoch": 6.575, + "grad_norm": 2.621665897659335, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 144050656, + "step": 1578 + }, + { + "epoch": 6.575, + "loss": 0.07747948914766312, + "loss_ce": 4.876002640230581e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 144050656, + "step": 1578 + }, + { + "epoch": 6.579166666666667, + "grad_norm": 10.60835756383391, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 144140052, + "step": 1579 + }, + { + "epoch": 6.579166666666667, + "loss": 0.1193399503827095, + "loss_ce": 7.724653551122174e-05, + "loss_iou": 0.427734375, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 144140052, + "step": 1579 + }, + { + "epoch": 6.583333333333333, + "grad_norm": 6.254374980240291, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 144231644, + "step": 1580 + }, + { + "epoch": 6.583333333333333, + "loss": 0.08207383751869202, + "loss_ce": 0.0006224174285307527, + "loss_iou": 0.423828125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 144231644, + "step": 1580 + }, + { + "epoch": 6.5875, + "grad_norm": 4.899090457878927, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 144322608, + "step": 1581 + }, + { + "epoch": 6.5875, + "loss": 0.06415297091007233, + "loss_ce": 0.0028584187384694815, + "loss_iou": 0.345703125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 144322608, + "step": 1581 + }, + { + "epoch": 6.591666666666667, + "grad_norm": 2.766175501257015, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 144413772, + "step": 1582 + }, + { + "epoch": 6.591666666666667, + "loss": 0.10372322797775269, + "loss_ce": 0.0006882529705762863, + "loss_iou": 0.2734375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 144413772, + "step": 1582 + }, + { + "epoch": 6.595833333333333, + "grad_norm": 2.7264734440635925, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 144504496, + "step": 1583 + }, + { + "epoch": 6.595833333333333, + "loss": 0.134153351187706, + "loss_ce": 0.0001811852998798713, + "loss_iou": 0.0771484375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 144504496, + "step": 1583 + }, + { + "epoch": 6.6, + "grad_norm": 3.967699299929471, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 144596468, + "step": 1584 + }, + { + "epoch": 6.6, + "loss": 0.13596948981285095, + "loss_ce": 7.469956472050399e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 144596468, + "step": 1584 + }, + { + "epoch": 6.604166666666667, + "grad_norm": 5.155884044774041, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 144687304, + "step": 1585 + }, + { + "epoch": 6.604166666666667, + "loss": 0.04194006323814392, + "loss_ce": 0.0004208995960652828, + "loss_iou": 0.27734375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 144687304, + "step": 1585 + }, + { + "epoch": 6.608333333333333, + "grad_norm": 2.303773461049103, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 144778800, + "step": 1586 + }, + { + "epoch": 6.608333333333333, + "loss": 0.05826836824417114, + "loss_ce": 0.0009410970378667116, + "loss_iou": 0.251953125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 144778800, + "step": 1586 + }, + { + "epoch": 6.6125, + "grad_norm": 2.0548811267657325, + "learning_rate": 5e-05, + "loss": 0.1494, + "num_input_tokens_seen": 144870196, + "step": 1587 + }, + { + "epoch": 6.6125, + "loss": 0.15138491988182068, + "loss_ce": 4.8250392865156755e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 144870196, + "step": 1587 + }, + { + "epoch": 6.616666666666667, + "grad_norm": 5.6424045492552395, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 144959672, + "step": 1588 + }, + { + "epoch": 6.616666666666667, + "loss": 0.13981932401657104, + "loss_ce": 3.0346068342623767e-06, + "loss_iou": 0.294921875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 144959672, + "step": 1588 + }, + { + "epoch": 6.620833333333334, + "grad_norm": 4.411052069608031, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 145051176, + "step": 1589 + }, + { + "epoch": 6.620833333333334, + "loss": 0.10094712674617767, + "loss_ce": 0.002283800160512328, + "loss_iou": 0.419921875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 145051176, + "step": 1589 + }, + { + "epoch": 6.625, + "grad_norm": 4.046755342352393, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 145142076, + "step": 1590 + }, + { + "epoch": 6.625, + "loss": 0.11325374245643616, + "loss_ce": 0.0004302625893615186, + "loss_iou": 0.287109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 145142076, + "step": 1590 + }, + { + "epoch": 6.629166666666666, + "grad_norm": 2.562778790765417, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 145233604, + "step": 1591 + }, + { + "epoch": 6.629166666666666, + "loss": 0.10232369601726532, + "loss_ce": 8.980841812444851e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 145233604, + "step": 1591 + }, + { + "epoch": 6.633333333333333, + "grad_norm": 5.5019492787113, + "learning_rate": 5e-05, + "loss": 0.127, + "num_input_tokens_seen": 145324992, + "step": 1592 + }, + { + "epoch": 6.633333333333333, + "loss": 0.1514090746641159, + "loss_ce": 1.1372105291229673e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 145324992, + "step": 1592 + }, + { + "epoch": 6.6375, + "grad_norm": 3.1473607225260904, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 145416480, + "step": 1593 + }, + { + "epoch": 6.6375, + "loss": 0.10823452472686768, + "loss_ce": 0.00035488815046846867, + "loss_iou": 0.205078125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 145416480, + "step": 1593 + }, + { + "epoch": 6.641666666666667, + "grad_norm": 11.970380699147968, + "learning_rate": 5e-05, + "loss": 0.1205, + "num_input_tokens_seen": 145508484, + "step": 1594 + }, + { + "epoch": 6.641666666666667, + "loss": 0.1729552000761032, + "loss_ce": 0.004803344141691923, + "loss_iou": 0.2333984375, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 145508484, + "step": 1594 + }, + { + "epoch": 6.645833333333333, + "grad_norm": 4.8173482154462945, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 145599724, + "step": 1595 + }, + { + "epoch": 6.645833333333333, + "loss": 0.10813955962657928, + "loss_ce": 0.0008092427160590887, + "loss_iou": 0.244140625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 145599724, + "step": 1595 + }, + { + "epoch": 6.65, + "grad_norm": 4.772495453558251, + "learning_rate": 5e-05, + "loss": 0.1477, + "num_input_tokens_seen": 145690884, + "step": 1596 + }, + { + "epoch": 6.65, + "loss": 0.1627688705921173, + "loss_ce": 0.00021699043281842023, + "loss_iou": 0.375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 145690884, + "step": 1596 + }, + { + "epoch": 6.654166666666667, + "grad_norm": 3.9667383937114513, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 145781884, + "step": 1597 + }, + { + "epoch": 6.654166666666667, + "loss": 0.1544293314218521, + "loss_ce": 0.000986945815384388, + "loss_iou": 0.326171875, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 145781884, + "step": 1597 + }, + { + "epoch": 6.658333333333333, + "grad_norm": 4.575704460362979, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 145873312, + "step": 1598 + }, + { + "epoch": 6.658333333333333, + "loss": 0.06805611401796341, + "loss_ce": 0.0012226162943989038, + "loss_iou": 0.1591796875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 145873312, + "step": 1598 + }, + { + "epoch": 6.6625, + "grad_norm": 1.9421159193829607, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 145964720, + "step": 1599 + }, + { + "epoch": 6.6625, + "loss": 0.04284782335162163, + "loss_ce": 0.0012065876508131623, + "loss_iou": 0.2255859375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 145964720, + "step": 1599 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 4.205435522590141, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 146056544, + "step": 1600 + }, + { + "epoch": 6.666666666666667, + "loss": 0.10162333399057388, + "loss_ce": 0.0015409357147291303, + "loss_iou": 0.26953125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 146056544, + "step": 1600 + }, + { + "epoch": 6.670833333333333, + "grad_norm": 4.996232392456174, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 146147216, + "step": 1601 + }, + { + "epoch": 6.670833333333333, + "loss": 0.1020415648818016, + "loss_ce": 0.0002044006687356159, + "loss_iou": 0.310546875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 146147216, + "step": 1601 + }, + { + "epoch": 6.675, + "grad_norm": 5.239999392150706, + "learning_rate": 5e-05, + "loss": 0.1187, + "num_input_tokens_seen": 146239040, + "step": 1602 + }, + { + "epoch": 6.675, + "loss": 0.125333771109581, + "loss_ce": 0.002058014739304781, + "loss_iou": 0.294921875, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 146239040, + "step": 1602 + }, + { + "epoch": 6.679166666666667, + "grad_norm": 11.941534013991284, + "learning_rate": 5e-05, + "loss": 0.1318, + "num_input_tokens_seen": 146330304, + "step": 1603 + }, + { + "epoch": 6.679166666666667, + "loss": 0.08114509284496307, + "loss_ce": 9.041121666086838e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 146330304, + "step": 1603 + }, + { + "epoch": 6.683333333333334, + "grad_norm": 4.644288999865973, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 146421016, + "step": 1604 + }, + { + "epoch": 6.683333333333334, + "loss": 0.1072845384478569, + "loss_ce": 0.000381466350518167, + "loss_iou": 0.2177734375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 146421016, + "step": 1604 + }, + { + "epoch": 6.6875, + "grad_norm": 4.404307762190488, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 146512496, + "step": 1605 + }, + { + "epoch": 6.6875, + "loss": 0.0725177675485611, + "loss_ce": 0.0004962862585671246, + "loss_iou": 0.34765625, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 146512496, + "step": 1605 + }, + { + "epoch": 6.691666666666666, + "grad_norm": 2.097287608660134, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 146603768, + "step": 1606 + }, + { + "epoch": 6.691666666666666, + "loss": 0.0623895600438118, + "loss_ce": 0.0010339674772694707, + "loss_iou": 0.267578125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 146603768, + "step": 1606 + }, + { + "epoch": 6.695833333333333, + "grad_norm": 4.467146990709377, + "learning_rate": 5e-05, + "loss": 0.1235, + "num_input_tokens_seen": 146695264, + "step": 1607 + }, + { + "epoch": 6.695833333333333, + "loss": 0.17170248925685883, + "loss_ce": 0.0025435483548790216, + "loss_iou": 0.33203125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 146695264, + "step": 1607 + }, + { + "epoch": 6.7, + "grad_norm": 6.296580911285716, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 146786668, + "step": 1608 + }, + { + "epoch": 6.7, + "loss": 0.11704735457897186, + "loss_ce": 0.000805901363492012, + "loss_iou": 0.1025390625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 146786668, + "step": 1608 + }, + { + "epoch": 6.704166666666667, + "grad_norm": 5.336888472091619, + "learning_rate": 5e-05, + "loss": 0.1373, + "num_input_tokens_seen": 146877584, + "step": 1609 + }, + { + "epoch": 6.704166666666667, + "loss": 0.10470438748598099, + "loss_ce": 0.0004563412512652576, + "loss_iou": 0.359375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 146877584, + "step": 1609 + }, + { + "epoch": 6.708333333333333, + "grad_norm": 2.571261156505444, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 146968948, + "step": 1610 + }, + { + "epoch": 6.708333333333333, + "loss": 0.12027784436941147, + "loss_ce": 0.0006794579094275832, + "loss_iou": 0.3359375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 146968948, + "step": 1610 + }, + { + "epoch": 6.7125, + "grad_norm": 10.122247458063885, + "learning_rate": 5e-05, + "loss": 0.0949, + "num_input_tokens_seen": 147059772, + "step": 1611 + }, + { + "epoch": 6.7125, + "loss": 0.09837611019611359, + "loss_ce": 0.0013912541326135397, + "loss_iou": 0.205078125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 147059772, + "step": 1611 + }, + { + "epoch": 6.716666666666667, + "grad_norm": 4.187241160722909, + "learning_rate": 5e-05, + "loss": 0.1325, + "num_input_tokens_seen": 147150372, + "step": 1612 + }, + { + "epoch": 6.716666666666667, + "loss": 0.15941470861434937, + "loss_ce": 0.000479156500659883, + "loss_iou": 0.310546875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 147150372, + "step": 1612 + }, + { + "epoch": 6.720833333333333, + "grad_norm": 3.4823413210199328, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 147241568, + "step": 1613 + }, + { + "epoch": 6.720833333333333, + "loss": 0.10597635060548782, + "loss_ce": 4.063520464114845e-06, + "loss_iou": 0.1806640625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 147241568, + "step": 1613 + }, + { + "epoch": 6.725, + "grad_norm": 7.069400820243496, + "learning_rate": 5e-05, + "loss": 0.1113, + "num_input_tokens_seen": 147332292, + "step": 1614 + }, + { + "epoch": 6.725, + "loss": 0.1232280358672142, + "loss_ce": 0.0009517316939309239, + "loss_iou": 0.1826171875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 147332292, + "step": 1614 + }, + { + "epoch": 6.729166666666667, + "grad_norm": 1.9823059290298373, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 147423048, + "step": 1615 + }, + { + "epoch": 6.729166666666667, + "loss": 0.08469944447278976, + "loss_ce": 2.0791940187336877e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 147423048, + "step": 1615 + }, + { + "epoch": 6.733333333333333, + "grad_norm": 2.4822174257526077, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 147513936, + "step": 1616 + }, + { + "epoch": 6.733333333333333, + "loss": 0.057547569274902344, + "loss_ce": 0.00044917932245880365, + "loss_iou": 0.283203125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 147513936, + "step": 1616 + }, + { + "epoch": 6.7375, + "grad_norm": 2.833317054725588, + "learning_rate": 5e-05, + "loss": 0.1107, + "num_input_tokens_seen": 147605708, + "step": 1617 + }, + { + "epoch": 6.7375, + "loss": 0.07036544382572174, + "loss_ce": 0.0010142435785382986, + "loss_iou": 0.298828125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 147605708, + "step": 1617 + }, + { + "epoch": 6.741666666666667, + "grad_norm": 4.051920430799818, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 147697648, + "step": 1618 + }, + { + "epoch": 6.741666666666667, + "loss": 0.08087938278913498, + "loss_ce": 0.0002824601251631975, + "loss_iou": 0.265625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 147697648, + "step": 1618 + }, + { + "epoch": 6.745833333333334, + "grad_norm": 3.1222260691833696, + "learning_rate": 5e-05, + "loss": 0.1416, + "num_input_tokens_seen": 147788916, + "step": 1619 + }, + { + "epoch": 6.745833333333334, + "loss": 0.13396668434143066, + "loss_ce": 0.00019288396288175136, + "loss_iou": 0.189453125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 147788916, + "step": 1619 + }, + { + "epoch": 6.75, + "grad_norm": 3.6992346856006324, + "learning_rate": 5e-05, + "loss": 0.1125, + "num_input_tokens_seen": 147880112, + "step": 1620 + }, + { + "epoch": 6.75, + "loss": 0.11802740395069122, + "loss_ce": 0.000916196615435183, + "loss_iou": 0.380859375, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 147880112, + "step": 1620 + }, + { + "epoch": 6.754166666666666, + "grad_norm": 8.15302032172279, + "learning_rate": 5e-05, + "loss": 0.1344, + "num_input_tokens_seen": 147971792, + "step": 1621 + }, + { + "epoch": 6.754166666666666, + "loss": 0.1697504073381424, + "loss_ce": 0.002224163617938757, + "loss_iou": 0.27734375, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 147971792, + "step": 1621 + }, + { + "epoch": 6.758333333333333, + "grad_norm": 4.230587195826469, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 148062112, + "step": 1622 + }, + { + "epoch": 6.758333333333333, + "loss": 0.12787632644176483, + "loss_ce": 1.5303545296774246e-05, + "loss_iou": 0.193359375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 148062112, + "step": 1622 + }, + { + "epoch": 6.7625, + "grad_norm": 4.901904450953437, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 148153220, + "step": 1623 + }, + { + "epoch": 6.7625, + "loss": 0.07316801697015762, + "loss_ce": 0.0028860336169600487, + "loss_iou": 0.28515625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 148153220, + "step": 1623 + }, + { + "epoch": 6.766666666666667, + "grad_norm": 3.9667116514826097, + "learning_rate": 5e-05, + "loss": 0.1551, + "num_input_tokens_seen": 148244284, + "step": 1624 + }, + { + "epoch": 6.766666666666667, + "loss": 0.151185542345047, + "loss_ce": 1.4684163716083276e-06, + "loss_iou": 0.328125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 148244284, + "step": 1624 + }, + { + "epoch": 6.770833333333333, + "grad_norm": 13.36068440365898, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 148335192, + "step": 1625 + }, + { + "epoch": 6.770833333333333, + "loss": 0.09784172475337982, + "loss_ce": 0.00039909378392621875, + "loss_iou": 0.265625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 148335192, + "step": 1625 + }, + { + "epoch": 6.775, + "grad_norm": 4.5555459822511395, + "learning_rate": 5e-05, + "loss": 0.141, + "num_input_tokens_seen": 148426388, + "step": 1626 + }, + { + "epoch": 6.775, + "loss": 0.14454184472560883, + "loss_ce": 0.0003768008027691394, + "loss_iou": 0.3046875, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 148426388, + "step": 1626 + }, + { + "epoch": 6.779166666666667, + "grad_norm": 6.7472256934039025, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 148516512, + "step": 1627 + }, + { + "epoch": 6.779166666666667, + "loss": 0.06415146589279175, + "loss_ce": 3.5179459700884763e-06, + "loss_iou": 0.1025390625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 148516512, + "step": 1627 + }, + { + "epoch": 6.783333333333333, + "grad_norm": 7.431925452000919, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 148607596, + "step": 1628 + }, + { + "epoch": 6.783333333333333, + "loss": 0.10833147913217545, + "loss_ce": 0.0010011536069214344, + "loss_iou": 0.06494140625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 148607596, + "step": 1628 + }, + { + "epoch": 6.7875, + "grad_norm": 3.834227391343124, + "learning_rate": 5e-05, + "loss": 0.1278, + "num_input_tokens_seen": 148699612, + "step": 1629 + }, + { + "epoch": 6.7875, + "loss": 0.14319217205047607, + "loss_ce": 0.001041282550431788, + "loss_iou": 0.296875, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 148699612, + "step": 1629 + }, + { + "epoch": 6.791666666666667, + "grad_norm": 2.6814714656783742, + "learning_rate": 5e-05, + "loss": 0.1478, + "num_input_tokens_seen": 148789564, + "step": 1630 + }, + { + "epoch": 6.791666666666667, + "loss": 0.09333618730306625, + "loss_ce": 0.00021942633611615747, + "loss_iou": 0.26171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 148789564, + "step": 1630 + }, + { + "epoch": 6.795833333333333, + "grad_norm": 4.520185963024227, + "learning_rate": 5e-05, + "loss": 0.1148, + "num_input_tokens_seen": 148879640, + "step": 1631 + }, + { + "epoch": 6.795833333333333, + "loss": 0.14096848666667938, + "loss_ce": 0.002510237041860819, + "loss_iou": 0.296875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 148879640, + "step": 1631 + }, + { + "epoch": 6.8, + "grad_norm": 4.447410411705429, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 148970324, + "step": 1632 + }, + { + "epoch": 6.8, + "loss": 0.1173885315656662, + "loss_ce": 3.318277231301181e-05, + "loss_iou": 0.296875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 148970324, + "step": 1632 + }, + { + "epoch": 6.804166666666667, + "grad_norm": 3.4723243249443794, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 149061416, + "step": 1633 + }, + { + "epoch": 6.804166666666667, + "loss": 0.07656733691692352, + "loss_ce": 0.001734415884129703, + "loss_iou": 0.2333984375, + "loss_num": 0.0150146484375, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 149061416, + "step": 1633 + }, + { + "epoch": 6.808333333333334, + "grad_norm": 4.963455215852101, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 149152852, + "step": 1634 + }, + { + "epoch": 6.808333333333334, + "loss": 0.07179413735866547, + "loss_ce": 0.0003524910134728998, + "loss_iou": 0.294921875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 149152852, + "step": 1634 + }, + { + "epoch": 6.8125, + "grad_norm": 6.349033898429906, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 149244076, + "step": 1635 + }, + { + "epoch": 6.8125, + "loss": 0.07682006061077118, + "loss_ce": 1.4947971067158505e-05, + "loss_iou": 0.283203125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 149244076, + "step": 1635 + }, + { + "epoch": 6.816666666666666, + "grad_norm": 3.292050967196103, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 149335296, + "step": 1636 + }, + { + "epoch": 6.816666666666666, + "loss": 0.09713432937860489, + "loss_ce": 5.791701914859004e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 149335296, + "step": 1636 + }, + { + "epoch": 6.820833333333333, + "grad_norm": 16.227618304181107, + "learning_rate": 5e-05, + "loss": 0.1328, + "num_input_tokens_seen": 149426624, + "step": 1637 + }, + { + "epoch": 6.820833333333333, + "loss": 0.10378938913345337, + "loss_ce": 0.001647052587941289, + "loss_iou": 0.154296875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 149426624, + "step": 1637 + }, + { + "epoch": 6.825, + "grad_norm": 9.950541544841345, + "learning_rate": 5e-05, + "loss": 0.1208, + "num_input_tokens_seen": 149517172, + "step": 1638 + }, + { + "epoch": 6.825, + "loss": 0.1490859091281891, + "loss_ce": 0.0022963478695601225, + "loss_iou": 0.2734375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 149517172, + "step": 1638 + }, + { + "epoch": 6.829166666666667, + "grad_norm": 2.888281399700848, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 149608580, + "step": 1639 + }, + { + "epoch": 6.829166666666667, + "loss": 0.08360613882541656, + "loss_ce": 0.002017396269366145, + "loss_iou": 0.2578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 149608580, + "step": 1639 + }, + { + "epoch": 6.833333333333333, + "grad_norm": 3.125102142349154, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 149699524, + "step": 1640 + }, + { + "epoch": 6.833333333333333, + "loss": 0.07310190796852112, + "loss_ce": 2.756038520601578e-05, + "loss_iou": 0.341796875, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 149699524, + "step": 1640 + }, + { + "epoch": 6.8375, + "grad_norm": 5.308252886132232, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 149790584, + "step": 1641 + }, + { + "epoch": 6.8375, + "loss": 0.0729517862200737, + "loss_ce": 0.0010676286183297634, + "loss_iou": 0.11328125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 149790584, + "step": 1641 + }, + { + "epoch": 6.841666666666667, + "grad_norm": 4.232688259836703, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 149882032, + "step": 1642 + }, + { + "epoch": 6.841666666666667, + "loss": 0.08408161997795105, + "loss_ce": 0.00041005387902259827, + "loss_iou": 0.283203125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 149882032, + "step": 1642 + }, + { + "epoch": 6.845833333333333, + "grad_norm": 3.5412300451929783, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 149973372, + "step": 1643 + }, + { + "epoch": 6.845833333333333, + "loss": 0.04515673965215683, + "loss_ce": 0.0004332278040237725, + "loss_iou": 0.3359375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 149973372, + "step": 1643 + }, + { + "epoch": 6.85, + "grad_norm": 4.356357508934272, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 150064332, + "step": 1644 + }, + { + "epoch": 6.85, + "loss": 0.09591878205537796, + "loss_ce": 3.254601324442774e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 150064332, + "step": 1644 + }, + { + "epoch": 6.854166666666667, + "grad_norm": 9.82791526650308, + "learning_rate": 5e-05, + "loss": 0.0799, + "num_input_tokens_seen": 150155820, + "step": 1645 + }, + { + "epoch": 6.854166666666667, + "loss": 0.11722063273191452, + "loss_ce": 0.0007655585068278015, + "loss_iou": 0.337890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 150155820, + "step": 1645 + }, + { + "epoch": 6.858333333333333, + "grad_norm": 8.523554873394339, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 150246624, + "step": 1646 + }, + { + "epoch": 6.858333333333333, + "loss": 0.11677127331495285, + "loss_ce": 1.1021089449059218e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 150246624, + "step": 1646 + }, + { + "epoch": 6.8625, + "grad_norm": 2.841981736257889, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 150338200, + "step": 1647 + }, + { + "epoch": 6.8625, + "loss": 0.07999046891927719, + "loss_ce": 3.441448279772885e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 150338200, + "step": 1647 + }, + { + "epoch": 6.866666666666667, + "grad_norm": 4.739673541337045, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 150429216, + "step": 1648 + }, + { + "epoch": 6.866666666666667, + "loss": 0.10366171598434448, + "loss_ce": 0.0005733439465984702, + "loss_iou": 0.208984375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 150429216, + "step": 1648 + }, + { + "epoch": 6.870833333333334, + "grad_norm": 3.6239360814138815, + "learning_rate": 5e-05, + "loss": 0.1146, + "num_input_tokens_seen": 150520388, + "step": 1649 + }, + { + "epoch": 6.870833333333334, + "loss": 0.10713136941194534, + "loss_ce": 0.0002359184727538377, + "loss_iou": 0.296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 150520388, + "step": 1649 + }, + { + "epoch": 6.875, + "grad_norm": 10.076261514779913, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 150611424, + "step": 1650 + }, + { + "epoch": 6.875, + "loss": 0.054482243955135345, + "loss_ce": 0.0006187166436575353, + "loss_iou": 0.416015625, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 150611424, + "step": 1650 + }, + { + "epoch": 6.879166666666666, + "grad_norm": 7.037017041308367, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 150702592, + "step": 1651 + }, + { + "epoch": 6.879166666666666, + "loss": 0.0805845558643341, + "loss_ce": 6.392721115844324e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 150702592, + "step": 1651 + }, + { + "epoch": 6.883333333333333, + "grad_norm": 2.2476195907523726, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 150792232, + "step": 1652 + }, + { + "epoch": 6.883333333333333, + "loss": 0.10652382671833038, + "loss_ce": 0.0002539954148232937, + "loss_iou": 0.1435546875, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 150792232, + "step": 1652 + }, + { + "epoch": 6.8875, + "grad_norm": 8.071696596054197, + "learning_rate": 5e-05, + "loss": 0.1469, + "num_input_tokens_seen": 150883820, + "step": 1653 + }, + { + "epoch": 6.8875, + "loss": 0.12309402227401733, + "loss_ce": 0.0009016396361403167, + "loss_iou": 0.3125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 150883820, + "step": 1653 + }, + { + "epoch": 6.891666666666667, + "grad_norm": 6.26788141368876, + "learning_rate": 5e-05, + "loss": 0.1246, + "num_input_tokens_seen": 150975532, + "step": 1654 + }, + { + "epoch": 6.891666666666667, + "loss": 0.07721350342035294, + "loss_ce": 0.0004923146916553378, + "loss_iou": 0.21484375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 150975532, + "step": 1654 + }, + { + "epoch": 6.895833333333333, + "grad_norm": 2.9578790282701592, + "learning_rate": 5e-05, + "loss": 0.1126, + "num_input_tokens_seen": 151066492, + "step": 1655 + }, + { + "epoch": 6.895833333333333, + "loss": 0.1371522843837738, + "loss_ce": 3.680928057292476e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 151066492, + "step": 1655 + }, + { + "epoch": 6.9, + "grad_norm": 7.018461363606477, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 151156936, + "step": 1656 + }, + { + "epoch": 6.9, + "loss": 0.11592155694961548, + "loss_ce": 0.000977097311988473, + "loss_iou": 0.267578125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 151156936, + "step": 1656 + }, + { + "epoch": 6.904166666666667, + "grad_norm": 9.570021745040235, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 151248348, + "step": 1657 + }, + { + "epoch": 6.904166666666667, + "loss": 0.06212965026497841, + "loss_ce": 0.0002247430384159088, + "loss_iou": 0.291015625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 151248348, + "step": 1657 + }, + { + "epoch": 6.908333333333333, + "grad_norm": 7.408031973473457, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 151339644, + "step": 1658 + }, + { + "epoch": 6.908333333333333, + "loss": 0.07610473036766052, + "loss_ce": 8.544308366253972e-05, + "loss_iou": 0.296875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 151339644, + "step": 1658 + }, + { + "epoch": 6.9125, + "grad_norm": 3.9852023699854087, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 151430928, + "step": 1659 + }, + { + "epoch": 6.9125, + "loss": 0.10938706994056702, + "loss_ce": 0.0001646591699682176, + "loss_iou": 0.458984375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 151430928, + "step": 1659 + }, + { + "epoch": 6.916666666666667, + "grad_norm": 3.203128534099005, + "learning_rate": 5e-05, + "loss": 0.162, + "num_input_tokens_seen": 151521964, + "step": 1660 + }, + { + "epoch": 6.916666666666667, + "loss": 0.1142687052488327, + "loss_ce": 4.141416138736531e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 151521964, + "step": 1660 + }, + { + "epoch": 6.920833333333333, + "grad_norm": 3.0602870881140274, + "learning_rate": 5e-05, + "loss": 0.1139, + "num_input_tokens_seen": 151613612, + "step": 1661 + }, + { + "epoch": 6.920833333333333, + "loss": 0.06900735199451447, + "loss_ce": 0.00019021421030629426, + "loss_iou": 0.322265625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 151613612, + "step": 1661 + }, + { + "epoch": 6.925, + "grad_norm": 3.7765433687416072, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 151705088, + "step": 1662 + }, + { + "epoch": 6.925, + "loss": 0.05029073357582092, + "loss_ce": 0.0009438100969418883, + "loss_iou": 0.234375, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 151705088, + "step": 1662 + }, + { + "epoch": 6.929166666666667, + "grad_norm": 9.780738346093939, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 151796132, + "step": 1663 + }, + { + "epoch": 6.929166666666667, + "loss": 0.06529055535793304, + "loss_ce": 0.0007916553295217454, + "loss_iou": 0.1943359375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 151796132, + "step": 1663 + }, + { + "epoch": 6.933333333333334, + "grad_norm": 15.110671025740874, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 151887784, + "step": 1664 + }, + { + "epoch": 6.933333333333334, + "loss": 0.1223883330821991, + "loss_ce": 0.001172511139884591, + "loss_iou": 0.310546875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 151887784, + "step": 1664 + }, + { + "epoch": 6.9375, + "grad_norm": 3.9839866327172326, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 151979232, + "step": 1665 + }, + { + "epoch": 6.9375, + "loss": 0.12088834494352341, + "loss_ce": 0.0007711583748459816, + "loss_iou": 0.271484375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 151979232, + "step": 1665 + }, + { + "epoch": 6.941666666666666, + "grad_norm": 31.80681572542876, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 152070888, + "step": 1666 + }, + { + "epoch": 6.941666666666666, + "loss": 0.11462222039699554, + "loss_ce": 0.0007611305918544531, + "loss_iou": 0.337890625, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 152070888, + "step": 1666 + }, + { + "epoch": 6.945833333333333, + "grad_norm": 1.9487821449189608, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 152162488, + "step": 1667 + }, + { + "epoch": 6.945833333333333, + "loss": 0.10232115536928177, + "loss_ce": 0.001414780505001545, + "loss_iou": 0.328125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 152162488, + "step": 1667 + }, + { + "epoch": 6.95, + "grad_norm": 2.296245106334686, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 152253852, + "step": 1668 + }, + { + "epoch": 6.95, + "loss": 0.08986086398363113, + "loss_ce": 0.0011920389952138066, + "loss_iou": 0.333984375, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 152253852, + "step": 1668 + }, + { + "epoch": 6.954166666666667, + "grad_norm": 3.10920167890936, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 152344728, + "step": 1669 + }, + { + "epoch": 6.954166666666667, + "loss": 0.11594560742378235, + "loss_ce": 0.0003145021037198603, + "loss_iou": 0.287109375, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 152344728, + "step": 1669 + }, + { + "epoch": 6.958333333333333, + "grad_norm": 3.1787055623635814, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 152434540, + "step": 1670 + }, + { + "epoch": 6.958333333333333, + "loss": 0.11103759706020355, + "loss_ce": 0.003005367936566472, + "loss_iou": 0.244140625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 152434540, + "step": 1670 + }, + { + "epoch": 6.9625, + "grad_norm": 3.809738381982028, + "learning_rate": 5e-05, + "loss": 0.1244, + "num_input_tokens_seen": 152525336, + "step": 1671 + }, + { + "epoch": 6.9625, + "loss": 0.11085816472768784, + "loss_ce": 0.0011627288768067956, + "loss_iou": 0.04931640625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 152525336, + "step": 1671 + }, + { + "epoch": 6.966666666666667, + "grad_norm": 2.7704701899550392, + "learning_rate": 5e-05, + "loss": 0.1406, + "num_input_tokens_seen": 152617008, + "step": 1672 + }, + { + "epoch": 6.966666666666667, + "loss": 0.10031631588935852, + "loss_ce": 0.00012711159070022404, + "loss_iou": 0.361328125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 152617008, + "step": 1672 + }, + { + "epoch": 6.970833333333333, + "grad_norm": 3.4645120063749784, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 152708576, + "step": 1673 + }, + { + "epoch": 6.970833333333333, + "loss": 0.07226431369781494, + "loss_ce": 0.001585603691637516, + "loss_iou": 0.1591796875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 152708576, + "step": 1673 + }, + { + "epoch": 6.975, + "grad_norm": 6.7276705007467195, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 152800444, + "step": 1674 + }, + { + "epoch": 6.975, + "loss": 0.10426914691925049, + "loss_ce": 0.0008298219181597233, + "loss_iou": 0.291015625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 152800444, + "step": 1674 + }, + { + "epoch": 6.979166666666667, + "grad_norm": 3.366820868004411, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 152890124, + "step": 1675 + }, + { + "epoch": 6.979166666666667, + "loss": 0.07411395013332367, + "loss_ce": 1.7270358512178063e-05, + "loss_iou": 0.263671875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 152890124, + "step": 1675 + }, + { + "epoch": 6.983333333333333, + "grad_norm": 4.284830340007906, + "learning_rate": 5e-05, + "loss": 0.1426, + "num_input_tokens_seen": 152982236, + "step": 1676 + }, + { + "epoch": 6.983333333333333, + "loss": 0.15787337720394135, + "loss_ce": 0.0007231036433950067, + "loss_iou": 0.3828125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 152982236, + "step": 1676 + }, + { + "epoch": 6.9875, + "grad_norm": 2.7269392270511936, + "learning_rate": 5e-05, + "loss": 0.1975, + "num_input_tokens_seen": 153072924, + "step": 1677 + }, + { + "epoch": 6.9875, + "loss": 0.2391592264175415, + "loss_ce": 0.000420216383645311, + "loss_iou": 0.421875, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 153072924, + "step": 1677 + }, + { + "epoch": 6.991666666666667, + "grad_norm": 2.2636614694574386, + "learning_rate": 5e-05, + "loss": 0.1208, + "num_input_tokens_seen": 153163848, + "step": 1678 + }, + { + "epoch": 6.991666666666667, + "loss": 0.13624174892902374, + "loss_ce": 0.0009573178831487894, + "loss_iou": 0.08642578125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 153163848, + "step": 1678 + }, + { + "epoch": 6.995833333333334, + "grad_norm": 4.28179404571259, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 153255320, + "step": 1679 + }, + { + "epoch": 6.995833333333334, + "loss": 0.10170367360115051, + "loss_ce": 0.0024910294450819492, + "loss_iou": 0.375, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 153255320, + "step": 1679 + }, + { + "epoch": 7.0, + "grad_norm": 6.3688665214514755, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 153346276, + "step": 1680 + }, + { + "epoch": 7.0, + "loss": 0.11220959573984146, + "loss_ce": 1.1721253031282686e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 153346276, + "step": 1680 + }, + { + "epoch": 7.004166666666666, + "grad_norm": 3.5212944807976916, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 153437916, + "step": 1681 + }, + { + "epoch": 7.004166666666666, + "loss": 0.043974943459033966, + "loss_ce": 0.0007086455589160323, + "loss_iou": 0.2890625, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 153437916, + "step": 1681 + }, + { + "epoch": 7.008333333333334, + "grad_norm": 3.0994500949291277, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 153528748, + "step": 1682 + }, + { + "epoch": 7.008333333333334, + "loss": 0.07113240659236908, + "loss_ce": 0.00017903783009387553, + "loss_iou": 0.34375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 153528748, + "step": 1682 + }, + { + "epoch": 7.0125, + "grad_norm": 2.7395665448088615, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 153619940, + "step": 1683 + }, + { + "epoch": 7.0125, + "loss": 0.04401383176445961, + "loss_ce": 0.0005491725169122219, + "loss_iou": 0.2578125, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 153619940, + "step": 1683 + }, + { + "epoch": 7.016666666666667, + "grad_norm": 4.569723659784505, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 153710872, + "step": 1684 + }, + { + "epoch": 7.016666666666667, + "loss": 0.1020163893699646, + "loss_ce": 0.002246800111606717, + "loss_iou": 0.29296875, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 153710872, + "step": 1684 + }, + { + "epoch": 7.020833333333333, + "grad_norm": 2.6046811093814908, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 153801900, + "step": 1685 + }, + { + "epoch": 7.020833333333333, + "loss": 0.0888538509607315, + "loss_ce": 1.7180602299049497e-05, + "loss_iou": 0.40234375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 153801900, + "step": 1685 + }, + { + "epoch": 7.025, + "grad_norm": 5.760755754723652, + "learning_rate": 5e-05, + "loss": 0.1325, + "num_input_tokens_seen": 153893304, + "step": 1686 + }, + { + "epoch": 7.025, + "loss": 0.09790083765983582, + "loss_ce": 0.00230452255345881, + "loss_iou": 0.390625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 153893304, + "step": 1686 + }, + { + "epoch": 7.029166666666667, + "grad_norm": 5.651359354673425, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 153984260, + "step": 1687 + }, + { + "epoch": 7.029166666666667, + "loss": 0.12273789942264557, + "loss_ce": 0.0003013756650034338, + "loss_iou": 0.357421875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 153984260, + "step": 1687 + }, + { + "epoch": 7.033333333333333, + "grad_norm": 4.5602271451019565, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 154076040, + "step": 1688 + }, + { + "epoch": 7.033333333333333, + "loss": 0.0731680616736412, + "loss_ce": 0.0006277795182541013, + "loss_iou": 0.3359375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 154076040, + "step": 1688 + }, + { + "epoch": 7.0375, + "grad_norm": 4.816161090981711, + "learning_rate": 5e-05, + "loss": 0.0918, + "num_input_tokens_seen": 154166928, + "step": 1689 + }, + { + "epoch": 7.0375, + "loss": 0.07955670356750488, + "loss_ce": 0.000699278840329498, + "loss_iou": 0.1328125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 154166928, + "step": 1689 + }, + { + "epoch": 7.041666666666667, + "grad_norm": 4.0218740355714315, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 154258108, + "step": 1690 + }, + { + "epoch": 7.041666666666667, + "loss": 0.09026078134775162, + "loss_ce": 0.00026444171089679003, + "loss_iou": 0.259765625, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 154258108, + "step": 1690 + }, + { + "epoch": 7.045833333333333, + "grad_norm": 10.266520343130312, + "learning_rate": 5e-05, + "loss": 0.1145, + "num_input_tokens_seen": 154349940, + "step": 1691 + }, + { + "epoch": 7.045833333333333, + "loss": 0.061724916100502014, + "loss_ce": 0.002810731064528227, + "loss_iou": 0.298828125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 154349940, + "step": 1691 + }, + { + "epoch": 7.05, + "grad_norm": 6.154462162547571, + "learning_rate": 5e-05, + "loss": 0.1351, + "num_input_tokens_seen": 154441532, + "step": 1692 + }, + { + "epoch": 7.05, + "loss": 0.1281442642211914, + "loss_ce": 0.0013589877635240555, + "loss_iou": 0.302734375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 154441532, + "step": 1692 + }, + { + "epoch": 7.054166666666666, + "grad_norm": 3.2667525209962776, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 154533092, + "step": 1693 + }, + { + "epoch": 7.054166666666666, + "loss": 0.09928688406944275, + "loss_ce": 0.0016916656168177724, + "loss_iou": 0.265625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 154533092, + "step": 1693 + }, + { + "epoch": 7.058333333333334, + "grad_norm": 3.077707416306053, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 154624224, + "step": 1694 + }, + { + "epoch": 7.058333333333334, + "loss": 0.09326724708080292, + "loss_ce": 3.6047880712430924e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 154624224, + "step": 1694 + }, + { + "epoch": 7.0625, + "grad_norm": 5.3368795339634945, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 154715264, + "step": 1695 + }, + { + "epoch": 7.0625, + "loss": 0.0723080262541771, + "loss_ce": 1.1880889360327274e-05, + "loss_iou": 0.28125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 154715264, + "step": 1695 + }, + { + "epoch": 7.066666666666666, + "grad_norm": 6.211008016088809, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 154805936, + "step": 1696 + }, + { + "epoch": 7.066666666666666, + "loss": 0.13396210968494415, + "loss_ce": 8.14942322904244e-05, + "loss_iou": 0.2265625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 154805936, + "step": 1696 + }, + { + "epoch": 7.070833333333334, + "grad_norm": 7.984183401976844, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 154897224, + "step": 1697 + }, + { + "epoch": 7.070833333333334, + "loss": 0.11862719058990479, + "loss_ce": 0.0006462404271587729, + "loss_iou": 0.2578125, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 154897224, + "step": 1697 + }, + { + "epoch": 7.075, + "grad_norm": 4.0972656746849685, + "learning_rate": 5e-05, + "loss": 0.1214, + "num_input_tokens_seen": 154987660, + "step": 1698 + }, + { + "epoch": 7.075, + "loss": 0.09393851459026337, + "loss_ce": 5.118623812450096e-05, + "loss_iou": 0.279296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 154987660, + "step": 1698 + }, + { + "epoch": 7.079166666666667, + "grad_norm": 7.568165199224343, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 155078880, + "step": 1699 + }, + { + "epoch": 7.079166666666667, + "loss": 0.05662469193339348, + "loss_ce": 0.0005028644227422774, + "loss_iou": 0.341796875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 155078880, + "step": 1699 + }, + { + "epoch": 7.083333333333333, + "grad_norm": 3.1584135776625484, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 155170128, + "step": 1700 + }, + { + "epoch": 7.083333333333333, + "loss": 0.08303529024124146, + "loss_ce": 0.00010377775470260531, + "loss_iou": 0.21484375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 155170128, + "step": 1700 + }, + { + "epoch": 7.0875, + "grad_norm": 5.456883289815665, + "learning_rate": 5e-05, + "loss": 0.1264, + "num_input_tokens_seen": 155261300, + "step": 1701 + }, + { + "epoch": 7.0875, + "loss": 0.08729474991559982, + "loss_ce": 0.0007011258276179433, + "loss_iou": 0.189453125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 155261300, + "step": 1701 + }, + { + "epoch": 7.091666666666667, + "grad_norm": 6.332498523640968, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 155352628, + "step": 1702 + }, + { + "epoch": 7.091666666666667, + "loss": 0.0688634067773819, + "loss_ce": 0.0005803273525089025, + "loss_iou": 0.2890625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 155352628, + "step": 1702 + }, + { + "epoch": 7.095833333333333, + "grad_norm": 3.144063696162726, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 155443380, + "step": 1703 + }, + { + "epoch": 7.095833333333333, + "loss": 0.12068355828523636, + "loss_ce": 1.7052898328984156e-05, + "loss_iou": 0.2578125, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 155443380, + "step": 1703 + }, + { + "epoch": 7.1, + "grad_norm": 2.2506929894056227, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 155534608, + "step": 1704 + }, + { + "epoch": 7.1, + "loss": 0.06894703209400177, + "loss_ce": 2.308711555087939e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 155534608, + "step": 1704 + }, + { + "epoch": 7.104166666666667, + "grad_norm": 5.102828558713652, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 155625756, + "step": 1705 + }, + { + "epoch": 7.104166666666667, + "loss": 0.08216731250286102, + "loss_ce": 0.0002810218429658562, + "loss_iou": 0.283203125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 155625756, + "step": 1705 + }, + { + "epoch": 7.108333333333333, + "grad_norm": 1.9318064493586913, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 155717672, + "step": 1706 + }, + { + "epoch": 7.108333333333333, + "loss": 0.07421234250068665, + "loss_ce": 0.000268249015789479, + "loss_iou": 0.287109375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 155717672, + "step": 1706 + }, + { + "epoch": 7.1125, + "grad_norm": 4.992836138998759, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 155808784, + "step": 1707 + }, + { + "epoch": 7.1125, + "loss": 0.10705584287643433, + "loss_ce": 0.000419794290792197, + "loss_iou": 0.244140625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 155808784, + "step": 1707 + }, + { + "epoch": 7.116666666666666, + "grad_norm": 4.0846848007526955, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 155900316, + "step": 1708 + }, + { + "epoch": 7.116666666666666, + "loss": 0.10803817212581635, + "loss_ce": 0.0026304549537599087, + "loss_iou": 0.31640625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 155900316, + "step": 1708 + }, + { + "epoch": 7.120833333333334, + "grad_norm": 3.303076878654234, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 155991784, + "step": 1709 + }, + { + "epoch": 7.120833333333334, + "loss": 0.10933535546064377, + "loss_ce": 0.0003570847911760211, + "loss_iou": 0.265625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 155991784, + "step": 1709 + }, + { + "epoch": 7.125, + "grad_norm": 15.969175324546415, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 156083244, + "step": 1710 + }, + { + "epoch": 7.125, + "loss": 0.07175493240356445, + "loss_ce": 0.0028767550829797983, + "loss_iou": 0.271484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 156083244, + "step": 1710 + }, + { + "epoch": 7.129166666666666, + "grad_norm": 5.318811792978995, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 156174512, + "step": 1711 + }, + { + "epoch": 7.129166666666666, + "loss": 0.08147059381008148, + "loss_ce": 0.0002480625989846885, + "loss_iou": 0.48046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 156174512, + "step": 1711 + }, + { + "epoch": 7.133333333333334, + "grad_norm": 3.591541353502176, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 156266228, + "step": 1712 + }, + { + "epoch": 7.133333333333334, + "loss": 0.04183837026357651, + "loss_ce": 0.0005480895051732659, + "loss_iou": 0.310546875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 156266228, + "step": 1712 + }, + { + "epoch": 7.1375, + "grad_norm": 6.928688783074016, + "learning_rate": 5e-05, + "loss": 0.1291, + "num_input_tokens_seen": 156357936, + "step": 1713 + }, + { + "epoch": 7.1375, + "loss": 0.14094755053520203, + "loss_ce": 7.841399929020554e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 156357936, + "step": 1713 + }, + { + "epoch": 7.141666666666667, + "grad_norm": 6.571922978586136, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 156449336, + "step": 1714 + }, + { + "epoch": 7.141666666666667, + "loss": 0.10532679408788681, + "loss_ce": 1.0631374607328326e-05, + "loss_iou": 0.37109375, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 156449336, + "step": 1714 + }, + { + "epoch": 7.145833333333333, + "grad_norm": 3.1341725428157314, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 156541464, + "step": 1715 + }, + { + "epoch": 7.145833333333333, + "loss": 0.11285033822059631, + "loss_ce": 0.006099840626120567, + "loss_iou": 0.33203125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 156541464, + "step": 1715 + }, + { + "epoch": 7.15, + "grad_norm": 4.091806497084258, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 156632996, + "step": 1716 + }, + { + "epoch": 7.15, + "loss": 0.08084506541490555, + "loss_ce": 0.0005380603251978755, + "loss_iou": 0.28125, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 156632996, + "step": 1716 + }, + { + "epoch": 7.154166666666667, + "grad_norm": 4.742170644569578, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 156724204, + "step": 1717 + }, + { + "epoch": 7.154166666666667, + "loss": 0.07490211725234985, + "loss_ce": 1.1978670954704285e-05, + "loss_iou": 0.3828125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 156724204, + "step": 1717 + }, + { + "epoch": 7.158333333333333, + "grad_norm": 5.407806077931396, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 156815800, + "step": 1718 + }, + { + "epoch": 7.158333333333333, + "loss": 0.093394935131073, + "loss_ce": 0.0001637310051592067, + "loss_iou": 0.283203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 156815800, + "step": 1718 + }, + { + "epoch": 7.1625, + "grad_norm": 2.095490907997988, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 156907332, + "step": 1719 + }, + { + "epoch": 7.1625, + "loss": 0.04713945463299751, + "loss_ce": 0.0006001486326567829, + "loss_iou": 0.205078125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 156907332, + "step": 1719 + }, + { + "epoch": 7.166666666666667, + "grad_norm": 13.781994171670535, + "learning_rate": 5e-05, + "loss": 0.1179, + "num_input_tokens_seen": 156999412, + "step": 1720 + }, + { + "epoch": 7.166666666666667, + "loss": 0.1414538472890854, + "loss_ce": 0.002720939228311181, + "loss_iou": 0.2080078125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 156999412, + "step": 1720 + }, + { + "epoch": 7.170833333333333, + "grad_norm": 5.382132021159549, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 157090580, + "step": 1721 + }, + { + "epoch": 7.170833333333333, + "loss": 0.14994627237319946, + "loss_ce": 0.0006390261114574969, + "loss_iou": 0.40625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 157090580, + "step": 1721 + }, + { + "epoch": 7.175, + "grad_norm": 2.8820191575286382, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 157181548, + "step": 1722 + }, + { + "epoch": 7.175, + "loss": 0.08300650119781494, + "loss_ce": 0.0009599894401617348, + "loss_iou": 0.255859375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 157181548, + "step": 1722 + }, + { + "epoch": 7.179166666666666, + "grad_norm": 2.4030723794000637, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 157272880, + "step": 1723 + }, + { + "epoch": 7.179166666666666, + "loss": 0.142893984913826, + "loss_ce": 0.0014907920267432928, + "loss_iou": 0.2138671875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 157272880, + "step": 1723 + }, + { + "epoch": 7.183333333333334, + "grad_norm": 6.555642784643792, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 157363724, + "step": 1724 + }, + { + "epoch": 7.183333333333334, + "loss": 0.08335313946008682, + "loss_ce": 9.633986337576061e-06, + "loss_iou": 0.31640625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 157363724, + "step": 1724 + }, + { + "epoch": 7.1875, + "grad_norm": 2.5435019828539365, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 157455424, + "step": 1725 + }, + { + "epoch": 7.1875, + "loss": 0.053869716823101044, + "loss_ce": 0.0006470587686635554, + "loss_iou": 0.384765625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 157455424, + "step": 1725 + }, + { + "epoch": 7.191666666666666, + "grad_norm": 2.8380242838082737, + "learning_rate": 5e-05, + "loss": 0.1243, + "num_input_tokens_seen": 157546968, + "step": 1726 + }, + { + "epoch": 7.191666666666666, + "loss": 0.12494509667158127, + "loss_ce": 3.665284748421982e-05, + "loss_iou": 0.30078125, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 157546968, + "step": 1726 + }, + { + "epoch": 7.195833333333334, + "grad_norm": 4.899015514922593, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 157638908, + "step": 1727 + }, + { + "epoch": 7.195833333333334, + "loss": 0.048561014235019684, + "loss_ce": 0.00087729626102373, + "loss_iou": 0.31640625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 157638908, + "step": 1727 + }, + { + "epoch": 7.2, + "grad_norm": 1.9257435909322718, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 157730612, + "step": 1728 + }, + { + "epoch": 7.2, + "loss": 0.11189435422420502, + "loss_ce": 1.6904214135138318e-05, + "loss_iou": 0.216796875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 157730612, + "step": 1728 + }, + { + "epoch": 7.204166666666667, + "grad_norm": 2.4804711767686283, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 157821592, + "step": 1729 + }, + { + "epoch": 7.204166666666667, + "loss": 0.1316571682691574, + "loss_ce": 4.337953214417212e-06, + "loss_iou": 0.380859375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 157821592, + "step": 1729 + }, + { + "epoch": 7.208333333333333, + "grad_norm": 8.301899808282865, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 157913000, + "step": 1730 + }, + { + "epoch": 7.208333333333333, + "loss": 0.12285293638706207, + "loss_ce": 1.968711512745358e-05, + "loss_iou": 0.283203125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 157913000, + "step": 1730 + }, + { + "epoch": 7.2125, + "grad_norm": 4.1752854960273, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 158004764, + "step": 1731 + }, + { + "epoch": 7.2125, + "loss": 0.06936931610107422, + "loss_ce": 0.0003538095043040812, + "loss_iou": 0.36328125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 158004764, + "step": 1731 + }, + { + "epoch": 7.216666666666667, + "grad_norm": 3.3411710342234895, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 158096040, + "step": 1732 + }, + { + "epoch": 7.216666666666667, + "loss": 0.10201053321361542, + "loss_ce": 0.0016687337774783373, + "loss_iou": 0.384765625, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 158096040, + "step": 1732 + }, + { + "epoch": 7.220833333333333, + "grad_norm": 3.532147965636991, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 158186896, + "step": 1733 + }, + { + "epoch": 7.220833333333333, + "loss": 0.08085125684738159, + "loss_ce": 0.0004450652631931007, + "loss_iou": 0.32421875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 158186896, + "step": 1733 + }, + { + "epoch": 7.225, + "grad_norm": 4.222192536783828, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 158278220, + "step": 1734 + }, + { + "epoch": 7.225, + "loss": 0.07080645859241486, + "loss_ce": 0.0016383626498281956, + "loss_iou": 0.341796875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 158278220, + "step": 1734 + }, + { + "epoch": 7.229166666666667, + "grad_norm": 5.968139727235326, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 158369004, + "step": 1735 + }, + { + "epoch": 7.229166666666667, + "loss": 0.0862986296415329, + "loss_ce": 1.018310558720259e-05, + "loss_iou": 0.322265625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 158369004, + "step": 1735 + }, + { + "epoch": 7.233333333333333, + "grad_norm": 6.387746133616494, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 158460752, + "step": 1736 + }, + { + "epoch": 7.233333333333333, + "loss": 0.1081860139966011, + "loss_ce": 0.00024534264230169356, + "loss_iou": 0.392578125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 158460752, + "step": 1736 + }, + { + "epoch": 7.2375, + "grad_norm": 1.3763892685803873, + "learning_rate": 5e-05, + "loss": 0.1363, + "num_input_tokens_seen": 158552252, + "step": 1737 + }, + { + "epoch": 7.2375, + "loss": 0.16385185718536377, + "loss_ce": 0.0014983414439484477, + "loss_iou": 0.302734375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 158552252, + "step": 1737 + }, + { + "epoch": 7.241666666666666, + "grad_norm": 5.7207446363743895, + "learning_rate": 5e-05, + "loss": 0.1203, + "num_input_tokens_seen": 158644072, + "step": 1738 + }, + { + "epoch": 7.241666666666666, + "loss": 0.16266997158527374, + "loss_ce": 0.0008810389554128051, + "loss_iou": 0.1953125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 158644072, + "step": 1738 + }, + { + "epoch": 7.245833333333334, + "grad_norm": 4.812282703252393, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 158734884, + "step": 1739 + }, + { + "epoch": 7.245833333333334, + "loss": 0.10428060591220856, + "loss_ce": 1.7295438738074154e-05, + "loss_iou": 0.21875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 158734884, + "step": 1739 + }, + { + "epoch": 7.25, + "grad_norm": 1.5478532072316726, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 158826440, + "step": 1740 + }, + { + "epoch": 7.25, + "loss": 0.07442457973957062, + "loss_ce": 0.0011671364773064852, + "loss_iou": 0.24609375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 158826440, + "step": 1740 + }, + { + "epoch": 7.254166666666666, + "grad_norm": 6.928940226324635, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 158917548, + "step": 1741 + }, + { + "epoch": 7.254166666666666, + "loss": 0.11471019685268402, + "loss_ce": 8.617914863862097e-05, + "loss_iou": 0.2578125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 158917548, + "step": 1741 + }, + { + "epoch": 7.258333333333334, + "grad_norm": 3.058820583296289, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 159009512, + "step": 1742 + }, + { + "epoch": 7.258333333333334, + "loss": 0.053757186979055405, + "loss_ce": 0.0035862871445715427, + "loss_iou": 0.34375, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 159009512, + "step": 1742 + }, + { + "epoch": 7.2625, + "grad_norm": 2.936191227345829, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 159101340, + "step": 1743 + }, + { + "epoch": 7.2625, + "loss": 0.0766802728176117, + "loss_ce": 0.0006152114365249872, + "loss_iou": 0.38671875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 159101340, + "step": 1743 + }, + { + "epoch": 7.266666666666667, + "grad_norm": 7.309826953349478, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 159193408, + "step": 1744 + }, + { + "epoch": 7.266666666666667, + "loss": 0.08390648663043976, + "loss_ce": 0.002439806703478098, + "loss_iou": 0.302734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 159193408, + "step": 1744 + }, + { + "epoch": 7.270833333333333, + "grad_norm": 4.970001831892565, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 159284636, + "step": 1745 + }, + { + "epoch": 7.270833333333333, + "loss": 0.12510189414024353, + "loss_ce": 7.13820118107833e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 159284636, + "step": 1745 + }, + { + "epoch": 7.275, + "grad_norm": 3.2103467072844767, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 159376364, + "step": 1746 + }, + { + "epoch": 7.275, + "loss": 0.08453178405761719, + "loss_ce": 0.001142505556344986, + "loss_iou": 0.35546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 159376364, + "step": 1746 + }, + { + "epoch": 7.279166666666667, + "grad_norm": 2.8748852927933912, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 159467452, + "step": 1747 + }, + { + "epoch": 7.279166666666667, + "loss": 0.09032277762889862, + "loss_ce": 0.00034169177524745464, + "loss_iou": 0.0751953125, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 159467452, + "step": 1747 + }, + { + "epoch": 7.283333333333333, + "grad_norm": 1.8429200812612072, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 159558896, + "step": 1748 + }, + { + "epoch": 7.283333333333333, + "loss": 0.07524539530277252, + "loss_ce": 0.002735632471740246, + "loss_iou": 0.263671875, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 159558896, + "step": 1748 + }, + { + "epoch": 7.2875, + "grad_norm": 2.1798804635746323, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 159648372, + "step": 1749 + }, + { + "epoch": 7.2875, + "loss": 0.06625260412693024, + "loss_ce": 1.4205359548213892e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 159648372, + "step": 1749 + }, + { + "epoch": 7.291666666666667, + "grad_norm": 2.636309635324973, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 159739544, + "step": 1750 + }, + { + "epoch": 7.291666666666667, + "eval_seeclick_CIoU": 0.274772547185421, + "eval_seeclick_GIoU": 0.2568713426589966, + "eval_seeclick_IoU": 0.3638703525066376, + "eval_seeclick_MAE_all": 0.08645062521100044, + "eval_seeclick_MAE_h": 0.08346085995435715, + "eval_seeclick_MAE_w": 0.157043918967247, + "eval_seeclick_MAE_x_boxes": 0.17312509566545486, + "eval_seeclick_MAE_y_boxes": 0.08508550748229027, + "eval_seeclick_NUM_probability": 0.9999987185001373, + "eval_seeclick_inside_bbox": 0.5241477340459824, + "eval_seeclick_loss": 0.5137174725532532, + "eval_seeclick_loss_ce": 0.13229186832904816, + "eval_seeclick_loss_iou": 0.4346923828125, + "eval_seeclick_loss_num": 0.07465362548828125, + "eval_seeclick_loss_xval": 0.372894287109375, + "eval_seeclick_runtime": 75.2043, + "eval_seeclick_samples_per_second": 0.572, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 159739544, + "step": 1750 + }, + { + "epoch": 7.291666666666667, + "eval_icons_CIoU": 0.3470269590616226, + "eval_icons_GIoU": 0.37907470762729645, + "eval_icons_IoU": 0.42870812118053436, + "eval_icons_MAE_all": 0.0643687080591917, + "eval_icons_MAE_h": 0.13963689282536507, + "eval_icons_MAE_w": 0.09502165019512177, + "eval_icons_MAE_x_boxes": 0.09512116760015488, + "eval_icons_MAE_y_boxes": 0.14081553369760513, + "eval_icons_NUM_probability": 0.9999993741512299, + "eval_icons_inside_bbox": 0.6215277910232544, + "eval_icons_loss": 0.31115666031837463, + "eval_icons_loss_ce": 0.0013536059414036572, + "eval_icons_loss_iou": 0.27740478515625, + "eval_icons_loss_num": 0.0612945556640625, + "eval_icons_loss_xval": 0.30670166015625, + "eval_icons_runtime": 88.8429, + "eval_icons_samples_per_second": 0.563, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 159739544, + "step": 1750 + }, + { + "epoch": 7.291666666666667, + "eval_screenspot_CIoU": 0.3959275384744008, + "eval_screenspot_GIoU": 0.3979260226090749, + "eval_screenspot_IoU": 0.4649760623772939, + "eval_screenspot_MAE_all": 0.09103357543547948, + "eval_screenspot_MAE_h": 0.06914364298184712, + "eval_screenspot_MAE_w": 0.20312588413556418, + "eval_screenspot_MAE_x_boxes": 0.18540192147096, + "eval_screenspot_MAE_y_boxes": 0.06311593949794769, + "eval_screenspot_NUM_probability": 0.9999985496203104, + "eval_screenspot_inside_bbox": 0.693750003973643, + "eval_screenspot_loss": 0.4558645188808441, + "eval_screenspot_loss_ce": 0.0005789737806480844, + "eval_screenspot_loss_iou": 0.4074300130208333, + "eval_screenspot_loss_num": 0.09119669596354167, + "eval_screenspot_loss_xval": 0.4561360677083333, + "eval_screenspot_runtime": 153.3033, + "eval_screenspot_samples_per_second": 0.581, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 159739544, + "step": 1750 + }, + { + "epoch": 7.291666666666667, + "eval_compot_CIoU": 0.39731016755104065, + "eval_compot_GIoU": 0.3796231150627136, + "eval_compot_IoU": 0.47868672013282776, + "eval_compot_MAE_all": 0.06850907020270824, + "eval_compot_MAE_h": 0.09156372398138046, + "eval_compot_MAE_w": 0.15624917298555374, + "eval_compot_MAE_x_boxes": 0.15558727830648422, + "eval_compot_MAE_y_boxes": 0.08946410566568375, + "eval_compot_NUM_probability": 0.9999960064888, + "eval_compot_inside_bbox": 0.6371527910232544, + "eval_compot_loss": 0.3551335632801056, + "eval_compot_loss_ce": 0.03361728601157665, + "eval_compot_loss_iou": 0.3394775390625, + "eval_compot_loss_num": 0.057178497314453125, + "eval_compot_loss_xval": 0.2858428955078125, + "eval_compot_runtime": 91.3889, + "eval_compot_samples_per_second": 0.547, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 159739544, + "step": 1750 + }, + { + "epoch": 7.291666666666667, + "loss": 0.35525137186050415, + "loss_ce": 0.03658682852983475, + "loss_iou": 0.330078125, + "loss_num": 0.06396484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 159739544, + "step": 1750 + }, + { + "epoch": 7.295833333333333, + "grad_norm": 6.198843157364279, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 159830292, + "step": 1751 + }, + { + "epoch": 7.295833333333333, + "loss": 0.05363104119896889, + "loss_ce": 4.2173967813141644e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 159830292, + "step": 1751 + }, + { + "epoch": 7.3, + "grad_norm": 8.462068378678884, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 159921348, + "step": 1752 + }, + { + "epoch": 7.3, + "loss": 0.06018263101577759, + "loss_ce": 0.00015455312677659094, + "loss_iou": 0.2890625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 159921348, + "step": 1752 + }, + { + "epoch": 7.304166666666666, + "grad_norm": 2.9406017989968753, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 160013564, + "step": 1753 + }, + { + "epoch": 7.304166666666666, + "loss": 0.03546938672661781, + "loss_ce": 0.003761622356250882, + "loss_iou": 0.232421875, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 160013564, + "step": 1753 + }, + { + "epoch": 7.308333333333334, + "grad_norm": 2.107795043077596, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 160104816, + "step": 1754 + }, + { + "epoch": 7.308333333333334, + "loss": 0.1058989018201828, + "loss_ce": 0.0004911827854812145, + "loss_iou": 0.2734375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 160104816, + "step": 1754 + }, + { + "epoch": 7.3125, + "grad_norm": 4.2175137993238305, + "learning_rate": 5e-05, + "loss": 0.1338, + "num_input_tokens_seen": 160196380, + "step": 1755 + }, + { + "epoch": 7.3125, + "loss": 0.16473883390426636, + "loss_ce": 0.0004474582092370838, + "loss_iou": 0.1103515625, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 160196380, + "step": 1755 + }, + { + "epoch": 7.316666666666666, + "grad_norm": 1.958983067716341, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 160286304, + "step": 1756 + }, + { + "epoch": 7.316666666666666, + "loss": 0.07399453222751617, + "loss_ce": 0.00073708884883672, + "loss_iou": 0.271484375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 160286304, + "step": 1756 + }, + { + "epoch": 7.320833333333334, + "grad_norm": 2.9633889667155144, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 160377004, + "step": 1757 + }, + { + "epoch": 7.320833333333334, + "loss": 0.05870777368545532, + "loss_ce": 0.00048023491399362683, + "loss_iou": 0.37890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 160377004, + "step": 1757 + }, + { + "epoch": 7.325, + "grad_norm": 29.96955038482011, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 160468532, + "step": 1758 + }, + { + "epoch": 7.325, + "loss": 0.0886864885687828, + "loss_ce": 0.0011010420275852084, + "loss_iou": 0.2578125, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 160468532, + "step": 1758 + }, + { + "epoch": 7.329166666666667, + "grad_norm": 3.084052387858358, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 160559280, + "step": 1759 + }, + { + "epoch": 7.329166666666667, + "loss": 0.0805143266916275, + "loss_ce": 6.99890370015055e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 160559280, + "step": 1759 + }, + { + "epoch": 7.333333333333333, + "grad_norm": 3.032790090810266, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 160650272, + "step": 1760 + }, + { + "epoch": 7.333333333333333, + "loss": 0.10603293776512146, + "loss_ce": 0.0004116000491194427, + "loss_iou": 0.322265625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 160650272, + "step": 1760 + }, + { + "epoch": 7.3375, + "grad_norm": 2.9409556739143667, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 160741936, + "step": 1761 + }, + { + "epoch": 7.3375, + "loss": 0.08558277040719986, + "loss_ce": 0.00027087461785413325, + "loss_iou": 0.306640625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 160741936, + "step": 1761 + }, + { + "epoch": 7.341666666666667, + "grad_norm": 5.113927821544099, + "learning_rate": 5e-05, + "loss": 0.1019, + "num_input_tokens_seen": 160833300, + "step": 1762 + }, + { + "epoch": 7.341666666666667, + "loss": 0.06774862110614777, + "loss_ce": 3.774898505071178e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 160833300, + "step": 1762 + }, + { + "epoch": 7.345833333333333, + "grad_norm": 2.827984109355143, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 160924888, + "step": 1763 + }, + { + "epoch": 7.345833333333333, + "loss": 0.13058581948280334, + "loss_ce": 0.0015193530125543475, + "loss_iou": 0.251953125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 160924888, + "step": 1763 + }, + { + "epoch": 7.35, + "grad_norm": 2.7586655479972992, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 161015604, + "step": 1764 + }, + { + "epoch": 7.35, + "loss": 0.08002396672964096, + "loss_ce": 0.004813396371901035, + "loss_iou": 0.216796875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 161015604, + "step": 1764 + }, + { + "epoch": 7.354166666666667, + "grad_norm": 4.62529291783808, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 161107176, + "step": 1765 + }, + { + "epoch": 7.354166666666667, + "loss": 0.10449203848838806, + "loss_ce": 0.002532810904085636, + "loss_iou": 0.263671875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 161107176, + "step": 1765 + }, + { + "epoch": 7.358333333333333, + "grad_norm": 2.5774845134583004, + "learning_rate": 5e-05, + "loss": 0.1122, + "num_input_tokens_seen": 161198640, + "step": 1766 + }, + { + "epoch": 7.358333333333333, + "loss": 0.07467900216579437, + "loss_ce": 1.774665724951774e-05, + "loss_iou": 0.22265625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 161198640, + "step": 1766 + }, + { + "epoch": 7.3625, + "grad_norm": 1.900906724887545, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 161290120, + "step": 1767 + }, + { + "epoch": 7.3625, + "loss": 0.09348450601100922, + "loss_ce": 0.00036012171767652035, + "loss_iou": 0.30078125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 161290120, + "step": 1767 + }, + { + "epoch": 7.366666666666666, + "grad_norm": 2.3698025807422476, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 161381656, + "step": 1768 + }, + { + "epoch": 7.366666666666666, + "loss": 0.0635412186384201, + "loss_ce": 0.00021724399994127452, + "loss_iou": 0.3203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 161381656, + "step": 1768 + }, + { + "epoch": 7.370833333333334, + "grad_norm": 5.86903240139928, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 161472568, + "step": 1769 + }, + { + "epoch": 7.370833333333334, + "loss": 0.03189694136381149, + "loss_ce": 0.0002273284044349566, + "loss_iou": 0.265625, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 161472568, + "step": 1769 + }, + { + "epoch": 7.375, + "grad_norm": 3.7597933825689926, + "learning_rate": 5e-05, + "loss": 0.1349, + "num_input_tokens_seen": 161562416, + "step": 1770 + }, + { + "epoch": 7.375, + "loss": 0.10958030074834824, + "loss_ce": 3.745627327589318e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 161562416, + "step": 1770 + }, + { + "epoch": 7.379166666666666, + "grad_norm": 3.4972689341463834, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 161653772, + "step": 1771 + }, + { + "epoch": 7.379166666666666, + "loss": 0.06820204108953476, + "loss_ce": 0.00020887877326458693, + "loss_iou": 0.23828125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 161653772, + "step": 1771 + }, + { + "epoch": 7.383333333333334, + "grad_norm": 5.528369180509633, + "learning_rate": 5e-05, + "loss": 0.16, + "num_input_tokens_seen": 161745908, + "step": 1772 + }, + { + "epoch": 7.383333333333334, + "loss": 0.17425945401191711, + "loss_ce": 0.004047665745019913, + "loss_iou": 0.2734375, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 161745908, + "step": 1772 + }, + { + "epoch": 7.3875, + "grad_norm": 14.436643120766435, + "learning_rate": 5e-05, + "loss": 0.1285, + "num_input_tokens_seen": 161837884, + "step": 1773 + }, + { + "epoch": 7.3875, + "loss": 0.14607399702072144, + "loss_ce": 0.0024887986946851015, + "loss_iou": 0.25, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 161837884, + "step": 1773 + }, + { + "epoch": 7.391666666666667, + "grad_norm": 5.205319470785945, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 161929180, + "step": 1774 + }, + { + "epoch": 7.391666666666667, + "loss": 0.11127667874097824, + "loss_ce": 7.062430813675746e-05, + "loss_iou": 0.185546875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 161929180, + "step": 1774 + }, + { + "epoch": 7.395833333333333, + "grad_norm": 3.9657384758323784, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 162020952, + "step": 1775 + }, + { + "epoch": 7.395833333333333, + "loss": 0.041575320065021515, + "loss_ce": 0.00013244900037534535, + "loss_iou": 0.3203125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 162020952, + "step": 1775 + }, + { + "epoch": 7.4, + "grad_norm": 3.3889964350870416, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 162112412, + "step": 1776 + }, + { + "epoch": 7.4, + "loss": 0.05319908261299133, + "loss_ce": 0.000830917851999402, + "loss_iou": 0.41015625, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 162112412, + "step": 1776 + }, + { + "epoch": 7.404166666666667, + "grad_norm": 3.1937320635112303, + "learning_rate": 5e-05, + "loss": 0.1485, + "num_input_tokens_seen": 162203620, + "step": 1777 + }, + { + "epoch": 7.404166666666667, + "loss": 0.20381496846675873, + "loss_ce": 0.00026272700051777065, + "loss_iou": 0.2216796875, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 162203620, + "step": 1777 + }, + { + "epoch": 7.408333333333333, + "grad_norm": 3.9853992529387043, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 162294724, + "step": 1778 + }, + { + "epoch": 7.408333333333333, + "loss": 0.046426184475421906, + "loss_ce": 0.0010465431259945035, + "loss_iou": 0.287109375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 162294724, + "step": 1778 + }, + { + "epoch": 7.4125, + "grad_norm": 4.154016201076832, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 162387112, + "step": 1779 + }, + { + "epoch": 7.4125, + "loss": 0.05779798701405525, + "loss_ce": 0.0002952393260784447, + "loss_iou": 0.25, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 162387112, + "step": 1779 + }, + { + "epoch": 7.416666666666667, + "grad_norm": 2.7081336478210845, + "learning_rate": 5e-05, + "loss": 0.0791, + "num_input_tokens_seen": 162478244, + "step": 1780 + }, + { + "epoch": 7.416666666666667, + "loss": 0.12627843022346497, + "loss_ce": 0.0008054060745052993, + "loss_iou": 0.263671875, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 162478244, + "step": 1780 + }, + { + "epoch": 7.420833333333333, + "grad_norm": 3.848145766539644, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 162570056, + "step": 1781 + }, + { + "epoch": 7.420833333333333, + "loss": 0.12114088982343674, + "loss_ce": 0.0005659367889165878, + "loss_iou": 0.2734375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 162570056, + "step": 1781 + }, + { + "epoch": 7.425, + "grad_norm": 4.482393117561415, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 162661248, + "step": 1782 + }, + { + "epoch": 7.425, + "loss": 0.07931487262248993, + "loss_ce": 3.0203231290215626e-05, + "loss_iou": 0.31640625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 162661248, + "step": 1782 + }, + { + "epoch": 7.429166666666666, + "grad_norm": 3.044807825876327, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 162752764, + "step": 1783 + }, + { + "epoch": 7.429166666666666, + "loss": 0.07627473771572113, + "loss_ce": 0.0013845983194187284, + "loss_iou": 0.283203125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 162752764, + "step": 1783 + }, + { + "epoch": 7.433333333333334, + "grad_norm": 4.313521043969587, + "learning_rate": 5e-05, + "loss": 0.0968, + "num_input_tokens_seen": 162843720, + "step": 1784 + }, + { + "epoch": 7.433333333333334, + "loss": 0.07608497887849808, + "loss_ce": 4.6575505621149205e-06, + "loss_iou": 0.3515625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 162843720, + "step": 1784 + }, + { + "epoch": 7.4375, + "grad_norm": 3.3882547187141774, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 162934908, + "step": 1785 + }, + { + "epoch": 7.4375, + "loss": 0.050569839775562286, + "loss_ce": 0.003069228958338499, + "loss_iou": 0.306640625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 162934908, + "step": 1785 + }, + { + "epoch": 7.441666666666666, + "grad_norm": 9.266034898076681, + "learning_rate": 5e-05, + "loss": 0.1423, + "num_input_tokens_seen": 163025636, + "step": 1786 + }, + { + "epoch": 7.441666666666666, + "loss": 0.13430999219417572, + "loss_ce": 0.0001089509969460778, + "loss_iou": 0.251953125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 163025636, + "step": 1786 + }, + { + "epoch": 7.445833333333334, + "grad_norm": 5.601334033354807, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 163116296, + "step": 1787 + }, + { + "epoch": 7.445833333333334, + "loss": 0.08957132697105408, + "loss_ce": 0.00032267015194520354, + "loss_iou": 0.21875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 163116296, + "step": 1787 + }, + { + "epoch": 7.45, + "grad_norm": 4.064665573397149, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 163207372, + "step": 1788 + }, + { + "epoch": 7.45, + "loss": 0.07631734013557434, + "loss_ce": 8.442741818726063e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 163207372, + "step": 1788 + }, + { + "epoch": 7.454166666666667, + "grad_norm": 4.578626723828098, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 163299240, + "step": 1789 + }, + { + "epoch": 7.454166666666667, + "loss": 0.07299777865409851, + "loss_ce": 0.0004727557534351945, + "loss_iou": 0.330078125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 163299240, + "step": 1789 + }, + { + "epoch": 7.458333333333333, + "grad_norm": 3.3572150574552944, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 163390396, + "step": 1790 + }, + { + "epoch": 7.458333333333333, + "loss": 0.05886243283748627, + "loss_ce": 0.00036023391294293106, + "loss_iou": 0.314453125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 163390396, + "step": 1790 + }, + { + "epoch": 7.4625, + "grad_norm": 4.475665431055516, + "learning_rate": 5e-05, + "loss": 0.1383, + "num_input_tokens_seen": 163481352, + "step": 1791 + }, + { + "epoch": 7.4625, + "loss": 0.11866101622581482, + "loss_ce": 0.00034437025897204876, + "loss_iou": 0.3828125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 163481352, + "step": 1791 + }, + { + "epoch": 7.466666666666667, + "grad_norm": 6.067499830201406, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 163573364, + "step": 1792 + }, + { + "epoch": 7.466666666666667, + "loss": 0.03994344547390938, + "loss_ce": 1.1194573744433e-05, + "loss_iou": 0.3125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 163573364, + "step": 1792 + }, + { + "epoch": 7.470833333333333, + "grad_norm": 3.6669409658968752, + "learning_rate": 5e-05, + "loss": 0.1106, + "num_input_tokens_seen": 163664580, + "step": 1793 + }, + { + "epoch": 7.470833333333333, + "loss": 0.06494011729955673, + "loss_ce": 0.00010552479943726212, + "loss_iou": 0.1640625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 163664580, + "step": 1793 + }, + { + "epoch": 7.475, + "grad_norm": 1.9554889225100551, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 163755956, + "step": 1794 + }, + { + "epoch": 7.475, + "loss": 0.03204867243766785, + "loss_ce": 0.000508758588694036, + "loss_iou": 0.1982421875, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 163755956, + "step": 1794 + }, + { + "epoch": 7.479166666666667, + "grad_norm": 3.8375101247338526, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 163847680, + "step": 1795 + }, + { + "epoch": 7.479166666666667, + "loss": 0.08776212483644485, + "loss_ce": 0.0027172621339559555, + "loss_iou": 0.1787109375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 163847680, + "step": 1795 + }, + { + "epoch": 7.483333333333333, + "grad_norm": 7.015851833823572, + "learning_rate": 5e-05, + "loss": 0.1032, + "num_input_tokens_seen": 163939356, + "step": 1796 + }, + { + "epoch": 7.483333333333333, + "loss": 0.12932045757770538, + "loss_ce": 0.00032265795744024217, + "loss_iou": 0.240234375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 163939356, + "step": 1796 + }, + { + "epoch": 7.4875, + "grad_norm": 6.226142800925313, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 164030500, + "step": 1797 + }, + { + "epoch": 7.4875, + "loss": 0.11356852203607559, + "loss_ce": 0.00040170963620767, + "loss_iou": 0.236328125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 164030500, + "step": 1797 + }, + { + "epoch": 7.491666666666666, + "grad_norm": 3.3726405865994757, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 164121848, + "step": 1798 + }, + { + "epoch": 7.491666666666666, + "loss": 0.11696110665798187, + "loss_ce": 0.00012456333206500858, + "loss_iou": 0.328125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 164121848, + "step": 1798 + }, + { + "epoch": 7.495833333333334, + "grad_norm": 6.853153266624445, + "learning_rate": 5e-05, + "loss": 0.1338, + "num_input_tokens_seen": 164212348, + "step": 1799 + }, + { + "epoch": 7.495833333333334, + "loss": 0.1441565304994583, + "loss_ce": 0.003134795930236578, + "loss_iou": 0.28515625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 164212348, + "step": 1799 + }, + { + "epoch": 7.5, + "grad_norm": 3.5010048761569976, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 164303544, + "step": 1800 + }, + { + "epoch": 7.5, + "loss": 0.07428819686174393, + "loss_ce": 6.944570486666635e-05, + "loss_iou": 0.439453125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 164303544, + "step": 1800 + }, + { + "epoch": 7.504166666666666, + "grad_norm": 2.7467803518871814, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 164395328, + "step": 1801 + }, + { + "epoch": 7.504166666666666, + "loss": 0.08531016856431961, + "loss_ce": 0.0025922697968780994, + "loss_iou": 0.373046875, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 164395328, + "step": 1801 + }, + { + "epoch": 7.508333333333333, + "grad_norm": 2.641313105386422, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 164486700, + "step": 1802 + }, + { + "epoch": 7.508333333333333, + "loss": 0.14157049357891083, + "loss_ce": 2.996017792611383e-05, + "loss_iou": 0.26171875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 164486700, + "step": 1802 + }, + { + "epoch": 7.5125, + "grad_norm": 2.5541777417865483, + "learning_rate": 5e-05, + "loss": 0.1008, + "num_input_tokens_seen": 164578112, + "step": 1803 + }, + { + "epoch": 7.5125, + "loss": 0.06394675374031067, + "loss_ce": 0.0002565682225394994, + "loss_iou": 0.16015625, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 164578112, + "step": 1803 + }, + { + "epoch": 7.516666666666667, + "grad_norm": 5.188109239324115, + "learning_rate": 5e-05, + "loss": 0.1443, + "num_input_tokens_seen": 164669168, + "step": 1804 + }, + { + "epoch": 7.516666666666667, + "loss": 0.1864711046218872, + "loss_ce": 6.974764983169734e-05, + "loss_iou": 0.2421875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 164669168, + "step": 1804 + }, + { + "epoch": 7.520833333333333, + "grad_norm": 5.032568141629647, + "learning_rate": 5e-05, + "loss": 0.1401, + "num_input_tokens_seen": 164760132, + "step": 1805 + }, + { + "epoch": 7.520833333333333, + "loss": 0.13615745306015015, + "loss_ce": 0.0006136275478638709, + "loss_iou": 0.2451171875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 164760132, + "step": 1805 + }, + { + "epoch": 7.525, + "grad_norm": 4.41854309371531, + "learning_rate": 5e-05, + "loss": 0.1193, + "num_input_tokens_seen": 164851280, + "step": 1806 + }, + { + "epoch": 7.525, + "loss": 0.12078698724508286, + "loss_ce": 0.0022567142732441425, + "loss_iou": 0.34765625, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 164851280, + "step": 1806 + }, + { + "epoch": 7.529166666666667, + "grad_norm": 33.93754758442748, + "learning_rate": 5e-05, + "loss": 0.1264, + "num_input_tokens_seen": 164942324, + "step": 1807 + }, + { + "epoch": 7.529166666666667, + "loss": 0.06015244871377945, + "loss_ce": 2.2963965875533177e-06, + "loss_iou": 0.248046875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 164942324, + "step": 1807 + }, + { + "epoch": 7.533333333333333, + "grad_norm": 3.4167314336458787, + "learning_rate": 5e-05, + "loss": 0.1355, + "num_input_tokens_seen": 165033428, + "step": 1808 + }, + { + "epoch": 7.533333333333333, + "loss": 0.17138734459877014, + "loss_ce": 4.6413282689172775e-05, + "loss_iou": 0.234375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 165033428, + "step": 1808 + }, + { + "epoch": 7.5375, + "grad_norm": 2.9019079700505035, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 165124268, + "step": 1809 + }, + { + "epoch": 7.5375, + "loss": 0.09286807477474213, + "loss_ce": 0.000521889771334827, + "loss_iou": 0.28125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 165124268, + "step": 1809 + }, + { + "epoch": 7.541666666666667, + "grad_norm": 2.17195560578624, + "learning_rate": 5e-05, + "loss": 0.1329, + "num_input_tokens_seen": 165215284, + "step": 1810 + }, + { + "epoch": 7.541666666666667, + "loss": 0.1432662159204483, + "loss_ce": 0.0005965381278656423, + "loss_iou": 0.3515625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 165215284, + "step": 1810 + }, + { + "epoch": 7.545833333333333, + "grad_norm": 1.8534083120133813, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 165306224, + "step": 1811 + }, + { + "epoch": 7.545833333333333, + "loss": 0.14295382797718048, + "loss_ce": 0.002542451722547412, + "loss_iou": 0.28515625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 165306224, + "step": 1811 + }, + { + "epoch": 7.55, + "grad_norm": 2.4445060582381823, + "learning_rate": 5e-05, + "loss": 0.1071, + "num_input_tokens_seen": 165397308, + "step": 1812 + }, + { + "epoch": 7.55, + "loss": 0.11119158565998077, + "loss_ce": 0.0020302007906138897, + "loss_iou": 0.259765625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 165397308, + "step": 1812 + }, + { + "epoch": 7.554166666666667, + "grad_norm": 2.9314327836733636, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 165488264, + "step": 1813 + }, + { + "epoch": 7.554166666666667, + "loss": 0.05487770587205887, + "loss_ce": 3.762233245652169e-05, + "loss_iou": 0.341796875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 165488264, + "step": 1813 + }, + { + "epoch": 7.558333333333334, + "grad_norm": 2.961509975422286, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 165579952, + "step": 1814 + }, + { + "epoch": 7.558333333333334, + "loss": 0.06028253957629204, + "loss_ce": 0.0006588200340047479, + "loss_iou": 0.2294921875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 165579952, + "step": 1814 + }, + { + "epoch": 7.5625, + "grad_norm": 6.481985228004668, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 165671460, + "step": 1815 + }, + { + "epoch": 7.5625, + "loss": 0.07226623594760895, + "loss_ce": 0.0004965229891240597, + "loss_iou": 0.228515625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 165671460, + "step": 1815 + }, + { + "epoch": 7.566666666666666, + "grad_norm": 3.116352673166522, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 165762564, + "step": 1816 + }, + { + "epoch": 7.566666666666666, + "loss": 0.11289401352405548, + "loss_ce": 9.493182005826384e-06, + "loss_iou": 0.3125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 165762564, + "step": 1816 + }, + { + "epoch": 7.570833333333333, + "grad_norm": 1.9908482594454786, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 165853848, + "step": 1817 + }, + { + "epoch": 7.570833333333333, + "loss": 0.08714696764945984, + "loss_ce": 1.9282315406599082e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 165853848, + "step": 1817 + }, + { + "epoch": 7.575, + "grad_norm": 7.419420004332939, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 165944908, + "step": 1818 + }, + { + "epoch": 7.575, + "loss": 0.0688440203666687, + "loss_ce": 0.0010339573491364717, + "loss_iou": 0.33203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 165944908, + "step": 1818 + }, + { + "epoch": 7.579166666666667, + "grad_norm": 2.3215453774434605, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 166035940, + "step": 1819 + }, + { + "epoch": 7.579166666666667, + "loss": 0.10113656520843506, + "loss_ce": 0.00033700454514473677, + "loss_iou": 0.296875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 166035940, + "step": 1819 + }, + { + "epoch": 7.583333333333333, + "grad_norm": 8.378167852766184, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 166127672, + "step": 1820 + }, + { + "epoch": 7.583333333333333, + "loss": 0.11757265776395798, + "loss_ce": 0.002384064719080925, + "loss_iou": 0.306640625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 166127672, + "step": 1820 + }, + { + "epoch": 7.5875, + "grad_norm": 3.225532831738545, + "learning_rate": 5e-05, + "loss": 0.1326, + "num_input_tokens_seen": 166218888, + "step": 1821 + }, + { + "epoch": 7.5875, + "loss": 0.13063114881515503, + "loss_ce": 1.59115697897505e-05, + "loss_iou": 0.2578125, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 166218888, + "step": 1821 + }, + { + "epoch": 7.591666666666667, + "grad_norm": 2.9795922010986216, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 166310356, + "step": 1822 + }, + { + "epoch": 7.591666666666667, + "loss": 0.11850694566965103, + "loss_ce": 3.77087781089358e-05, + "loss_iou": 0.400390625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 166310356, + "step": 1822 + }, + { + "epoch": 7.595833333333333, + "grad_norm": 3.9278588907171903, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 166401668, + "step": 1823 + }, + { + "epoch": 7.595833333333333, + "loss": 0.10767964273691177, + "loss_ce": 0.0028212470933794975, + "loss_iou": 0.365234375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 166401668, + "step": 1823 + }, + { + "epoch": 7.6, + "grad_norm": 3.5113905798888885, + "learning_rate": 5e-05, + "loss": 0.1064, + "num_input_tokens_seen": 166493016, + "step": 1824 + }, + { + "epoch": 7.6, + "loss": 0.07028500735759735, + "loss_ce": 6.406075408449396e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 166493016, + "step": 1824 + }, + { + "epoch": 7.604166666666667, + "grad_norm": 4.0741472080729, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 166584856, + "step": 1825 + }, + { + "epoch": 7.604166666666667, + "loss": 0.0921793282032013, + "loss_ce": 4.675585660152137e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 166584856, + "step": 1825 + }, + { + "epoch": 7.608333333333333, + "grad_norm": 4.428130988861915, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 166675996, + "step": 1826 + }, + { + "epoch": 7.608333333333333, + "loss": 0.12392785400152206, + "loss_ce": 0.00017907016444951296, + "loss_iou": 0.181640625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 166675996, + "step": 1826 + }, + { + "epoch": 7.6125, + "grad_norm": 7.578798622859177, + "learning_rate": 5e-05, + "loss": 0.0992, + "num_input_tokens_seen": 166767272, + "step": 1827 + }, + { + "epoch": 7.6125, + "loss": 0.07048434019088745, + "loss_ce": 0.001316254260018468, + "loss_iou": 0.400390625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 166767272, + "step": 1827 + }, + { + "epoch": 7.616666666666667, + "grad_norm": 2.482887727779412, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 166858764, + "step": 1828 + }, + { + "epoch": 7.616666666666667, + "loss": 0.06286533176898956, + "loss_ce": 0.00019748101476579905, + "loss_iou": 0.31640625, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 166858764, + "step": 1828 + }, + { + "epoch": 7.620833333333334, + "grad_norm": 3.2505263368321122, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 166950188, + "step": 1829 + }, + { + "epoch": 7.620833333333334, + "loss": 0.07289623469114304, + "loss_ce": 0.0010273351799696684, + "loss_iou": 0.28125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 166950188, + "step": 1829 + }, + { + "epoch": 7.625, + "grad_norm": 6.27788075124899, + "learning_rate": 5e-05, + "loss": 0.1238, + "num_input_tokens_seen": 167039588, + "step": 1830 + }, + { + "epoch": 7.625, + "loss": 0.10974088311195374, + "loss_ce": 0.0013424496864899993, + "loss_iou": 0.2890625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 167039588, + "step": 1830 + }, + { + "epoch": 7.629166666666666, + "grad_norm": 2.2287300058880852, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 167130012, + "step": 1831 + }, + { + "epoch": 7.629166666666666, + "loss": 0.07976742833852768, + "loss_ce": 0.0007879381300881505, + "loss_iou": 0.375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 167130012, + "step": 1831 + }, + { + "epoch": 7.633333333333333, + "grad_norm": 6.5495447919435685, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 167220916, + "step": 1832 + }, + { + "epoch": 7.633333333333333, + "loss": 0.08783036470413208, + "loss_ce": 0.000504314957652241, + "loss_iou": 0.2890625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 167220916, + "step": 1832 + }, + { + "epoch": 7.6375, + "grad_norm": 2.199207485947871, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 167312560, + "step": 1833 + }, + { + "epoch": 7.6375, + "loss": 0.1437080353498459, + "loss_ce": 0.0005805969703942537, + "loss_iou": 0.19140625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 167312560, + "step": 1833 + }, + { + "epoch": 7.641666666666667, + "grad_norm": 3.217155580648952, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 167404172, + "step": 1834 + }, + { + "epoch": 7.641666666666667, + "loss": 0.049773965030908585, + "loss_ce": 0.0006101447506807745, + "loss_iou": 0.349609375, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 167404172, + "step": 1834 + }, + { + "epoch": 7.645833333333333, + "grad_norm": 4.943883406727365, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 167495484, + "step": 1835 + }, + { + "epoch": 7.645833333333333, + "loss": 0.08797941356897354, + "loss_ce": 0.0005847018328495324, + "loss_iou": 0.37890625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 167495484, + "step": 1835 + }, + { + "epoch": 7.65, + "grad_norm": 3.7136872933224874, + "learning_rate": 5e-05, + "loss": 0.1135, + "num_input_tokens_seen": 167586412, + "step": 1836 + }, + { + "epoch": 7.65, + "loss": 0.12610657513141632, + "loss_ce": 0.0008624346228316426, + "loss_iou": 0.359375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 167586412, + "step": 1836 + }, + { + "epoch": 7.654166666666667, + "grad_norm": 8.318407165207367, + "learning_rate": 5e-05, + "loss": 0.09, + "num_input_tokens_seen": 167677852, + "step": 1837 + }, + { + "epoch": 7.654166666666667, + "loss": 0.10709414631128311, + "loss_ce": 0.0015491031808778644, + "loss_iou": 0.1845703125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 167677852, + "step": 1837 + }, + { + "epoch": 7.658333333333333, + "grad_norm": 4.611140970076494, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 167769152, + "step": 1838 + }, + { + "epoch": 7.658333333333333, + "loss": 0.06653960049152374, + "loss_ce": 0.00030119341681711376, + "loss_iou": 0.267578125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 167769152, + "step": 1838 + }, + { + "epoch": 7.6625, + "grad_norm": 2.719468001458222, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 167860692, + "step": 1839 + }, + { + "epoch": 7.6625, + "loss": 0.10713286697864532, + "loss_ce": 3.1428429792867973e-05, + "loss_iou": 0.333984375, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 167860692, + "step": 1839 + }, + { + "epoch": 7.666666666666667, + "grad_norm": 3.146483263204079, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 167952176, + "step": 1840 + }, + { + "epoch": 7.666666666666667, + "loss": 0.1102394163608551, + "loss_ce": 0.00010147166176466271, + "loss_iou": 0.267578125, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 167952176, + "step": 1840 + }, + { + "epoch": 7.670833333333333, + "grad_norm": 7.767544722147642, + "learning_rate": 5e-05, + "loss": 0.1318, + "num_input_tokens_seen": 168044080, + "step": 1841 + }, + { + "epoch": 7.670833333333333, + "loss": 0.155223548412323, + "loss_ce": 0.002299969084560871, + "loss_iou": 0.310546875, + "loss_num": 0.030517578125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 168044080, + "step": 1841 + }, + { + "epoch": 7.675, + "grad_norm": 2.668628386340425, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 168135656, + "step": 1842 + }, + { + "epoch": 7.675, + "loss": 0.11792254447937012, + "loss_ce": 4.8401838284917176e-05, + "loss_iou": 0.26171875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 168135656, + "step": 1842 + }, + { + "epoch": 7.679166666666667, + "grad_norm": 5.252145209510077, + "learning_rate": 5e-05, + "loss": 0.1599, + "num_input_tokens_seen": 168227168, + "step": 1843 + }, + { + "epoch": 7.679166666666667, + "loss": 0.16746670007705688, + "loss_ce": 1.6735710232751444e-05, + "loss_iou": 0.353515625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 168227168, + "step": 1843 + }, + { + "epoch": 7.683333333333334, + "grad_norm": 5.91726659370321, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 168318868, + "step": 1844 + }, + { + "epoch": 7.683333333333334, + "loss": 0.11179641634225845, + "loss_ce": 0.0002699237084016204, + "loss_iou": 0.294921875, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 168318868, + "step": 1844 + }, + { + "epoch": 7.6875, + "grad_norm": 4.096332452166496, + "learning_rate": 5e-05, + "loss": 0.1178, + "num_input_tokens_seen": 168409736, + "step": 1845 + }, + { + "epoch": 7.6875, + "loss": 0.10695420950651169, + "loss_ce": 0.0005241526523604989, + "loss_iou": 0.2109375, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 168409736, + "step": 1845 + }, + { + "epoch": 7.691666666666666, + "grad_norm": 3.4184673683269455, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 168501456, + "step": 1846 + }, + { + "epoch": 7.691666666666666, + "loss": 0.10996747016906738, + "loss_ce": 0.0020267972722649574, + "loss_iou": 0.248046875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 168501456, + "step": 1846 + }, + { + "epoch": 7.695833333333333, + "grad_norm": 3.4885460354486733, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 168591996, + "step": 1847 + }, + { + "epoch": 7.695833333333333, + "loss": 0.12594377994537354, + "loss_ce": 4.350730523583479e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 168591996, + "step": 1847 + }, + { + "epoch": 7.7, + "grad_norm": 9.28699081074281, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 168683664, + "step": 1848 + }, + { + "epoch": 7.7, + "loss": 0.1240956038236618, + "loss_ce": 0.0022083972580730915, + "loss_iou": 0.376953125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 168683664, + "step": 1848 + }, + { + "epoch": 7.704166666666667, + "grad_norm": 65.82829142422523, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 168775256, + "step": 1849 + }, + { + "epoch": 7.704166666666667, + "loss": 0.1421854943037033, + "loss_ce": 0.00011090566113125533, + "loss_iou": 0.2431640625, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 168775256, + "step": 1849 + }, + { + "epoch": 7.708333333333333, + "grad_norm": 8.425677609000575, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 168866368, + "step": 1850 + }, + { + "epoch": 7.708333333333333, + "loss": 0.06213594973087311, + "loss_ce": 6.31937655271031e-05, + "loss_iou": 0.36328125, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 168866368, + "step": 1850 + }, + { + "epoch": 7.7125, + "grad_norm": 4.058259871683852, + "learning_rate": 5e-05, + "loss": 0.1456, + "num_input_tokens_seen": 168957900, + "step": 1851 + }, + { + "epoch": 7.7125, + "loss": 0.15907834470272064, + "loss_ce": 0.0002343444648431614, + "loss_iou": 0.34765625, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 168957900, + "step": 1851 + }, + { + "epoch": 7.716666666666667, + "grad_norm": 5.1463068875925915, + "learning_rate": 5e-05, + "loss": 0.1189, + "num_input_tokens_seen": 169048904, + "step": 1852 + }, + { + "epoch": 7.716666666666667, + "loss": 0.14455726742744446, + "loss_ce": 0.0004303784226067364, + "loss_iou": 0.19921875, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 169048904, + "step": 1852 + }, + { + "epoch": 7.720833333333333, + "grad_norm": 3.2107538886682767, + "learning_rate": 5e-05, + "loss": 0.1203, + "num_input_tokens_seen": 169139440, + "step": 1853 + }, + { + "epoch": 7.720833333333333, + "loss": 0.13053694367408752, + "loss_ce": 0.0003337044327054173, + "loss_iou": 0.31640625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 169139440, + "step": 1853 + }, + { + "epoch": 7.725, + "grad_norm": 3.870283976834058, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 169230360, + "step": 1854 + }, + { + "epoch": 7.725, + "loss": 0.06204288825392723, + "loss_ce": 0.0007635898073203862, + "loss_iou": 0.2333984375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 169230360, + "step": 1854 + }, + { + "epoch": 7.729166666666667, + "grad_norm": 3.443786270282577, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 169321300, + "step": 1855 + }, + { + "epoch": 7.729166666666667, + "loss": 0.15847985446453094, + "loss_ce": 0.0018026053439825773, + "loss_iou": 0.302734375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 169321300, + "step": 1855 + }, + { + "epoch": 7.733333333333333, + "grad_norm": 2.49730143699476, + "learning_rate": 5e-05, + "loss": 0.1253, + "num_input_tokens_seen": 169411800, + "step": 1856 + }, + { + "epoch": 7.733333333333333, + "loss": 0.06755806505680084, + "loss_ce": 0.00019051358685828745, + "loss_iou": 0.24609375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 169411800, + "step": 1856 + }, + { + "epoch": 7.7375, + "grad_norm": 3.293136068195067, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 169503240, + "step": 1857 + }, + { + "epoch": 7.7375, + "loss": 0.059763744473457336, + "loss_ce": 0.005366160534322262, + "loss_iou": 0.2412109375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 169503240, + "step": 1857 + }, + { + "epoch": 7.741666666666667, + "grad_norm": 3.009868145722623, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 169594456, + "step": 1858 + }, + { + "epoch": 7.741666666666667, + "loss": 0.11617320775985718, + "loss_ce": 0.0018543555634096265, + "loss_iou": 0.21484375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 169594456, + "step": 1858 + }, + { + "epoch": 7.745833333333334, + "grad_norm": 4.291106420758004, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 169686484, + "step": 1859 + }, + { + "epoch": 7.745833333333334, + "loss": 0.07079610228538513, + "loss_ce": 0.001109211822040379, + "loss_iou": 0.28125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 169686484, + "step": 1859 + }, + { + "epoch": 7.75, + "grad_norm": 3.5117610849754146, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 169777856, + "step": 1860 + }, + { + "epoch": 7.75, + "loss": 0.08096813410520554, + "loss_ce": 0.00035595300141721964, + "loss_iou": 0.390625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 169777856, + "step": 1860 + }, + { + "epoch": 7.754166666666666, + "grad_norm": 3.216652645214244, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 169869736, + "step": 1861 + }, + { + "epoch": 7.754166666666666, + "loss": 0.056906167417764664, + "loss_ce": 0.0049194754101336, + "loss_iou": 0.35546875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 169869736, + "step": 1861 + }, + { + "epoch": 7.758333333333333, + "grad_norm": 4.3446204121281085, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 169961388, + "step": 1862 + }, + { + "epoch": 7.758333333333333, + "loss": 0.08808216452598572, + "loss_ce": 0.0010307676857337356, + "loss_iou": 0.416015625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 169961388, + "step": 1862 + }, + { + "epoch": 7.7625, + "grad_norm": 3.631632204860175, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 170052108, + "step": 1863 + }, + { + "epoch": 7.7625, + "loss": 0.05665234476327896, + "loss_ce": 1.1720794645952992e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 170052108, + "step": 1863 + }, + { + "epoch": 7.766666666666667, + "grad_norm": 4.006442486133152, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 170142784, + "step": 1864 + }, + { + "epoch": 7.766666666666667, + "loss": 0.13484537601470947, + "loss_ce": 4.922464722767472e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 170142784, + "step": 1864 + }, + { + "epoch": 7.770833333333333, + "grad_norm": 3.8589614399252836, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 170233396, + "step": 1865 + }, + { + "epoch": 7.770833333333333, + "loss": 0.12279890477657318, + "loss_ce": 0.0018882558215409517, + "loss_iou": 0.380859375, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 170233396, + "step": 1865 + }, + { + "epoch": 7.775, + "grad_norm": 4.936401157655488, + "learning_rate": 5e-05, + "loss": 0.1006, + "num_input_tokens_seen": 170323888, + "step": 1866 + }, + { + "epoch": 7.775, + "loss": 0.10373049974441528, + "loss_ce": 0.0020726316142827272, + "loss_iou": 0.244140625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 170323888, + "step": 1866 + }, + { + "epoch": 7.779166666666667, + "grad_norm": 3.4016737513695263, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 170414608, + "step": 1867 + }, + { + "epoch": 7.779166666666667, + "loss": 0.08684270083904266, + "loss_ce": 4.935580363962799e-06, + "loss_iou": 0.36328125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 170414608, + "step": 1867 + }, + { + "epoch": 7.783333333333333, + "grad_norm": 2.8843144741959255, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 170505572, + "step": 1868 + }, + { + "epoch": 7.783333333333333, + "loss": 0.12317100912332535, + "loss_ce": 2.0571660570567474e-06, + "loss_iou": 0.3125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 170505572, + "step": 1868 + }, + { + "epoch": 7.7875, + "grad_norm": 2.774487838379211, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 170596752, + "step": 1869 + }, + { + "epoch": 7.7875, + "loss": 0.058745529502630234, + "loss_ce": 0.0003348880272824317, + "loss_iou": 0.298828125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 170596752, + "step": 1869 + }, + { + "epoch": 7.791666666666667, + "grad_norm": 2.6278615952842075, + "learning_rate": 5e-05, + "loss": 0.1284, + "num_input_tokens_seen": 170687504, + "step": 1870 + }, + { + "epoch": 7.791666666666667, + "loss": 0.1696607768535614, + "loss_ce": 4.4082615204388276e-05, + "loss_iou": 0.423828125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 170687504, + "step": 1870 + }, + { + "epoch": 7.795833333333333, + "grad_norm": 6.625014552147208, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 170778504, + "step": 1871 + }, + { + "epoch": 7.795833333333333, + "loss": 0.05947096273303032, + "loss_ce": 0.0007856582524254918, + "loss_iou": 0.2578125, + "loss_num": 0.01177978515625, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 170778504, + "step": 1871 + }, + { + "epoch": 7.8, + "grad_norm": 4.606783633451379, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 170869956, + "step": 1872 + }, + { + "epoch": 7.8, + "loss": 0.10387454181909561, + "loss_ce": 8.426039858022705e-05, + "loss_iou": 0.318359375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 170869956, + "step": 1872 + }, + { + "epoch": 7.804166666666667, + "grad_norm": 5.782852205094, + "learning_rate": 5e-05, + "loss": 0.1185, + "num_input_tokens_seen": 170961116, + "step": 1873 + }, + { + "epoch": 7.804166666666667, + "loss": 0.09680216014385223, + "loss_ce": 0.0006412738002836704, + "loss_iou": 0.341796875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 170961116, + "step": 1873 + }, + { + "epoch": 7.808333333333334, + "grad_norm": 1.7964825342418311, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 171052512, + "step": 1874 + }, + { + "epoch": 7.808333333333334, + "loss": 0.12361488491296768, + "loss_ce": 1.8692413505050354e-05, + "loss_iou": 0.25, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 171052512, + "step": 1874 + }, + { + "epoch": 7.8125, + "grad_norm": 2.0243645225505853, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 171143440, + "step": 1875 + }, + { + "epoch": 7.8125, + "loss": 0.04031776636838913, + "loss_ce": 4.047397851536516e-06, + "loss_iou": 0.189453125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 171143440, + "step": 1875 + }, + { + "epoch": 7.816666666666666, + "grad_norm": 46.0512120265654, + "learning_rate": 5e-05, + "loss": 0.1445, + "num_input_tokens_seen": 171234224, + "step": 1876 + }, + { + "epoch": 7.816666666666666, + "loss": 0.19212490320205688, + "loss_ce": 1.6747279005357996e-05, + "loss_iou": 0.384765625, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 171234224, + "step": 1876 + }, + { + "epoch": 7.820833333333333, + "grad_norm": 3.132204944431851, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 171325636, + "step": 1877 + }, + { + "epoch": 7.820833333333333, + "loss": 0.07965581119060516, + "loss_ce": 3.5450975701678544e-05, + "loss_iou": 0.2578125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 171325636, + "step": 1877 + }, + { + "epoch": 7.825, + "grad_norm": 2.754800643128859, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 171417576, + "step": 1878 + }, + { + "epoch": 7.825, + "loss": 0.09073353558778763, + "loss_ce": 0.0015916909324005246, + "loss_iou": 0.279296875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 171417576, + "step": 1878 + }, + { + "epoch": 7.829166666666667, + "grad_norm": 4.052791143927779, + "learning_rate": 5e-05, + "loss": 0.1028, + "num_input_tokens_seen": 171509616, + "step": 1879 + }, + { + "epoch": 7.829166666666667, + "loss": 0.13964907824993134, + "loss_ce": 6.167963874759153e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 171509616, + "step": 1879 + }, + { + "epoch": 7.833333333333333, + "grad_norm": 7.993966658893457, + "learning_rate": 5e-05, + "loss": 0.1307, + "num_input_tokens_seen": 171601172, + "step": 1880 + }, + { + "epoch": 7.833333333333333, + "loss": 0.1629827916622162, + "loss_ce": 6.088300870032981e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 171601172, + "step": 1880 + }, + { + "epoch": 7.8375, + "grad_norm": 6.632999575668596, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 171693368, + "step": 1881 + }, + { + "epoch": 7.8375, + "loss": 0.09090165048837662, + "loss_ce": 0.0015919567085802555, + "loss_iou": 0.1962890625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 171693368, + "step": 1881 + }, + { + "epoch": 7.841666666666667, + "grad_norm": 3.9364271993934463, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 171784312, + "step": 1882 + }, + { + "epoch": 7.841666666666667, + "loss": 0.07746930420398712, + "loss_ce": 0.0007175912614911795, + "loss_iou": 0.1796875, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 171784312, + "step": 1882 + }, + { + "epoch": 7.845833333333333, + "grad_norm": 2.5086294138768084, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 171875756, + "step": 1883 + }, + { + "epoch": 7.845833333333333, + "loss": 0.05923663079738617, + "loss_ce": 0.0008412470342591405, + "loss_iou": 0.236328125, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 171875756, + "step": 1883 + }, + { + "epoch": 7.85, + "grad_norm": 3.9182379212981764, + "learning_rate": 5e-05, + "loss": 0.0981, + "num_input_tokens_seen": 171967624, + "step": 1884 + }, + { + "epoch": 7.85, + "loss": 0.1100146546959877, + "loss_ce": 0.0007235782104544342, + "loss_iou": 0.353515625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 171967624, + "step": 1884 + }, + { + "epoch": 7.854166666666667, + "grad_norm": 3.8226533127540403, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 172058620, + "step": 1885 + }, + { + "epoch": 7.854166666666667, + "loss": 0.06194145977497101, + "loss_ce": 0.001837087795138359, + "loss_iou": 0.21484375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 172058620, + "step": 1885 + }, + { + "epoch": 7.858333333333333, + "grad_norm": 6.492949437861943, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 172150144, + "step": 1886 + }, + { + "epoch": 7.858333333333333, + "loss": 0.07762310653924942, + "loss_ce": 7.794049452058971e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 172150144, + "step": 1886 + }, + { + "epoch": 7.8625, + "grad_norm": 3.818031643454962, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 172241676, + "step": 1887 + }, + { + "epoch": 7.8625, + "loss": 0.060611166059970856, + "loss_ce": 3.2567709240538534e-06, + "loss_iou": 0.275390625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 172241676, + "step": 1887 + }, + { + "epoch": 7.866666666666667, + "grad_norm": 3.2853044128340465, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 172333200, + "step": 1888 + }, + { + "epoch": 7.866666666666667, + "loss": 0.1084718331694603, + "loss_ce": 0.000546415220014751, + "loss_iou": 0.32421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 172333200, + "step": 1888 + }, + { + "epoch": 7.870833333333334, + "grad_norm": 4.887550603839418, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 172423792, + "step": 1889 + }, + { + "epoch": 7.870833333333334, + "loss": 0.06197632476687431, + "loss_ce": 1.038287973642582e-05, + "loss_iou": 0.3046875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 172423792, + "step": 1889 + }, + { + "epoch": 7.875, + "grad_norm": 2.795912828890586, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 172514996, + "step": 1890 + }, + { + "epoch": 7.875, + "loss": 0.12136028707027435, + "loss_ce": 0.00025128168636001647, + "loss_iou": 0.205078125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 172514996, + "step": 1890 + }, + { + "epoch": 7.879166666666666, + "grad_norm": 3.52171389089644, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 172605524, + "step": 1891 + }, + { + "epoch": 7.879166666666666, + "loss": 0.1516215056180954, + "loss_ce": 1.0173911505262367e-05, + "loss_iou": 0.353515625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 172605524, + "step": 1891 + }, + { + "epoch": 7.883333333333333, + "grad_norm": 10.1324719488612, + "learning_rate": 5e-05, + "loss": 0.0854, + "num_input_tokens_seen": 172696440, + "step": 1892 + }, + { + "epoch": 7.883333333333333, + "loss": 0.11221377551555634, + "loss_ce": 3.1158208003034815e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 172696440, + "step": 1892 + }, + { + "epoch": 7.8875, + "grad_norm": 10.27030357885242, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 172788140, + "step": 1893 + }, + { + "epoch": 7.8875, + "loss": 0.17470771074295044, + "loss_ce": 0.00016241407138295472, + "loss_iou": 0.21875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 172788140, + "step": 1893 + }, + { + "epoch": 7.891666666666667, + "grad_norm": 3.9635728907775243, + "learning_rate": 5e-05, + "loss": 0.1529, + "num_input_tokens_seen": 172879188, + "step": 1894 + }, + { + "epoch": 7.891666666666667, + "loss": 0.20308756828308105, + "loss_ce": 0.001488459762185812, + "loss_iou": 0.1630859375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 172879188, + "step": 1894 + }, + { + "epoch": 7.895833333333333, + "grad_norm": 2.936241068833577, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 172970984, + "step": 1895 + }, + { + "epoch": 7.895833333333333, + "loss": 0.053100377321243286, + "loss_ce": 0.0014951550401747227, + "loss_iou": 0.185546875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 172970984, + "step": 1895 + }, + { + "epoch": 7.9, + "grad_norm": 3.421123041364866, + "learning_rate": 5e-05, + "loss": 0.1266, + "num_input_tokens_seen": 173062196, + "step": 1896 + }, + { + "epoch": 7.9, + "loss": 0.13779032230377197, + "loss_ce": 6.44890678813681e-05, + "loss_iou": 0.333984375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 173062196, + "step": 1896 + }, + { + "epoch": 7.904166666666667, + "grad_norm": 7.247987310893387, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 173153772, + "step": 1897 + }, + { + "epoch": 7.904166666666667, + "loss": 0.08903989940881729, + "loss_ce": 0.002934554824605584, + "loss_iou": 0.1328125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 173153772, + "step": 1897 + }, + { + "epoch": 7.908333333333333, + "grad_norm": 7.165382740064726, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 173244088, + "step": 1898 + }, + { + "epoch": 7.908333333333333, + "loss": 0.08239862322807312, + "loss_ce": 8.508615428581834e-05, + "loss_iou": 0.328125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 173244088, + "step": 1898 + }, + { + "epoch": 7.9125, + "grad_norm": 5.654221775747249, + "learning_rate": 5e-05, + "loss": 0.1309, + "num_input_tokens_seen": 173335192, + "step": 1899 + }, + { + "epoch": 7.9125, + "loss": 0.13726767897605896, + "loss_ce": 0.0011592707596719265, + "loss_iou": 0.33203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 173335192, + "step": 1899 + }, + { + "epoch": 7.916666666666667, + "grad_norm": 5.467712394921771, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 173426656, + "step": 1900 + }, + { + "epoch": 7.916666666666667, + "loss": 0.060835033655166626, + "loss_ce": 0.0005170417134650052, + "loss_iou": 0.40234375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 173426656, + "step": 1900 + }, + { + "epoch": 7.920833333333333, + "grad_norm": 3.4832356065050822, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 173518204, + "step": 1901 + }, + { + "epoch": 7.920833333333333, + "loss": 0.07123122364282608, + "loss_ce": 0.0002168197388527915, + "loss_iou": 0.259765625, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 173518204, + "step": 1901 + }, + { + "epoch": 7.925, + "grad_norm": 8.246157039093085, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 173609712, + "step": 1902 + }, + { + "epoch": 7.925, + "loss": 0.06213460490107536, + "loss_ce": 0.0048988861963152885, + "loss_iou": 0.234375, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 173609712, + "step": 1902 + }, + { + "epoch": 7.929166666666667, + "grad_norm": 1.8546311321482563, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 173701596, + "step": 1903 + }, + { + "epoch": 7.929166666666667, + "loss": 0.0635804757475853, + "loss_ce": 0.0011720317415893078, + "loss_iou": 0.263671875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 173701596, + "step": 1903 + }, + { + "epoch": 7.933333333333334, + "grad_norm": 9.380074079968875, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 173793244, + "step": 1904 + }, + { + "epoch": 7.933333333333334, + "loss": 0.069390207529068, + "loss_ce": 0.0006646226975135505, + "loss_iou": 0.154296875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 173793244, + "step": 1904 + }, + { + "epoch": 7.9375, + "grad_norm": 2.965480902277429, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 173884972, + "step": 1905 + }, + { + "epoch": 7.9375, + "loss": 0.08638650178909302, + "loss_ce": 0.003958521876484156, + "loss_iou": 0.1904296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 173884972, + "step": 1905 + }, + { + "epoch": 7.941666666666666, + "grad_norm": 5.18458949518755, + "learning_rate": 5e-05, + "loss": 0.1219, + "num_input_tokens_seen": 173976040, + "step": 1906 + }, + { + "epoch": 7.941666666666666, + "loss": 0.07028573006391525, + "loss_ce": 0.0013160043163225055, + "loss_iou": 0.33203125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 173976040, + "step": 1906 + }, + { + "epoch": 7.945833333333333, + "grad_norm": 2.3609421895316305, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 174067072, + "step": 1907 + }, + { + "epoch": 7.945833333333333, + "loss": 0.0694548636674881, + "loss_ce": 0.001217557000927627, + "loss_iou": 0.267578125, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 174067072, + "step": 1907 + }, + { + "epoch": 7.95, + "grad_norm": 2.7941265022676958, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 174158448, + "step": 1908 + }, + { + "epoch": 7.95, + "loss": 0.05729863792657852, + "loss_ce": 0.0006427493062801659, + "loss_iou": 0.40625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 174158448, + "step": 1908 + }, + { + "epoch": 7.954166666666667, + "grad_norm": 3.680505406275804, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 174249904, + "step": 1909 + }, + { + "epoch": 7.954166666666667, + "loss": 0.05491199344396591, + "loss_ce": 0.0043977717868983746, + "loss_iou": 0.2177734375, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 174249904, + "step": 1909 + }, + { + "epoch": 7.958333333333333, + "grad_norm": 7.906246591304872, + "learning_rate": 5e-05, + "loss": 0.1651, + "num_input_tokens_seen": 174341280, + "step": 1910 + }, + { + "epoch": 7.958333333333333, + "loss": 0.18820932507514954, + "loss_ce": 9.898372809402645e-05, + "loss_iou": 0.3125, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 174341280, + "step": 1910 + }, + { + "epoch": 7.9625, + "grad_norm": 1.848318747618714, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 174432296, + "step": 1911 + }, + { + "epoch": 7.9625, + "loss": 0.07310444116592407, + "loss_ce": 0.0002284618967678398, + "loss_iou": 0.2265625, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 174432296, + "step": 1911 + }, + { + "epoch": 7.966666666666667, + "grad_norm": 3.8571062261351057, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 174523860, + "step": 1912 + }, + { + "epoch": 7.966666666666667, + "loss": 0.02909906394779682, + "loss_ce": 0.0005956448148936033, + "loss_iou": 0.283203125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 174523860, + "step": 1912 + }, + { + "epoch": 7.970833333333333, + "grad_norm": 5.069425793896958, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 174615020, + "step": 1913 + }, + { + "epoch": 7.970833333333333, + "loss": 0.06316525489091873, + "loss_ce": 0.00209958222694695, + "loss_iou": 0.21484375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 174615020, + "step": 1913 + }, + { + "epoch": 7.975, + "grad_norm": 7.767234055317729, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 174706624, + "step": 1914 + }, + { + "epoch": 7.975, + "loss": 0.07708540558815002, + "loss_ce": 5.9040161431767046e-05, + "loss_iou": 0.341796875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 174706624, + "step": 1914 + }, + { + "epoch": 7.979166666666667, + "grad_norm": 5.756764179723251, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 174797684, + "step": 1915 + }, + { + "epoch": 7.979166666666667, + "loss": 0.0657171905040741, + "loss_ce": 7.387852383544669e-05, + "loss_iou": 0.10009765625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 174797684, + "step": 1915 + }, + { + "epoch": 7.983333333333333, + "grad_norm": 2.8165166655344525, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 174889664, + "step": 1916 + }, + { + "epoch": 7.983333333333333, + "loss": 0.07849019765853882, + "loss_ce": 0.0003499391896184534, + "loss_iou": 0.26171875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 174889664, + "step": 1916 + }, + { + "epoch": 7.9875, + "grad_norm": 4.331019918999385, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 174981060, + "step": 1917 + }, + { + "epoch": 7.9875, + "loss": 0.08154531568288803, + "loss_ce": 0.0007042511133477092, + "loss_iou": 0.271484375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 174981060, + "step": 1917 + }, + { + "epoch": 7.991666666666667, + "grad_norm": 2.5517017116929397, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 175072248, + "step": 1918 + }, + { + "epoch": 7.991666666666667, + "loss": 0.0968976840376854, + "loss_ce": 0.0007673102663829923, + "loss_iou": 0.1904296875, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 175072248, + "step": 1918 + }, + { + "epoch": 7.995833333333334, + "grad_norm": 10.617618874938415, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 175163296, + "step": 1919 + }, + { + "epoch": 7.995833333333334, + "loss": 0.07148117572069168, + "loss_ce": 0.00025314692175015807, + "loss_iou": 0.08837890625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 175163296, + "step": 1919 + }, + { + "epoch": 8.0, + "grad_norm": 9.509254592027485, + "learning_rate": 5e-05, + "loss": 0.1508, + "num_input_tokens_seen": 175254576, + "step": 1920 + }, + { + "epoch": 8.0, + "loss": 0.11267201602458954, + "loss_ce": 0.0007335399859584868, + "loss_iou": 0.1826171875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 175254576, + "step": 1920 + }, + { + "epoch": 8.004166666666666, + "grad_norm": 2.7558632771081855, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 175346468, + "step": 1921 + }, + { + "epoch": 8.004166666666666, + "loss": 0.05728989467024803, + "loss_ce": 1.602834163350053e-05, + "loss_iou": 0.328125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 175346468, + "step": 1921 + }, + { + "epoch": 8.008333333333333, + "grad_norm": 3.141904081215093, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 175437396, + "step": 1922 + }, + { + "epoch": 8.008333333333333, + "loss": 0.06695879250764847, + "loss_ce": 0.0001252962974831462, + "loss_iou": 0.2734375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 175437396, + "step": 1922 + }, + { + "epoch": 8.0125, + "grad_norm": 5.683325618422793, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 175529152, + "step": 1923 + }, + { + "epoch": 8.0125, + "loss": 0.05996830761432648, + "loss_ce": 0.00016911182319745421, + "loss_iou": 0.369140625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 175529152, + "step": 1923 + }, + { + "epoch": 8.016666666666667, + "grad_norm": 4.526126081879709, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 175620760, + "step": 1924 + }, + { + "epoch": 8.016666666666667, + "loss": 0.14104092121124268, + "loss_ce": 8.023128611966968e-05, + "loss_iou": 0.0, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 175620760, + "step": 1924 + }, + { + "epoch": 8.020833333333334, + "grad_norm": 2.5408139371917704, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 175712500, + "step": 1925 + }, + { + "epoch": 8.020833333333334, + "loss": 0.06760569661855698, + "loss_ce": 0.0007111626910045743, + "loss_iou": 0.296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 175712500, + "step": 1925 + }, + { + "epoch": 8.025, + "grad_norm": 2.229218566080554, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 175803620, + "step": 1926 + }, + { + "epoch": 8.025, + "loss": 0.09539847820997238, + "loss_ce": 0.00026755582075566053, + "loss_iou": 0.2890625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 175803620, + "step": 1926 + }, + { + "epoch": 8.029166666666667, + "grad_norm": 2.0268256812580963, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 175895072, + "step": 1927 + }, + { + "epoch": 8.029166666666667, + "loss": 0.09223587810993195, + "loss_ce": 4.9901744205271825e-05, + "loss_iou": 0.203125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 175895072, + "step": 1927 + }, + { + "epoch": 8.033333333333333, + "grad_norm": 2.28864677907678, + "learning_rate": 5e-05, + "loss": 0.1157, + "num_input_tokens_seen": 175986172, + "step": 1928 + }, + { + "epoch": 8.033333333333333, + "loss": 0.11336939036846161, + "loss_ce": 7.288237247848883e-05, + "loss_iou": 0.2890625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 175986172, + "step": 1928 + }, + { + "epoch": 8.0375, + "grad_norm": 2.2996102812429093, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 176077696, + "step": 1929 + }, + { + "epoch": 8.0375, + "loss": 0.05129075050354004, + "loss_ce": 9.75160874077119e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 176077696, + "step": 1929 + }, + { + "epoch": 8.041666666666666, + "grad_norm": 8.353008091933523, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 176168784, + "step": 1930 + }, + { + "epoch": 8.041666666666666, + "loss": 0.07607695460319519, + "loss_ce": 1.1887209439009894e-05, + "loss_iou": 0.1875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 176168784, + "step": 1930 + }, + { + "epoch": 8.045833333333333, + "grad_norm": 2.8779811742424117, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 176260380, + "step": 1931 + }, + { + "epoch": 8.045833333333333, + "loss": 0.05590657889842987, + "loss_ce": 0.00010518834460526705, + "loss_iou": 0.390625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 176260380, + "step": 1931 + }, + { + "epoch": 8.05, + "grad_norm": 3.310628240398785, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 176351776, + "step": 1932 + }, + { + "epoch": 8.05, + "loss": 0.05620376765727997, + "loss_ce": 5.650423645420233e-06, + "loss_iou": 0.27734375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 176351776, + "step": 1932 + }, + { + "epoch": 8.054166666666667, + "grad_norm": 2.3084546178570946, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 176442468, + "step": 1933 + }, + { + "epoch": 8.054166666666667, + "loss": 0.0433502122759819, + "loss_ce": 3.05097291857237e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 176442468, + "step": 1933 + }, + { + "epoch": 8.058333333333334, + "grad_norm": 1.8548286182587073, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 176533344, + "step": 1934 + }, + { + "epoch": 8.058333333333334, + "loss": 0.07856310904026031, + "loss_ce": 0.00011767195246648043, + "loss_iou": 0.2314453125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 176533344, + "step": 1934 + }, + { + "epoch": 8.0625, + "grad_norm": 1.9476347775688605, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 176624524, + "step": 1935 + }, + { + "epoch": 8.0625, + "loss": 0.06961099803447723, + "loss_ce": 0.0011600647121667862, + "loss_iou": 0.26171875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 176624524, + "step": 1935 + }, + { + "epoch": 8.066666666666666, + "grad_norm": 3.4435977127079016, + "learning_rate": 5e-05, + "loss": 0.083, + "num_input_tokens_seen": 176716328, + "step": 1936 + }, + { + "epoch": 8.066666666666666, + "loss": 0.10934816300868988, + "loss_ce": 3.419243876123801e-05, + "loss_iou": 0.5, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 176716328, + "step": 1936 + }, + { + "epoch": 8.070833333333333, + "grad_norm": 2.4531498270055305, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 176806684, + "step": 1937 + }, + { + "epoch": 8.070833333333333, + "loss": 0.0756228044629097, + "loss_ce": 8.416684431722388e-05, + "loss_iou": 0.365234375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 176806684, + "step": 1937 + }, + { + "epoch": 8.075, + "grad_norm": 3.6167625223497257, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 176897692, + "step": 1938 + }, + { + "epoch": 8.075, + "loss": 0.09166642278432846, + "loss_ce": 6.876679435663391e-06, + "loss_iou": 0.40234375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 176897692, + "step": 1938 + }, + { + "epoch": 8.079166666666667, + "grad_norm": 2.436191096534518, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 176988544, + "step": 1939 + }, + { + "epoch": 8.079166666666667, + "loss": 0.11645212769508362, + "loss_ce": 0.00019542011432349682, + "loss_iou": 0.267578125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 176988544, + "step": 1939 + }, + { + "epoch": 8.083333333333334, + "grad_norm": 3.7810363764121036, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 177080004, + "step": 1940 + }, + { + "epoch": 8.083333333333334, + "loss": 0.11389046162366867, + "loss_ce": 0.0005481801927089691, + "loss_iou": 0.310546875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 177080004, + "step": 1940 + }, + { + "epoch": 8.0875, + "grad_norm": 5.408759817581234, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 177171380, + "step": 1941 + }, + { + "epoch": 8.0875, + "loss": 0.09823840111494064, + "loss_ce": 0.00027697303448803723, + "loss_iou": 0.341796875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 177171380, + "step": 1941 + }, + { + "epoch": 8.091666666666667, + "grad_norm": 6.60248086092445, + "learning_rate": 5e-05, + "loss": 0.1204, + "num_input_tokens_seen": 177262460, + "step": 1942 + }, + { + "epoch": 8.091666666666667, + "loss": 0.13897444307804108, + "loss_ce": 0.0013706819154322147, + "loss_iou": 0.2890625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 177262460, + "step": 1942 + }, + { + "epoch": 8.095833333333333, + "grad_norm": 3.6691278808928183, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 177353752, + "step": 1943 + }, + { + "epoch": 8.095833333333333, + "loss": 0.05663401260972023, + "loss_ce": 0.0008783958619460464, + "loss_iou": 0.302734375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 177353752, + "step": 1943 + }, + { + "epoch": 8.1, + "grad_norm": 2.7196245880429375, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 177445656, + "step": 1944 + }, + { + "epoch": 8.1, + "loss": 0.1299806535243988, + "loss_ce": 0.00012835516827180982, + "loss_iou": 0.193359375, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 177445656, + "step": 1944 + }, + { + "epoch": 8.104166666666666, + "grad_norm": 16.454721595659908, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 177536400, + "step": 1945 + }, + { + "epoch": 8.104166666666666, + "loss": 0.040842410176992416, + "loss_ce": 0.00034558697370812297, + "loss_iou": 0.220703125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 177536400, + "step": 1945 + }, + { + "epoch": 8.108333333333333, + "grad_norm": 7.317190624473312, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 177627352, + "step": 1946 + }, + { + "epoch": 8.108333333333333, + "loss": 0.09328107535839081, + "loss_ce": 4.101587364857551e-06, + "loss_iou": 0.279296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 177627352, + "step": 1946 + }, + { + "epoch": 8.1125, + "grad_norm": 3.5851380464629243, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 177716688, + "step": 1947 + }, + { + "epoch": 8.1125, + "loss": 0.15177735686302185, + "loss_ce": 2.8688094971585087e-05, + "loss_iou": 0.265625, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 177716688, + "step": 1947 + }, + { + "epoch": 8.116666666666667, + "grad_norm": 1.3617134730767266, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 177807800, + "step": 1948 + }, + { + "epoch": 8.116666666666667, + "loss": 0.06027965247631073, + "loss_ce": 2.269490869366564e-05, + "loss_iou": 0.255859375, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 177807800, + "step": 1948 + }, + { + "epoch": 8.120833333333334, + "grad_norm": 11.342054491371455, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 177899068, + "step": 1949 + }, + { + "epoch": 8.120833333333334, + "loss": 0.05866030231118202, + "loss_ce": 0.00011232466931687668, + "loss_iou": 0.271484375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 177899068, + "step": 1949 + }, + { + "epoch": 8.125, + "grad_norm": 12.640378581487273, + "learning_rate": 5e-05, + "loss": 0.101, + "num_input_tokens_seen": 177990316, + "step": 1950 + }, + { + "epoch": 8.125, + "loss": 0.0911889374256134, + "loss_ce": 0.00013211587793193758, + "loss_iou": 0.142578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 177990316, + "step": 1950 + }, + { + "epoch": 8.129166666666666, + "grad_norm": 2.9527899337102412, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 178081300, + "step": 1951 + }, + { + "epoch": 8.129166666666666, + "loss": 0.0566537082195282, + "loss_ce": 0.00039455119986087084, + "loss_iou": 0.271484375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 178081300, + "step": 1951 + }, + { + "epoch": 8.133333333333333, + "grad_norm": 4.2287548918279025, + "learning_rate": 5e-05, + "loss": 0.1153, + "num_input_tokens_seen": 178172116, + "step": 1952 + }, + { + "epoch": 8.133333333333333, + "loss": 0.1239519938826561, + "loss_ce": 9.640220378059894e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 178172116, + "step": 1952 + }, + { + "epoch": 8.1375, + "grad_norm": 2.3381843610726185, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 178263160, + "step": 1953 + }, + { + "epoch": 8.1375, + "loss": 0.0618969164788723, + "loss_ce": 0.00017511726764496416, + "loss_iou": 0.2578125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 178263160, + "step": 1953 + }, + { + "epoch": 8.141666666666667, + "grad_norm": 2.5747334144500322, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 178355032, + "step": 1954 + }, + { + "epoch": 8.141666666666667, + "loss": 0.061729107052087784, + "loss_ce": 2.25647436309373e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 178355032, + "step": 1954 + }, + { + "epoch": 8.145833333333334, + "grad_norm": 9.611222694930518, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 178446324, + "step": 1955 + }, + { + "epoch": 8.145833333333334, + "loss": 0.1247691959142685, + "loss_ce": 0.00021170321269892156, + "loss_iou": 0.1611328125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 178446324, + "step": 1955 + }, + { + "epoch": 8.15, + "grad_norm": 2.7844977171891974, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 178538236, + "step": 1956 + }, + { + "epoch": 8.15, + "loss": 0.055539753288030624, + "loss_ce": 4.353750409791246e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 178538236, + "step": 1956 + }, + { + "epoch": 8.154166666666667, + "grad_norm": 3.6994877077194186, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 178629736, + "step": 1957 + }, + { + "epoch": 8.154166666666667, + "loss": 0.059186916798353195, + "loss_ce": 0.0023326671216636896, + "loss_iou": 0.2021484375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 178629736, + "step": 1957 + }, + { + "epoch": 8.158333333333333, + "grad_norm": 2.9171432609320127, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 178720776, + "step": 1958 + }, + { + "epoch": 8.158333333333333, + "loss": 0.16347545385360718, + "loss_ce": 8.051843906287104e-06, + "loss_iou": 0.318359375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 178720776, + "step": 1958 + }, + { + "epoch": 8.1625, + "grad_norm": 1.9009285367972448, + "learning_rate": 5e-05, + "loss": 0.1118, + "num_input_tokens_seen": 178812356, + "step": 1959 + }, + { + "epoch": 8.1625, + "loss": 0.1348172128200531, + "loss_ce": 0.0013333172537386417, + "loss_iou": 0.361328125, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 178812356, + "step": 1959 + }, + { + "epoch": 8.166666666666666, + "grad_norm": 2.970864300802223, + "learning_rate": 5e-05, + "loss": 0.1303, + "num_input_tokens_seen": 178903584, + "step": 1960 + }, + { + "epoch": 8.166666666666666, + "loss": 0.181955486536026, + "loss_ce": 9.36027427087538e-05, + "loss_iou": 0.11279296875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 178903584, + "step": 1960 + }, + { + "epoch": 8.170833333333333, + "grad_norm": 4.069553929687919, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 178994236, + "step": 1961 + }, + { + "epoch": 8.170833333333333, + "loss": 0.07317335158586502, + "loss_ce": 0.00029737126897089183, + "loss_iou": 0.2421875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 178994236, + "step": 1961 + }, + { + "epoch": 8.175, + "grad_norm": 4.51077294510098, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 179084796, + "step": 1962 + }, + { + "epoch": 8.175, + "loss": 0.0737156867980957, + "loss_ce": 0.0025029226671904325, + "loss_iou": 0.419921875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 179084796, + "step": 1962 + }, + { + "epoch": 8.179166666666667, + "grad_norm": 9.711530780142349, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 179175948, + "step": 1963 + }, + { + "epoch": 8.179166666666667, + "loss": 0.0733020007610321, + "loss_ce": 0.0001208468820550479, + "loss_iou": 0.2890625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 179175948, + "step": 1963 + }, + { + "epoch": 8.183333333333334, + "grad_norm": 2.456772823324688, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 179267144, + "step": 1964 + }, + { + "epoch": 8.183333333333334, + "loss": 0.07388445734977722, + "loss_ce": 0.00010821619798662141, + "loss_iou": 0.361328125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 179267144, + "step": 1964 + }, + { + "epoch": 8.1875, + "grad_norm": 2.482028249470996, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 179359172, + "step": 1965 + }, + { + "epoch": 8.1875, + "loss": 0.08596872538328171, + "loss_ce": 0.000702614663168788, + "loss_iou": 0.396484375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 179359172, + "step": 1965 + }, + { + "epoch": 8.191666666666666, + "grad_norm": 3.417093743348807, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 179450416, + "step": 1966 + }, + { + "epoch": 8.191666666666666, + "loss": 0.08479488641023636, + "loss_ce": 0.0011767229298129678, + "loss_iou": 0.22265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 179450416, + "step": 1966 + }, + { + "epoch": 8.195833333333333, + "grad_norm": 2.6848051093651137, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 179542024, + "step": 1967 + }, + { + "epoch": 8.195833333333333, + "loss": 0.08799510449171066, + "loss_ce": 2.818577195284888e-05, + "loss_iou": 0.265625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 179542024, + "step": 1967 + }, + { + "epoch": 8.2, + "grad_norm": 3.131329820317152, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 179633412, + "step": 1968 + }, + { + "epoch": 8.2, + "loss": 0.05659928917884827, + "loss_ce": 0.000569014810025692, + "loss_iou": 0.35546875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 179633412, + "step": 1968 + }, + { + "epoch": 8.204166666666667, + "grad_norm": 4.17209524896321, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 179724624, + "step": 1969 + }, + { + "epoch": 8.204166666666667, + "loss": 0.0819387137889862, + "loss_ce": 0.0014943802962079644, + "loss_iou": 0.25, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 179724624, + "step": 1969 + }, + { + "epoch": 8.208333333333334, + "grad_norm": 4.50290891660392, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 179816320, + "step": 1970 + }, + { + "epoch": 8.208333333333334, + "loss": 0.1005803719162941, + "loss_ce": 5.546794272959232e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 179816320, + "step": 1970 + }, + { + "epoch": 8.2125, + "grad_norm": 3.9169085252996116, + "learning_rate": 5e-05, + "loss": 0.142, + "num_input_tokens_seen": 179907560, + "step": 1971 + }, + { + "epoch": 8.2125, + "loss": 0.16928379237651825, + "loss_ce": 0.0008267580415122211, + "loss_iou": 0.330078125, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 179907560, + "step": 1971 + }, + { + "epoch": 8.216666666666667, + "grad_norm": 3.4743631391853134, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 179998500, + "step": 1972 + }, + { + "epoch": 8.216666666666667, + "loss": 0.08536157011985779, + "loss_ce": 0.0012093504192307591, + "loss_iou": 0.400390625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 179998500, + "step": 1972 + }, + { + "epoch": 8.220833333333333, + "grad_norm": 9.228609800262559, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 180089536, + "step": 1973 + }, + { + "epoch": 8.220833333333333, + "loss": 0.11307109892368317, + "loss_ce": 0.00040019513107836246, + "loss_iou": 0.232421875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 180089536, + "step": 1973 + }, + { + "epoch": 8.225, + "grad_norm": 6.383981100098228, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 180180812, + "step": 1974 + }, + { + "epoch": 8.225, + "loss": 0.07836788892745972, + "loss_ce": 0.00033443939173594117, + "loss_iou": 0.357421875, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 180180812, + "step": 1974 + }, + { + "epoch": 8.229166666666666, + "grad_norm": 2.6634427522258792, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 180272216, + "step": 1975 + }, + { + "epoch": 8.229166666666666, + "loss": 0.07826492935419083, + "loss_ce": 1.785774111340288e-05, + "loss_iou": 0.322265625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 180272216, + "step": 1975 + }, + { + "epoch": 8.233333333333333, + "grad_norm": 4.054808828099222, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 180363436, + "step": 1976 + }, + { + "epoch": 8.233333333333333, + "loss": 0.07174524664878845, + "loss_ce": 0.00023492946638725698, + "loss_iou": 0.19140625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 180363436, + "step": 1976 + }, + { + "epoch": 8.2375, + "grad_norm": 1.876663851471951, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 180455088, + "step": 1977 + }, + { + "epoch": 8.2375, + "loss": 0.07476097345352173, + "loss_ce": 0.000984729966148734, + "loss_iou": 0.201171875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 180455088, + "step": 1977 + }, + { + "epoch": 8.241666666666667, + "grad_norm": 4.035059021505715, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 180545456, + "step": 1978 + }, + { + "epoch": 8.241666666666667, + "loss": 0.06536682695150375, + "loss_ce": 2.8695054425043054e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 180545456, + "step": 1978 + }, + { + "epoch": 8.245833333333334, + "grad_norm": 3.8579126149062017, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 180636760, + "step": 1979 + }, + { + "epoch": 8.245833333333334, + "loss": 0.10296410322189331, + "loss_ce": 0.0005165926995687187, + "loss_iou": 0.2158203125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 180636760, + "step": 1979 + }, + { + "epoch": 8.25, + "grad_norm": 2.5788982024479403, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 180728236, + "step": 1980 + }, + { + "epoch": 8.25, + "loss": 0.08393379300832748, + "loss_ce": 0.0004987327847629786, + "loss_iou": 0.2421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 180728236, + "step": 1980 + }, + { + "epoch": 8.254166666666666, + "grad_norm": 7.725813315284284, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 180819708, + "step": 1981 + }, + { + "epoch": 8.254166666666666, + "loss": 0.08290599286556244, + "loss_ce": 0.00013469035911839455, + "loss_iou": 0.2119140625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 180819708, + "step": 1981 + }, + { + "epoch": 8.258333333333333, + "grad_norm": 7.410506149901965, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 180911432, + "step": 1982 + }, + { + "epoch": 8.258333333333333, + "loss": 0.0912504717707634, + "loss_ce": 0.0002317955659236759, + "loss_iou": 0.255859375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 180911432, + "step": 1982 + }, + { + "epoch": 8.2625, + "grad_norm": 9.975238003908782, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 181003188, + "step": 1983 + }, + { + "epoch": 8.2625, + "loss": 0.07827382534742355, + "loss_ce": 0.0001183067579404451, + "loss_iou": 0.30078125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 181003188, + "step": 1983 + }, + { + "epoch": 8.266666666666667, + "grad_norm": 3.316524325597984, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 181094616, + "step": 1984 + }, + { + "epoch": 8.266666666666667, + "loss": 0.09194935858249664, + "loss_ce": 1.5155635082919616e-05, + "loss_iou": 0.322265625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 181094616, + "step": 1984 + }, + { + "epoch": 8.270833333333334, + "grad_norm": 9.740987394451327, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 181185980, + "step": 1985 + }, + { + "epoch": 8.270833333333334, + "loss": 0.09619566798210144, + "loss_ce": 0.0030407612212002277, + "loss_iou": 0.185546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 181185980, + "step": 1985 + }, + { + "epoch": 8.275, + "grad_norm": 3.0570700848047307, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 181276952, + "step": 1986 + }, + { + "epoch": 8.275, + "loss": 0.03943505138158798, + "loss_ce": 2.92310505756177e-05, + "loss_iou": 0.353515625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 181276952, + "step": 1986 + }, + { + "epoch": 8.279166666666667, + "grad_norm": 2.52623572076418, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 181368384, + "step": 1987 + }, + { + "epoch": 8.279166666666667, + "loss": 0.0691833421587944, + "loss_ce": 0.0005950895138084888, + "loss_iou": 0.2236328125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 181368384, + "step": 1987 + }, + { + "epoch": 8.283333333333333, + "grad_norm": 3.198067753348154, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 181459680, + "step": 1988 + }, + { + "epoch": 8.283333333333333, + "loss": 0.08994542062282562, + "loss_ce": 0.0004373587144073099, + "loss_iou": 0.302734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 181459680, + "step": 1988 + }, + { + "epoch": 8.2875, + "grad_norm": 3.831468348340558, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 181550624, + "step": 1989 + }, + { + "epoch": 8.2875, + "loss": 0.06827230751514435, + "loss_ce": 4.485354565986199e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 181550624, + "step": 1989 + }, + { + "epoch": 8.291666666666666, + "grad_norm": 4.826052777734482, + "learning_rate": 5e-05, + "loss": 0.0985, + "num_input_tokens_seen": 181642232, + "step": 1990 + }, + { + "epoch": 8.291666666666666, + "loss": 0.10974957793951035, + "loss_ce": 3.8890477298991755e-05, + "loss_iou": 0.2265625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 181642232, + "step": 1990 + }, + { + "epoch": 8.295833333333333, + "grad_norm": 2.1973734075297613, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 181733320, + "step": 1991 + }, + { + "epoch": 8.295833333333333, + "loss": 0.05872727930545807, + "loss_ce": 5.723710637539625e-05, + "loss_iou": 0.146484375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 181733320, + "step": 1991 + }, + { + "epoch": 8.3, + "grad_norm": 8.823895302868364, + "learning_rate": 5e-05, + "loss": 0.1295, + "num_input_tokens_seen": 181824632, + "step": 1992 + }, + { + "epoch": 8.3, + "loss": 0.11481700837612152, + "loss_ce": 9.878131095319986e-06, + "loss_iou": 0.373046875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 181824632, + "step": 1992 + }, + { + "epoch": 8.304166666666667, + "grad_norm": 3.6855112760489823, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 181915556, + "step": 1993 + }, + { + "epoch": 8.304166666666667, + "loss": 0.08049677312374115, + "loss_ce": 0.0005712423007935286, + "loss_iou": 0.357421875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 181915556, + "step": 1993 + }, + { + "epoch": 8.308333333333334, + "grad_norm": 1.9136759246235993, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 182006528, + "step": 1994 + }, + { + "epoch": 8.308333333333334, + "loss": 0.09684373438358307, + "loss_ce": 1.1463387636467814e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 182006528, + "step": 1994 + }, + { + "epoch": 8.3125, + "grad_norm": 2.4837722716856874, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 182098420, + "step": 1995 + }, + { + "epoch": 8.3125, + "loss": 0.13037192821502686, + "loss_ce": 0.0021828359458595514, + "loss_iou": 0.326171875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 182098420, + "step": 1995 + }, + { + "epoch": 8.316666666666666, + "grad_norm": 4.2129148888337555, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 182189936, + "step": 1996 + }, + { + "epoch": 8.316666666666666, + "loss": 0.06477083265781403, + "loss_ce": 0.00014985792222432792, + "loss_iou": 0.3359375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 182189936, + "step": 1996 + }, + { + "epoch": 8.320833333333333, + "grad_norm": 5.524144085808494, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 182281536, + "step": 1997 + }, + { + "epoch": 8.320833333333333, + "loss": 0.06460338830947876, + "loss_ce": 5.87101822020486e-05, + "loss_iou": 0.392578125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 182281536, + "step": 1997 + }, + { + "epoch": 8.325, + "grad_norm": 6.583606047512185, + "learning_rate": 5e-05, + "loss": 0.1289, + "num_input_tokens_seen": 182373100, + "step": 1998 + }, + { + "epoch": 8.325, + "loss": 0.11908264458179474, + "loss_ce": 4.883207657258026e-05, + "loss_iou": 0.404296875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 182373100, + "step": 1998 + }, + { + "epoch": 8.329166666666667, + "grad_norm": 2.4531039298088952, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 182465048, + "step": 1999 + }, + { + "epoch": 8.329166666666667, + "loss": 0.12307038903236389, + "loss_ce": 0.0015951667446643114, + "loss_iou": 0.138671875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 182465048, + "step": 1999 + }, + { + "epoch": 8.333333333333334, + "grad_norm": 2.298450315643497, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 182556628, + "step": 2000 + }, + { + "epoch": 8.333333333333334, + "eval_seeclick_CIoU": 0.2732051685452461, + "eval_seeclick_GIoU": 0.25603655725717545, + "eval_seeclick_IoU": 0.36378540098667145, + "eval_seeclick_MAE_all": 0.09515979886054993, + "eval_seeclick_MAE_h": 0.06633967533707619, + "eval_seeclick_MAE_w": 0.1958950012922287, + "eval_seeclick_MAE_x_boxes": 0.19774916768074036, + "eval_seeclick_MAE_y_boxes": 0.06996404752135277, + "eval_seeclick_NUM_probability": 0.9999985694885254, + "eval_seeclick_inside_bbox": 0.5852272808551788, + "eval_seeclick_loss": 0.5696161985397339, + "eval_seeclick_loss_ce": 0.14711932837963104, + "eval_seeclick_loss_iou": 0.4210205078125, + "eval_seeclick_loss_num": 0.0845184326171875, + "eval_seeclick_loss_xval": 0.4228515625, + "eval_seeclick_runtime": 101.9273, + "eval_seeclick_samples_per_second": 0.422, + "eval_seeclick_steps_per_second": 0.02, + "num_input_tokens_seen": 182556628, + "step": 2000 + }, + { + "epoch": 8.333333333333334, + "eval_icons_CIoU": 0.40883131325244904, + "eval_icons_GIoU": 0.440561980009079, + "eval_icons_IoU": 0.48336389660835266, + "eval_icons_MAE_all": 0.05929296091198921, + "eval_icons_MAE_h": 0.10431193187832832, + "eval_icons_MAE_w": 0.10969538241624832, + "eval_icons_MAE_x_boxes": 0.11118783056735992, + "eval_icons_MAE_y_boxes": 0.10491820424795151, + "eval_icons_NUM_probability": 0.9999992847442627, + "eval_icons_inside_bbox": 0.6493055522441864, + "eval_icons_loss": 0.29335036873817444, + "eval_icons_loss_ce": 4.6103790737106465e-05, + "eval_icons_loss_iou": 0.30523681640625, + "eval_icons_loss_num": 0.0626220703125, + "eval_icons_loss_xval": 0.3133392333984375, + "eval_icons_runtime": 88.402, + "eval_icons_samples_per_second": 0.566, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 182556628, + "step": 2000 + }, + { + "epoch": 8.333333333333334, + "eval_screenspot_CIoU": 0.3823320269584656, + "eval_screenspot_GIoU": 0.36015834907690686, + "eval_screenspot_IoU": 0.45546941955884296, + "eval_screenspot_MAE_all": 0.09890640527009964, + "eval_screenspot_MAE_h": 0.10396142303943634, + "eval_screenspot_MAE_w": 0.1897703061501185, + "eval_screenspot_MAE_x_boxes": 0.18329455455144247, + "eval_screenspot_MAE_y_boxes": 0.09101255610585213, + "eval_screenspot_NUM_probability": 0.9999987483024597, + "eval_screenspot_inside_bbox": 0.6804166634877523, + "eval_screenspot_loss": 0.4989835321903229, + "eval_screenspot_loss_ce": 0.00019722061066810662, + "eval_screenspot_loss_iou": 0.3795979817708333, + "eval_screenspot_loss_num": 0.100006103515625, + "eval_screenspot_loss_xval": 0.5005289713541666, + "eval_screenspot_runtime": 152.4963, + "eval_screenspot_samples_per_second": 0.584, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 182556628, + "step": 2000 + }, + { + "epoch": 8.333333333333334, + "eval_compot_CIoU": 0.409628689289093, + "eval_compot_GIoU": 0.40224941074848175, + "eval_compot_IoU": 0.48659662902355194, + "eval_compot_MAE_all": 0.0632275864481926, + "eval_compot_MAE_h": 0.08800311759114265, + "eval_compot_MAE_w": 0.14701341837644577, + "eval_compot_MAE_x_boxes": 0.1445625126361847, + "eval_compot_MAE_y_boxes": 0.08778712153434753, + "eval_compot_NUM_probability": 0.9999973773956299, + "eval_compot_inside_bbox": 0.6927083432674408, + "eval_compot_loss": 0.34585246443748474, + "eval_compot_loss_ce": 0.030167696997523308, + "eval_compot_loss_iou": 0.29705810546875, + "eval_compot_loss_num": 0.057285308837890625, + "eval_compot_loss_xval": 0.2865447998046875, + "eval_compot_runtime": 88.9042, + "eval_compot_samples_per_second": 0.562, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 182556628, + "step": 2000 + }, + { + "epoch": 8.333333333333334, + "loss": 0.2891286611557007, + "loss_ce": 0.028233911842107773, + "loss_iou": 0.33984375, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 182556628, + "step": 2000 + }, + { + "epoch": 8.3375, + "grad_norm": 3.536412880551861, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 182648036, + "step": 2001 + }, + { + "epoch": 8.3375, + "loss": 0.07516656816005707, + "loss_ce": 1.70273742696736e-05, + "loss_iou": 0.236328125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 182648036, + "step": 2001 + }, + { + "epoch": 8.341666666666667, + "grad_norm": 2.2089360748949334, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 182738916, + "step": 2002 + }, + { + "epoch": 8.341666666666667, + "loss": 0.10036590695381165, + "loss_ce": 0.003472598735243082, + "loss_iou": 0.287109375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 182738916, + "step": 2002 + }, + { + "epoch": 8.345833333333333, + "grad_norm": 3.3586904659649424, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 182830120, + "step": 2003 + }, + { + "epoch": 8.345833333333333, + "loss": 0.0666748583316803, + "loss_ce": 7.787040522089228e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 182830120, + "step": 2003 + }, + { + "epoch": 8.35, + "grad_norm": 2.6220769322004513, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 182921292, + "step": 2004 + }, + { + "epoch": 8.35, + "loss": 0.04563147574663162, + "loss_ce": 2.295524245710112e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 182921292, + "step": 2004 + }, + { + "epoch": 8.354166666666666, + "grad_norm": 1.4887052911847, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 183012836, + "step": 2005 + }, + { + "epoch": 8.354166666666666, + "loss": 0.049174100160598755, + "loss_ce": 0.0004833037673961371, + "loss_iou": 0.1796875, + "loss_num": 0.009765625, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 183012836, + "step": 2005 + }, + { + "epoch": 8.358333333333333, + "grad_norm": 2.326647265916884, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 183104412, + "step": 2006 + }, + { + "epoch": 8.358333333333333, + "loss": 0.04949578642845154, + "loss_ce": 0.0006676615448668599, + "loss_iou": 0.349609375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 183104412, + "step": 2006 + }, + { + "epoch": 8.3625, + "grad_norm": 46.17555951747404, + "learning_rate": 5e-05, + "loss": 0.0892, + "num_input_tokens_seen": 183195828, + "step": 2007 + }, + { + "epoch": 8.3625, + "loss": 0.04580899327993393, + "loss_ce": 7.077553891576827e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 183195828, + "step": 2007 + }, + { + "epoch": 8.366666666666667, + "grad_norm": 21.30313606809819, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 183287228, + "step": 2008 + }, + { + "epoch": 8.366666666666667, + "loss": 0.09672500193119049, + "loss_ce": 0.00012923777103424072, + "loss_iou": 0.359375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 183287228, + "step": 2008 + }, + { + "epoch": 8.370833333333334, + "grad_norm": 4.311377430234867, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 183378304, + "step": 2009 + }, + { + "epoch": 8.370833333333334, + "loss": 0.10985423624515533, + "loss_ce": 0.0010438107419759035, + "loss_iou": 0.2314453125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 183378304, + "step": 2009 + }, + { + "epoch": 8.375, + "grad_norm": 3.105143321823887, + "learning_rate": 5e-05, + "loss": 0.1125, + "num_input_tokens_seen": 183468988, + "step": 2010 + }, + { + "epoch": 8.375, + "loss": 0.12895122170448303, + "loss_ce": 7.547732820967212e-05, + "loss_iou": 0.171875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 183468988, + "step": 2010 + }, + { + "epoch": 8.379166666666666, + "grad_norm": 3.014097309533224, + "learning_rate": 5e-05, + "loss": 0.1494, + "num_input_tokens_seen": 183560592, + "step": 2011 + }, + { + "epoch": 8.379166666666666, + "loss": 0.15173643827438354, + "loss_ce": 4.8823003453435376e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 183560592, + "step": 2011 + }, + { + "epoch": 8.383333333333333, + "grad_norm": 9.420722778412506, + "learning_rate": 5e-05, + "loss": 0.1165, + "num_input_tokens_seen": 183651696, + "step": 2012 + }, + { + "epoch": 8.383333333333333, + "loss": 0.1760517656803131, + "loss_ce": 0.001826913678087294, + "loss_iou": 0.2451171875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 183651696, + "step": 2012 + }, + { + "epoch": 8.3875, + "grad_norm": 2.78315603049161, + "learning_rate": 5e-05, + "loss": 0.1282, + "num_input_tokens_seen": 183743368, + "step": 2013 + }, + { + "epoch": 8.3875, + "loss": 0.12846490740776062, + "loss_ce": 1.6419715393567458e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 183743368, + "step": 2013 + }, + { + "epoch": 8.391666666666667, + "grad_norm": 3.112445349758835, + "learning_rate": 5e-05, + "loss": 0.0974, + "num_input_tokens_seen": 183834992, + "step": 2014 + }, + { + "epoch": 8.391666666666667, + "loss": 0.11074173450469971, + "loss_ce": 0.0008479395764879882, + "loss_iou": 0.2158203125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 183834992, + "step": 2014 + }, + { + "epoch": 8.395833333333334, + "grad_norm": 9.581477628961181, + "learning_rate": 5e-05, + "loss": 0.1136, + "num_input_tokens_seen": 183927132, + "step": 2015 + }, + { + "epoch": 8.395833333333334, + "loss": 0.13781800866127014, + "loss_ce": 0.0006720098899677396, + "loss_iou": 0.212890625, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 183927132, + "step": 2015 + }, + { + "epoch": 8.4, + "grad_norm": 3.2485861634052227, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 184018300, + "step": 2016 + }, + { + "epoch": 8.4, + "loss": 0.03941866755485535, + "loss_ce": 0.0006460819276981056, + "loss_iou": 0.25, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 184018300, + "step": 2016 + }, + { + "epoch": 8.404166666666667, + "grad_norm": 3.567778135692095, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 184109572, + "step": 2017 + }, + { + "epoch": 8.404166666666667, + "loss": 0.11215195059776306, + "loss_ce": 0.0006407210021279752, + "loss_iou": 0.248046875, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 184109572, + "step": 2017 + }, + { + "epoch": 8.408333333333333, + "grad_norm": 11.915336200996682, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 184201736, + "step": 2018 + }, + { + "epoch": 8.408333333333333, + "loss": 0.05989161133766174, + "loss_ce": 0.0011071297340095043, + "loss_iou": 0.2265625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 184201736, + "step": 2018 + }, + { + "epoch": 8.4125, + "grad_norm": 3.1239092174908096, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 184293072, + "step": 2019 + }, + { + "epoch": 8.4125, + "loss": 0.03606855124235153, + "loss_ce": 0.00043927942169830203, + "loss_iou": 0.2578125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 184293072, + "step": 2019 + }, + { + "epoch": 8.416666666666666, + "grad_norm": 4.363526779562095, + "learning_rate": 5e-05, + "loss": 0.1132, + "num_input_tokens_seen": 184384216, + "step": 2020 + }, + { + "epoch": 8.416666666666666, + "loss": 0.13690415024757385, + "loss_ce": 3.2802829082356766e-05, + "loss_iou": 0.283203125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 184384216, + "step": 2020 + }, + { + "epoch": 8.420833333333333, + "grad_norm": 3.6835107197547483, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 184475548, + "step": 2021 + }, + { + "epoch": 8.420833333333333, + "loss": 0.070304274559021, + "loss_ce": 3.755126817850396e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 184475548, + "step": 2021 + }, + { + "epoch": 8.425, + "grad_norm": 43.325644936886654, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 184566020, + "step": 2022 + }, + { + "epoch": 8.425, + "loss": 0.1017274484038353, + "loss_ce": 0.00030228166724555194, + "loss_iou": 0.30859375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 184566020, + "step": 2022 + }, + { + "epoch": 8.429166666666667, + "grad_norm": 5.629879042491284, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 184657684, + "step": 2023 + }, + { + "epoch": 8.429166666666667, + "loss": 0.07418912649154663, + "loss_ce": 0.0017098871758207679, + "loss_iou": 0.162109375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 184657684, + "step": 2023 + }, + { + "epoch": 8.433333333333334, + "grad_norm": 3.660697851414219, + "learning_rate": 5e-05, + "loss": 0.1359, + "num_input_tokens_seen": 184748280, + "step": 2024 + }, + { + "epoch": 8.433333333333334, + "loss": 0.1027696281671524, + "loss_ce": 0.001321564195677638, + "loss_iou": 0.09521484375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 184748280, + "step": 2024 + }, + { + "epoch": 8.4375, + "grad_norm": 4.670549770025157, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 184839656, + "step": 2025 + }, + { + "epoch": 8.4375, + "loss": 0.061051446944475174, + "loss_ce": 0.00016887954552657902, + "loss_iou": 0.337890625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 184839656, + "step": 2025 + }, + { + "epoch": 8.441666666666666, + "grad_norm": 4.255448876340178, + "learning_rate": 5e-05, + "loss": 0.128, + "num_input_tokens_seen": 184930812, + "step": 2026 + }, + { + "epoch": 8.441666666666666, + "loss": 0.08803963661193848, + "loss_ce": 0.013515707105398178, + "loss_iou": 0.26953125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 184930812, + "step": 2026 + }, + { + "epoch": 8.445833333333333, + "grad_norm": 2.9847180326094547, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 185021820, + "step": 2027 + }, + { + "epoch": 8.445833333333333, + "loss": 0.0788634866476059, + "loss_ce": 6.063582986826077e-06, + "loss_iou": 0.349609375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 185021820, + "step": 2027 + }, + { + "epoch": 8.45, + "grad_norm": 5.296533863173452, + "learning_rate": 5e-05, + "loss": 0.1021, + "num_input_tokens_seen": 185113136, + "step": 2028 + }, + { + "epoch": 8.45, + "loss": 0.08161696791648865, + "loss_ce": 0.00010451077105244622, + "loss_iou": 0.26953125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 185113136, + "step": 2028 + }, + { + "epoch": 8.454166666666667, + "grad_norm": 2.359461532383512, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 185204172, + "step": 2029 + }, + { + "epoch": 8.454166666666667, + "loss": 0.11959385871887207, + "loss_ce": 5.649681406794116e-05, + "loss_iou": 0.32421875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 185204172, + "step": 2029 + }, + { + "epoch": 8.458333333333334, + "grad_norm": 3.3681433875629514, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 185295588, + "step": 2030 + }, + { + "epoch": 8.458333333333334, + "loss": 0.051598481833934784, + "loss_ce": 0.0004052447038702667, + "loss_iou": 0.431640625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 185295588, + "step": 2030 + }, + { + "epoch": 8.4625, + "grad_norm": 1.8409437670897544, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 185386868, + "step": 2031 + }, + { + "epoch": 8.4625, + "loss": 0.05945602431893349, + "loss_ce": 0.000984343234449625, + "loss_iou": 0.26953125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 185386868, + "step": 2031 + }, + { + "epoch": 8.466666666666667, + "grad_norm": 7.258809483257304, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 185478912, + "step": 2032 + }, + { + "epoch": 8.466666666666667, + "loss": 0.10054165124893188, + "loss_ce": 0.002397121163085103, + "loss_iou": 0.296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 185478912, + "step": 2032 + }, + { + "epoch": 8.470833333333333, + "grad_norm": 3.5315820826842215, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 185570160, + "step": 2033 + }, + { + "epoch": 8.470833333333333, + "loss": 0.12144699692726135, + "loss_ce": 0.0003608747501857579, + "loss_iou": 0.259765625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 185570160, + "step": 2033 + }, + { + "epoch": 8.475, + "grad_norm": 2.7617066459888617, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 185661288, + "step": 2034 + }, + { + "epoch": 8.475, + "loss": 0.10207094252109528, + "loss_ce": 0.0007830956601537764, + "loss_iou": 0.2001953125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 185661288, + "step": 2034 + }, + { + "epoch": 8.479166666666666, + "grad_norm": 4.366291895999364, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 185751704, + "step": 2035 + }, + { + "epoch": 8.479166666666666, + "loss": 0.08858929574489594, + "loss_ce": 0.0017973067006096244, + "loss_iou": 0.291015625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 185751704, + "step": 2035 + }, + { + "epoch": 8.483333333333333, + "grad_norm": 4.527448075354709, + "learning_rate": 5e-05, + "loss": 0.1133, + "num_input_tokens_seen": 185842700, + "step": 2036 + }, + { + "epoch": 8.483333333333333, + "loss": 0.0761345848441124, + "loss_ce": 0.0007256510434672236, + "loss_iou": 0.34765625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 185842700, + "step": 2036 + }, + { + "epoch": 8.4875, + "grad_norm": 2.658451616936535, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 185933988, + "step": 2037 + }, + { + "epoch": 8.4875, + "loss": 0.056536074727773666, + "loss_ce": 0.003008242230862379, + "loss_iou": 0.310546875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 185933988, + "step": 2037 + }, + { + "epoch": 8.491666666666667, + "grad_norm": 2.532176322012894, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 186025476, + "step": 2038 + }, + { + "epoch": 8.491666666666667, + "loss": 0.07818962633609772, + "loss_ce": 0.0006292054313234985, + "loss_iou": 0.1435546875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 186025476, + "step": 2038 + }, + { + "epoch": 8.495833333333334, + "grad_norm": 4.14495850982906, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 186116408, + "step": 2039 + }, + { + "epoch": 8.495833333333334, + "loss": 0.042085736989974976, + "loss_ce": 0.00010880563786486164, + "loss_iou": 0.2890625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 186116408, + "step": 2039 + }, + { + "epoch": 8.5, + "grad_norm": 6.4528287559735285, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 186208424, + "step": 2040 + }, + { + "epoch": 8.5, + "loss": 0.10338255763053894, + "loss_ce": 0.0007214199285954237, + "loss_iou": 0.296875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 186208424, + "step": 2040 + }, + { + "epoch": 8.504166666666666, + "grad_norm": 3.13212007479543, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 186299120, + "step": 2041 + }, + { + "epoch": 8.504166666666666, + "loss": 0.07471642643213272, + "loss_ce": 0.00019250292098149657, + "loss_iou": 0.3515625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 186299120, + "step": 2041 + }, + { + "epoch": 8.508333333333333, + "grad_norm": 3.038527609423815, + "learning_rate": 5e-05, + "loss": 0.1123, + "num_input_tokens_seen": 186390332, + "step": 2042 + }, + { + "epoch": 8.508333333333333, + "loss": 0.13762235641479492, + "loss_ce": 0.00021695908799301833, + "loss_iou": 0.376953125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 186390332, + "step": 2042 + }, + { + "epoch": 8.5125, + "grad_norm": 3.118662041893769, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 186482152, + "step": 2043 + }, + { + "epoch": 8.5125, + "loss": 0.07551813125610352, + "loss_ce": 0.00024652251158840954, + "loss_iou": 0.408203125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 186482152, + "step": 2043 + }, + { + "epoch": 8.516666666666667, + "grad_norm": 2.1146865073908656, + "learning_rate": 5e-05, + "loss": 0.1233, + "num_input_tokens_seen": 186573600, + "step": 2044 + }, + { + "epoch": 8.516666666666667, + "loss": 0.08287165313959122, + "loss_ce": 3.931445462512784e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 186573600, + "step": 2044 + }, + { + "epoch": 8.520833333333334, + "grad_norm": 2.9617114380688663, + "learning_rate": 5e-05, + "loss": 0.1143, + "num_input_tokens_seen": 186665696, + "step": 2045 + }, + { + "epoch": 8.520833333333334, + "loss": 0.1069689616560936, + "loss_ce": 0.0004855015140492469, + "loss_iou": 0.2890625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 186665696, + "step": 2045 + }, + { + "epoch": 8.525, + "grad_norm": 8.627872563995814, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 186757492, + "step": 2046 + }, + { + "epoch": 8.525, + "loss": 0.12421756237745285, + "loss_ce": 0.0015674126334488392, + "loss_iou": 0.216796875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 186757492, + "step": 2046 + }, + { + "epoch": 8.529166666666667, + "grad_norm": 1.4626980237684941, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 186849112, + "step": 2047 + }, + { + "epoch": 8.529166666666667, + "loss": 0.15261510014533997, + "loss_ce": 1.1944499419769272e-05, + "loss_iou": 0.33203125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 186849112, + "step": 2047 + }, + { + "epoch": 8.533333333333333, + "grad_norm": 1.1764810754134916, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 186940656, + "step": 2048 + }, + { + "epoch": 8.533333333333333, + "loss": 0.07178713381290436, + "loss_ce": 0.00011659866140689701, + "loss_iou": 0.263671875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 186940656, + "step": 2048 + }, + { + "epoch": 8.5375, + "grad_norm": 6.872566752363237, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 187032012, + "step": 2049 + }, + { + "epoch": 8.5375, + "loss": 0.04218311607837677, + "loss_ce": 2.3082926418283023e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 187032012, + "step": 2049 + }, + { + "epoch": 8.541666666666666, + "grad_norm": 6.567042892572599, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 187123528, + "step": 2050 + }, + { + "epoch": 8.541666666666666, + "loss": 0.09021590650081635, + "loss_ce": 3.6459336115513e-05, + "loss_iou": 0.33203125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 187123528, + "step": 2050 + }, + { + "epoch": 8.545833333333333, + "grad_norm": 3.9997670150130307, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 187214544, + "step": 2051 + }, + { + "epoch": 8.545833333333333, + "loss": 0.05643386393785477, + "loss_ce": 0.0014259336749091744, + "loss_iou": 0.1943359375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 187214544, + "step": 2051 + }, + { + "epoch": 8.55, + "grad_norm": 2.946976187208218, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 187305548, + "step": 2052 + }, + { + "epoch": 8.55, + "loss": 0.06314453482627869, + "loss_ce": 0.0005377238849177957, + "loss_iou": 0.21875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 187305548, + "step": 2052 + }, + { + "epoch": 8.554166666666667, + "grad_norm": 6.762155479494456, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 187397320, + "step": 2053 + }, + { + "epoch": 8.554166666666667, + "loss": 0.0761374980211258, + "loss_ce": 1.1393080967536662e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 187397320, + "step": 2053 + }, + { + "epoch": 8.558333333333334, + "grad_norm": 1.898023791525541, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 187489576, + "step": 2054 + }, + { + "epoch": 8.558333333333334, + "loss": 0.06530947238206863, + "loss_ce": 0.0002612557145766914, + "loss_iou": 0.208984375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 187489576, + "step": 2054 + }, + { + "epoch": 8.5625, + "grad_norm": 2.0714800012369534, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 187580848, + "step": 2055 + }, + { + "epoch": 8.5625, + "loss": 0.08084389567375183, + "loss_ce": 2.8269764698052313e-06, + "loss_iou": 0.1640625, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 187580848, + "step": 2055 + }, + { + "epoch": 8.566666666666666, + "grad_norm": 4.945427219437998, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 187672592, + "step": 2056 + }, + { + "epoch": 8.566666666666666, + "loss": 0.06978463381528854, + "loss_ce": 0.00023506842262577266, + "loss_iou": 0.26171875, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 187672592, + "step": 2056 + }, + { + "epoch": 8.570833333333333, + "grad_norm": 20.23750197807952, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 187764052, + "step": 2057 + }, + { + "epoch": 8.570833333333333, + "loss": 0.0696493536233902, + "loss_ce": 5.401993257692084e-05, + "loss_iou": 0.31640625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 187764052, + "step": 2057 + }, + { + "epoch": 8.575, + "grad_norm": 1.6020255845359246, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 187856304, + "step": 2058 + }, + { + "epoch": 8.575, + "loss": 0.10885445773601532, + "loss_ce": 0.0014325795928016305, + "loss_iou": 0.255859375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 187856304, + "step": 2058 + }, + { + "epoch": 8.579166666666667, + "grad_norm": 4.16555631246304, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 187948216, + "step": 2059 + }, + { + "epoch": 8.579166666666667, + "loss": 0.10074003040790558, + "loss_ce": 0.00023802298528607935, + "loss_iou": 0.3203125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 187948216, + "step": 2059 + }, + { + "epoch": 8.583333333333334, + "grad_norm": 1.9885928154411758, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 188040056, + "step": 2060 + }, + { + "epoch": 8.583333333333334, + "loss": 0.046524547040462494, + "loss_ce": 0.0013280146522447467, + "loss_iou": 0.3046875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 188040056, + "step": 2060 + }, + { + "epoch": 8.5875, + "grad_norm": 4.150974742592255, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 188132004, + "step": 2061 + }, + { + "epoch": 8.5875, + "loss": 0.10680100321769714, + "loss_ce": 0.0002336244797334075, + "loss_iou": 0.443359375, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 188132004, + "step": 2061 + }, + { + "epoch": 8.591666666666667, + "grad_norm": 2.198395054417203, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 188222984, + "step": 2062 + }, + { + "epoch": 8.591666666666667, + "loss": 0.14485947787761688, + "loss_ce": 0.0012742701219394803, + "loss_iou": 0.251953125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 188222984, + "step": 2062 + }, + { + "epoch": 8.595833333333333, + "grad_norm": 2.525503803552608, + "learning_rate": 5e-05, + "loss": 0.1066, + "num_input_tokens_seen": 188314616, + "step": 2063 + }, + { + "epoch": 8.595833333333333, + "loss": 0.04075239598751068, + "loss_ce": 0.0002250537509098649, + "loss_iou": 0.4375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 188314616, + "step": 2063 + }, + { + "epoch": 8.6, + "grad_norm": 3.629610035159468, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 188406216, + "step": 2064 + }, + { + "epoch": 8.6, + "loss": 0.05258668214082718, + "loss_ce": 0.0001269671629415825, + "loss_iou": 0.26953125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 188406216, + "step": 2064 + }, + { + "epoch": 8.604166666666666, + "grad_norm": 9.671827213881189, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 188497988, + "step": 2065 + }, + { + "epoch": 8.604166666666666, + "loss": 0.051124535501003265, + "loss_ce": 0.0021590786054730415, + "loss_iou": 0.3203125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 188497988, + "step": 2065 + }, + { + "epoch": 8.608333333333333, + "grad_norm": 3.3838829799006787, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 188589916, + "step": 2066 + }, + { + "epoch": 8.608333333333333, + "loss": 0.06941956281661987, + "loss_ce": 2.2590240405406803e-05, + "loss_iou": 0.306640625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 188589916, + "step": 2066 + }, + { + "epoch": 8.6125, + "grad_norm": 7.240025883543789, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 188680664, + "step": 2067 + }, + { + "epoch": 8.6125, + "loss": 0.13569886982440948, + "loss_ce": 2.461551275700913e-06, + "loss_iou": 0.291015625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 188680664, + "step": 2067 + }, + { + "epoch": 8.616666666666667, + "grad_norm": 10.289727923962106, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 188771524, + "step": 2068 + }, + { + "epoch": 8.616666666666667, + "loss": 0.06156828999519348, + "loss_ce": 0.0006246872362680733, + "loss_iou": 0.330078125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 188771524, + "step": 2068 + }, + { + "epoch": 8.620833333333334, + "grad_norm": 3.0223526650592687, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 188862644, + "step": 2069 + }, + { + "epoch": 8.620833333333334, + "loss": 0.03289524093270302, + "loss_ce": 0.0002719507901929319, + "loss_iou": 0.32421875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 188862644, + "step": 2069 + }, + { + "epoch": 8.625, + "grad_norm": 7.6635049861009295, + "learning_rate": 5e-05, + "loss": 0.136, + "num_input_tokens_seen": 188953916, + "step": 2070 + }, + { + "epoch": 8.625, + "loss": 0.14031267166137695, + "loss_ce": 0.001472959527745843, + "loss_iou": 0.2109375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 188953916, + "step": 2070 + }, + { + "epoch": 8.629166666666666, + "grad_norm": 3.774380906886847, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 189044936, + "step": 2071 + }, + { + "epoch": 8.629166666666666, + "loss": 0.04810202494263649, + "loss_ce": 3.6839413951383904e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 189044936, + "step": 2071 + }, + { + "epoch": 8.633333333333333, + "grad_norm": 3.1837139415114883, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 189134984, + "step": 2072 + }, + { + "epoch": 8.633333333333333, + "loss": 0.07738348841667175, + "loss_ce": 5.1942650316050276e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 189134984, + "step": 2072 + }, + { + "epoch": 8.6375, + "grad_norm": 2.321453932470765, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 189226136, + "step": 2073 + }, + { + "epoch": 8.6375, + "loss": 0.05806281045079231, + "loss_ce": 3.3636093576205894e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 189226136, + "step": 2073 + }, + { + "epoch": 8.641666666666667, + "grad_norm": 7.354532468160025, + "learning_rate": 5e-05, + "loss": 0.1341, + "num_input_tokens_seen": 189317268, + "step": 2074 + }, + { + "epoch": 8.641666666666667, + "loss": 0.1462700068950653, + "loss_ce": 6.0283447965048254e-05, + "loss_iou": 0.265625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 189317268, + "step": 2074 + }, + { + "epoch": 8.645833333333334, + "grad_norm": 3.238401783158548, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 189408096, + "step": 2075 + }, + { + "epoch": 8.645833333333334, + "loss": 0.1143522709608078, + "loss_ce": 2.9121104034857126e-06, + "loss_iou": 0.11279296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 189408096, + "step": 2075 + }, + { + "epoch": 8.65, + "grad_norm": 4.565351741671237, + "learning_rate": 5e-05, + "loss": 0.1101, + "num_input_tokens_seen": 189499192, + "step": 2076 + }, + { + "epoch": 8.65, + "loss": 0.11114580929279327, + "loss_ce": 0.0005043236305937171, + "loss_iou": 0.29296875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 189499192, + "step": 2076 + }, + { + "epoch": 8.654166666666667, + "grad_norm": 2.7719252653668978, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 189589952, + "step": 2077 + }, + { + "epoch": 8.654166666666667, + "loss": 0.07235507667064667, + "loss_ce": 0.00012377678649500012, + "loss_iou": 0.310546875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 189589952, + "step": 2077 + }, + { + "epoch": 8.658333333333333, + "grad_norm": 8.810895366492268, + "learning_rate": 5e-05, + "loss": 0.1348, + "num_input_tokens_seen": 189681012, + "step": 2078 + }, + { + "epoch": 8.658333333333333, + "loss": 0.10324892401695251, + "loss_ce": 0.0006106742075644433, + "loss_iou": 0.28515625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 189681012, + "step": 2078 + }, + { + "epoch": 8.6625, + "grad_norm": 2.9084218501299213, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 189771988, + "step": 2079 + }, + { + "epoch": 8.6625, + "loss": 0.0454835519194603, + "loss_ce": 0.00011916970106540248, + "loss_iou": 0.32421875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 189771988, + "step": 2079 + }, + { + "epoch": 8.666666666666666, + "grad_norm": 3.59826792646645, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 189863352, + "step": 2080 + }, + { + "epoch": 8.666666666666666, + "loss": 0.06523586809635162, + "loss_ce": 4.540888767223805e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 189863352, + "step": 2080 + }, + { + "epoch": 8.670833333333333, + "grad_norm": 17.34496416831108, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 189955520, + "step": 2081 + }, + { + "epoch": 8.670833333333333, + "loss": 0.13551849126815796, + "loss_ce": 0.003350684652104974, + "loss_iou": 0.255859375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 189955520, + "step": 2081 + }, + { + "epoch": 8.675, + "grad_norm": 20.911905020217905, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 190047308, + "step": 2082 + }, + { + "epoch": 8.675, + "loss": 0.09232109785079956, + "loss_ce": 0.00012748880544677377, + "loss_iou": 0.375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 190047308, + "step": 2082 + }, + { + "epoch": 8.679166666666667, + "grad_norm": 2.6915926195134414, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 190137948, + "step": 2083 + }, + { + "epoch": 8.679166666666667, + "loss": 0.06865386664867401, + "loss_ce": 0.005497746169567108, + "loss_iou": 0.271484375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 190137948, + "step": 2083 + }, + { + "epoch": 8.683333333333334, + "grad_norm": 5.811296943866821, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 190229720, + "step": 2084 + }, + { + "epoch": 8.683333333333334, + "loss": 0.07164183259010315, + "loss_ce": 0.00100889487657696, + "loss_iou": 0.203125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 190229720, + "step": 2084 + }, + { + "epoch": 8.6875, + "grad_norm": 2.114910384036427, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 190321172, + "step": 2085 + }, + { + "epoch": 8.6875, + "loss": 0.05217263475060463, + "loss_ce": 0.0006894819671288133, + "loss_iou": 0.271484375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 190321172, + "step": 2085 + }, + { + "epoch": 8.691666666666666, + "grad_norm": 5.1616318256216775, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 190412864, + "step": 2086 + }, + { + "epoch": 8.691666666666666, + "loss": 0.0800904929637909, + "loss_ce": 0.00022598655777983367, + "loss_iou": 0.255859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 190412864, + "step": 2086 + }, + { + "epoch": 8.695833333333333, + "grad_norm": 2.7341320610815245, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 190503832, + "step": 2087 + }, + { + "epoch": 8.695833333333333, + "loss": 0.12463854253292084, + "loss_ce": 4.754873771162238e-06, + "loss_iou": 0.408203125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 190503832, + "step": 2087 + }, + { + "epoch": 8.7, + "grad_norm": 2.5917705973807563, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 190594728, + "step": 2088 + }, + { + "epoch": 8.7, + "loss": 0.048062510788440704, + "loss_ce": 1.2583659554366022e-05, + "loss_iou": 0.2421875, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 190594728, + "step": 2088 + }, + { + "epoch": 8.704166666666667, + "grad_norm": 13.43020791954383, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 190686224, + "step": 2089 + }, + { + "epoch": 8.704166666666667, + "loss": 0.09442667663097382, + "loss_ce": 0.00011209918739041314, + "loss_iou": 0.3125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 190686224, + "step": 2089 + }, + { + "epoch": 8.708333333333334, + "grad_norm": 4.704922294384652, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 190777928, + "step": 2090 + }, + { + "epoch": 8.708333333333334, + "loss": 0.09706898033618927, + "loss_ce": 0.0027696597389876842, + "loss_iou": 0.228515625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 190777928, + "step": 2090 + }, + { + "epoch": 8.7125, + "grad_norm": 2.9925223158655196, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 190868672, + "step": 2091 + }, + { + "epoch": 8.7125, + "loss": 0.09800204634666443, + "loss_ce": 4.062041625729762e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 190868672, + "step": 2091 + }, + { + "epoch": 8.716666666666667, + "grad_norm": 5.7989558548599, + "learning_rate": 5e-05, + "loss": 0.1182, + "num_input_tokens_seen": 190959652, + "step": 2092 + }, + { + "epoch": 8.716666666666667, + "loss": 0.16375920176506042, + "loss_ce": 1.8912126051873202e-06, + "loss_iou": 0.189453125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 190959652, + "step": 2092 + }, + { + "epoch": 8.720833333333333, + "grad_norm": 2.431675951228576, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 191051024, + "step": 2093 + }, + { + "epoch": 8.720833333333333, + "loss": 0.1278170347213745, + "loss_ce": 0.005975592415779829, + "loss_iou": 0.345703125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 191051024, + "step": 2093 + }, + { + "epoch": 8.725, + "grad_norm": 2.937522668485948, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 191142540, + "step": 2094 + }, + { + "epoch": 8.725, + "loss": 0.07788769155740738, + "loss_ce": 0.0004645938170142472, + "loss_iou": 0.3359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 191142540, + "step": 2094 + }, + { + "epoch": 8.729166666666666, + "grad_norm": 3.6110487331418923, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 191233892, + "step": 2095 + }, + { + "epoch": 8.729166666666666, + "loss": 0.07968556880950928, + "loss_ce": 0.0003703873953782022, + "loss_iou": 0.2470703125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 191233892, + "step": 2095 + }, + { + "epoch": 8.733333333333333, + "grad_norm": 2.5680080860852246, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 191325460, + "step": 2096 + }, + { + "epoch": 8.733333333333333, + "loss": 0.07615506649017334, + "loss_ce": 0.00021970555826555938, + "loss_iou": 0.2236328125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 191325460, + "step": 2096 + }, + { + "epoch": 8.7375, + "grad_norm": 1.4405956050430866, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 191416692, + "step": 2097 + }, + { + "epoch": 8.7375, + "loss": 0.11963039636611938, + "loss_ce": 0.0008712376584298909, + "loss_iou": 0.2431640625, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 191416692, + "step": 2097 + }, + { + "epoch": 8.741666666666667, + "grad_norm": 4.359680089658798, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 191508012, + "step": 2098 + }, + { + "epoch": 8.741666666666667, + "loss": 0.07305742800235748, + "loss_ce": 2.885695721488446e-05, + "loss_iou": 0.34375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 191508012, + "step": 2098 + }, + { + "epoch": 8.745833333333334, + "grad_norm": 4.433279601445979, + "learning_rate": 5e-05, + "loss": 0.1643, + "num_input_tokens_seen": 191599136, + "step": 2099 + }, + { + "epoch": 8.745833333333334, + "loss": 0.16946928203105927, + "loss_ce": 0.00017300611943937838, + "loss_iou": 0.41796875, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 191599136, + "step": 2099 + }, + { + "epoch": 8.75, + "grad_norm": 3.7135848759531958, + "learning_rate": 5e-05, + "loss": 0.1112, + "num_input_tokens_seen": 191690728, + "step": 2100 + }, + { + "epoch": 8.75, + "loss": 0.12792375683784485, + "loss_ce": 0.0010927030816674232, + "loss_iou": 0.154296875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 191690728, + "step": 2100 + }, + { + "epoch": 8.754166666666666, + "grad_norm": 1.7264645288509217, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 191781468, + "step": 2101 + }, + { + "epoch": 8.754166666666666, + "loss": 0.12447254359722137, + "loss_ce": 2.185742232541088e-05, + "loss_iou": 0.322265625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 191781468, + "step": 2101 + }, + { + "epoch": 8.758333333333333, + "grad_norm": 3.3933607028179202, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 191872584, + "step": 2102 + }, + { + "epoch": 8.758333333333333, + "loss": 0.04163660481572151, + "loss_ce": 0.0016738366102799773, + "loss_iou": 0.296875, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 191872584, + "step": 2102 + }, + { + "epoch": 8.7625, + "grad_norm": 8.584544951393006, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 191964244, + "step": 2103 + }, + { + "epoch": 8.7625, + "loss": 0.05103296786546707, + "loss_ce": 2.2833030016045086e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 191964244, + "step": 2103 + }, + { + "epoch": 8.766666666666667, + "grad_norm": 3.389537826363904, + "learning_rate": 5e-05, + "loss": 0.1095, + "num_input_tokens_seen": 192055988, + "step": 2104 + }, + { + "epoch": 8.766666666666667, + "loss": 0.11578889936208725, + "loss_ce": 0.0028128253761678934, + "loss_iou": 0.26171875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 192055988, + "step": 2104 + }, + { + "epoch": 8.770833333333334, + "grad_norm": 3.1339648510996656, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 192147132, + "step": 2105 + }, + { + "epoch": 8.770833333333334, + "loss": 0.03980373591184616, + "loss_ce": 0.0013515896862372756, + "loss_iou": 0.31640625, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 192147132, + "step": 2105 + }, + { + "epoch": 8.775, + "grad_norm": 29.03098369988653, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 192236892, + "step": 2106 + }, + { + "epoch": 8.775, + "loss": 0.11094363778829575, + "loss_ce": 4.276115942047909e-05, + "loss_iou": 0.216796875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 192236892, + "step": 2106 + }, + { + "epoch": 8.779166666666667, + "grad_norm": 6.477582900237416, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 192328540, + "step": 2107 + }, + { + "epoch": 8.779166666666667, + "loss": 0.12595024704933167, + "loss_ce": 0.004871758632361889, + "loss_iou": 0.326171875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 192328540, + "step": 2107 + }, + { + "epoch": 8.783333333333333, + "grad_norm": 4.283770827213335, + "learning_rate": 5e-05, + "loss": 0.1416, + "num_input_tokens_seen": 192419156, + "step": 2108 + }, + { + "epoch": 8.783333333333333, + "loss": 0.14962969720363617, + "loss_ce": 0.004701715894043446, + "loss_iou": 0.20703125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 192419156, + "step": 2108 + }, + { + "epoch": 8.7875, + "grad_norm": 2.6686898314759477, + "learning_rate": 5e-05, + "loss": 0.1166, + "num_input_tokens_seen": 192510612, + "step": 2109 + }, + { + "epoch": 8.7875, + "loss": 0.10354699194431305, + "loss_ce": 0.0019234552746638656, + "loss_iou": 0.205078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 192510612, + "step": 2109 + }, + { + "epoch": 8.791666666666666, + "grad_norm": 17.66232531707253, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 192602204, + "step": 2110 + }, + { + "epoch": 8.791666666666666, + "loss": 0.06168989837169647, + "loss_ce": 2.1500989532796666e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 192602204, + "step": 2110 + }, + { + "epoch": 8.795833333333333, + "grad_norm": 4.238738535772789, + "learning_rate": 5e-05, + "loss": 0.162, + "num_input_tokens_seen": 192693876, + "step": 2111 + }, + { + "epoch": 8.795833333333333, + "loss": 0.16318759322166443, + "loss_ce": 7.11341854184866e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 192693876, + "step": 2111 + }, + { + "epoch": 8.8, + "grad_norm": 4.050774282573471, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 192785168, + "step": 2112 + }, + { + "epoch": 8.8, + "loss": 0.06067885830998421, + "loss_ce": 0.0006050096708349884, + "loss_iou": 0.3359375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 192785168, + "step": 2112 + }, + { + "epoch": 8.804166666666667, + "grad_norm": 4.165906172671899, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 192876180, + "step": 2113 + }, + { + "epoch": 8.804166666666667, + "loss": 0.09994374215602875, + "loss_ce": 0.00044117873767390847, + "loss_iou": 0.38671875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 192876180, + "step": 2113 + }, + { + "epoch": 8.808333333333334, + "grad_norm": 1.6761793703721553, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 192967564, + "step": 2114 + }, + { + "epoch": 8.808333333333334, + "loss": 0.037985123693943024, + "loss_ce": 0.00040272765909321606, + "loss_iou": 0.3203125, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 192967564, + "step": 2114 + }, + { + "epoch": 8.8125, + "grad_norm": 1.3339770433832159, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 193058600, + "step": 2115 + }, + { + "epoch": 8.8125, + "loss": 0.13931246101856232, + "loss_ce": 4.5497559767682105e-05, + "loss_iou": 0.298828125, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 193058600, + "step": 2115 + }, + { + "epoch": 8.816666666666666, + "grad_norm": 8.546449636295195, + "learning_rate": 5e-05, + "loss": 0.1091, + "num_input_tokens_seen": 193149564, + "step": 2116 + }, + { + "epoch": 8.816666666666666, + "loss": 0.10763582587242126, + "loss_ce": 0.00012239051284268498, + "loss_iou": 0.2265625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 193149564, + "step": 2116 + }, + { + "epoch": 8.820833333333333, + "grad_norm": 1.7750513370782777, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 193240292, + "step": 2117 + }, + { + "epoch": 8.820833333333333, + "loss": 0.06835909187793732, + "loss_ce": 0.0012814588844776154, + "loss_iou": 0.123046875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 193240292, + "step": 2117 + }, + { + "epoch": 8.825, + "grad_norm": 2.7249429235580584, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 193331388, + "step": 2118 + }, + { + "epoch": 8.825, + "loss": 0.09143895655870438, + "loss_ce": 2.3549444449599832e-05, + "loss_iou": 0.2734375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 193331388, + "step": 2118 + }, + { + "epoch": 8.829166666666667, + "grad_norm": 1.9231219016705554, + "learning_rate": 5e-05, + "loss": 0.1003, + "num_input_tokens_seen": 193422096, + "step": 2119 + }, + { + "epoch": 8.829166666666667, + "loss": 0.07505150139331818, + "loss_ce": 0.0005580966244451702, + "loss_iou": 0.36328125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 193422096, + "step": 2119 + }, + { + "epoch": 8.833333333333334, + "grad_norm": 5.275844547716279, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 193513700, + "step": 2120 + }, + { + "epoch": 8.833333333333334, + "loss": 0.06332780420780182, + "loss_ce": 0.0007972927996888757, + "loss_iou": 0.302734375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 193513700, + "step": 2120 + }, + { + "epoch": 8.8375, + "grad_norm": 16.596431523665018, + "learning_rate": 5e-05, + "loss": 0.109, + "num_input_tokens_seen": 193604280, + "step": 2121 + }, + { + "epoch": 8.8375, + "loss": 0.13335970044136047, + "loss_ce": 0.00015810200420673937, + "loss_iou": 0.3359375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 193604280, + "step": 2121 + }, + { + "epoch": 8.841666666666667, + "grad_norm": 2.582045655864519, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 193695776, + "step": 2122 + }, + { + "epoch": 8.841666666666667, + "loss": 0.09266501665115356, + "loss_ce": 0.00013571848103310913, + "loss_iou": 0.32421875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 193695776, + "step": 2122 + }, + { + "epoch": 8.845833333333333, + "grad_norm": 2.0875391094102547, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 193787284, + "step": 2123 + }, + { + "epoch": 8.845833333333333, + "loss": 0.10424083471298218, + "loss_ce": 8.434802293777466e-05, + "loss_iou": 0.16015625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 193787284, + "step": 2123 + }, + { + "epoch": 8.85, + "grad_norm": 2.8181687136177556, + "learning_rate": 5e-05, + "loss": 0.1328, + "num_input_tokens_seen": 193877640, + "step": 2124 + }, + { + "epoch": 8.85, + "loss": 0.10266932845115662, + "loss_ce": 8.199165677069686e-06, + "loss_iou": 0.22265625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 193877640, + "step": 2124 + }, + { + "epoch": 8.854166666666666, + "grad_norm": 3.8795960865784234, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 193968252, + "step": 2125 + }, + { + "epoch": 8.854166666666666, + "loss": 0.046281665563583374, + "loss_ce": 0.000520557165145874, + "loss_iou": 0.208984375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 193968252, + "step": 2125 + }, + { + "epoch": 8.858333333333333, + "grad_norm": 3.9635752523662466, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 194060032, + "step": 2126 + }, + { + "epoch": 8.858333333333333, + "loss": 0.10270962119102478, + "loss_ce": 0.0006588352262042463, + "loss_iou": 0.384765625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 194060032, + "step": 2126 + }, + { + "epoch": 8.8625, + "grad_norm": 4.323210515511157, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 194151192, + "step": 2127 + }, + { + "epoch": 8.8625, + "loss": 0.09275850653648376, + "loss_ce": 7.662278221687302e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 194151192, + "step": 2127 + }, + { + "epoch": 8.866666666666667, + "grad_norm": 3.2666282041716883, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 194242152, + "step": 2128 + }, + { + "epoch": 8.866666666666667, + "loss": 0.11563707888126373, + "loss_ce": 3.6494038795353845e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 194242152, + "step": 2128 + }, + { + "epoch": 8.870833333333334, + "grad_norm": 4.249712852950774, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 194332956, + "step": 2129 + }, + { + "epoch": 8.870833333333334, + "loss": 0.06841768324375153, + "loss_ce": 2.7786783903138712e-05, + "loss_iou": 0.21484375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 194332956, + "step": 2129 + }, + { + "epoch": 8.875, + "grad_norm": 2.698944918523565, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 194424260, + "step": 2130 + }, + { + "epoch": 8.875, + "loss": 0.06714097410440445, + "loss_ce": 0.0006660611252300441, + "loss_iou": 0.150390625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 194424260, + "step": 2130 + }, + { + "epoch": 8.879166666666666, + "grad_norm": 2.3540455144523484, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 194515688, + "step": 2131 + }, + { + "epoch": 8.879166666666666, + "loss": 0.05055814981460571, + "loss_ce": 0.00023466537822969258, + "loss_iou": 0.2412109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 194515688, + "step": 2131 + }, + { + "epoch": 8.883333333333333, + "grad_norm": 4.2071846466237215, + "learning_rate": 5e-05, + "loss": 0.1608, + "num_input_tokens_seen": 194607016, + "step": 2132 + }, + { + "epoch": 8.883333333333333, + "loss": 0.09522165358066559, + "loss_ce": 0.0016700156265869737, + "loss_iou": 0.2255859375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 194607016, + "step": 2132 + }, + { + "epoch": 8.8875, + "grad_norm": 2.757105928706443, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 194698100, + "step": 2133 + }, + { + "epoch": 8.8875, + "loss": 0.0668044462800026, + "loss_ce": 0.001542604062706232, + "loss_iou": 0.224609375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 194698100, + "step": 2133 + }, + { + "epoch": 8.891666666666667, + "grad_norm": 2.5204276745096235, + "learning_rate": 5e-05, + "loss": 0.0413, + "num_input_tokens_seen": 194789372, + "step": 2134 + }, + { + "epoch": 8.891666666666667, + "loss": 0.037501260638237, + "loss_ce": 2.5677058147266507e-05, + "loss_iou": 0.31640625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 194789372, + "step": 2134 + }, + { + "epoch": 8.895833333333334, + "grad_norm": 3.8010790125482563, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 194880476, + "step": 2135 + }, + { + "epoch": 8.895833333333334, + "loss": 0.09368692338466644, + "loss_ce": 2.8475475119194016e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 194880476, + "step": 2135 + }, + { + "epoch": 8.9, + "grad_norm": 7.765168397738465, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 194971812, + "step": 2136 + }, + { + "epoch": 8.9, + "loss": 0.11584746837615967, + "loss_ce": 0.0007199091487564147, + "loss_iou": 0.294921875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 194971812, + "step": 2136 + }, + { + "epoch": 8.904166666666667, + "grad_norm": 2.8015176763074128, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 195063124, + "step": 2137 + }, + { + "epoch": 8.904166666666667, + "loss": 0.12815745174884796, + "loss_ce": 2.1764270059065893e-05, + "loss_iou": 0.29296875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 195063124, + "step": 2137 + }, + { + "epoch": 8.908333333333333, + "grad_norm": 6.391269107177382, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 195153872, + "step": 2138 + }, + { + "epoch": 8.908333333333333, + "loss": 0.0845765620470047, + "loss_ce": 0.0005158971180208027, + "loss_iou": 0.302734375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 195153872, + "step": 2138 + }, + { + "epoch": 8.9125, + "grad_norm": 3.908464589092032, + "learning_rate": 5e-05, + "loss": 0.1081, + "num_input_tokens_seen": 195245420, + "step": 2139 + }, + { + "epoch": 8.9125, + "loss": 0.13477161526679993, + "loss_ce": 0.0002501318231225014, + "loss_iou": 0.3125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 195245420, + "step": 2139 + }, + { + "epoch": 8.916666666666666, + "grad_norm": 2.900077761683489, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 195336768, + "step": 2140 + }, + { + "epoch": 8.916666666666666, + "loss": 0.07415474206209183, + "loss_ce": 0.0010041063651442528, + "loss_iou": 0.1455078125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 195336768, + "step": 2140 + }, + { + "epoch": 8.920833333333333, + "grad_norm": 4.150521780138764, + "learning_rate": 5e-05, + "loss": 0.1183, + "num_input_tokens_seen": 195428360, + "step": 2141 + }, + { + "epoch": 8.920833333333333, + "loss": 0.1628822684288025, + "loss_ce": 0.0022148583084344864, + "loss_iou": 0.1806640625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 195428360, + "step": 2141 + }, + { + "epoch": 8.925, + "grad_norm": 3.2825950683825718, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 195519640, + "step": 2142 + }, + { + "epoch": 8.925, + "loss": 0.057274091988801956, + "loss_ce": 0.00041984367999248207, + "loss_iou": 0.3203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 195519640, + "step": 2142 + }, + { + "epoch": 8.929166666666667, + "grad_norm": 3.5734075717896667, + "learning_rate": 5e-05, + "loss": 0.1432, + "num_input_tokens_seen": 195610488, + "step": 2143 + }, + { + "epoch": 8.929166666666667, + "loss": 0.17504486441612244, + "loss_ce": 0.0001104677066905424, + "loss_iou": 0.244140625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 195610488, + "step": 2143 + }, + { + "epoch": 8.933333333333334, + "grad_norm": 2.965201678323881, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 195701544, + "step": 2144 + }, + { + "epoch": 8.933333333333334, + "loss": 0.10431183129549026, + "loss_ce": 0.0018948402721434832, + "loss_iou": 0.2138671875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 195701544, + "step": 2144 + }, + { + "epoch": 8.9375, + "grad_norm": 2.9355869635988614, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 195792244, + "step": 2145 + }, + { + "epoch": 8.9375, + "loss": 0.0809515118598938, + "loss_ce": 0.00011044730490539223, + "loss_iou": 0.359375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 195792244, + "step": 2145 + }, + { + "epoch": 8.941666666666666, + "grad_norm": 13.929199195866536, + "learning_rate": 5e-05, + "loss": 0.1184, + "num_input_tokens_seen": 195882756, + "step": 2146 + }, + { + "epoch": 8.941666666666666, + "loss": 0.09679819643497467, + "loss_ce": 0.00036264234222471714, + "loss_iou": 0.31640625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 195882756, + "step": 2146 + }, + { + "epoch": 8.945833333333333, + "grad_norm": 2.251894875754947, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 195973952, + "step": 2147 + }, + { + "epoch": 8.945833333333333, + "loss": 0.06221667304635048, + "loss_ce": 6.762475823052227e-05, + "loss_iou": 0.2890625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 195973952, + "step": 2147 + }, + { + "epoch": 8.95, + "grad_norm": 5.5139699775706905, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 196065400, + "step": 2148 + }, + { + "epoch": 8.95, + "loss": 0.07538923621177673, + "loss_ce": 0.00017866550479084253, + "loss_iou": 0.37890625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 196065400, + "step": 2148 + }, + { + "epoch": 8.954166666666667, + "grad_norm": 6.863944184162462, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 196156408, + "step": 2149 + }, + { + "epoch": 8.954166666666667, + "loss": 0.08523640036582947, + "loss_ce": 7.71006743889302e-05, + "loss_iou": 0.359375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 196156408, + "step": 2149 + }, + { + "epoch": 8.958333333333334, + "grad_norm": 2.6859021474928895, + "learning_rate": 5e-05, + "loss": 0.1013, + "num_input_tokens_seen": 196247756, + "step": 2150 + }, + { + "epoch": 8.958333333333334, + "loss": 0.07768000662326813, + "loss_ce": 0.0005315648741088808, + "loss_iou": 0.27734375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 196247756, + "step": 2150 + }, + { + "epoch": 8.9625, + "grad_norm": 4.759145776420309, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 196337072, + "step": 2151 + }, + { + "epoch": 8.9625, + "loss": 0.0869455337524414, + "loss_ce": 7.724837632849813e-05, + "loss_iou": 0.275390625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 196337072, + "step": 2151 + }, + { + "epoch": 8.966666666666667, + "grad_norm": 2.7736352381734872, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 196428672, + "step": 2152 + }, + { + "epoch": 8.966666666666667, + "loss": 0.06051965802907944, + "loss_ce": 0.0017122854478657246, + "loss_iou": 0.2021484375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 196428672, + "step": 2152 + }, + { + "epoch": 8.970833333333333, + "grad_norm": 2.455637897021221, + "learning_rate": 5e-05, + "loss": 0.119, + "num_input_tokens_seen": 196519872, + "step": 2153 + }, + { + "epoch": 8.970833333333333, + "loss": 0.1420656442642212, + "loss_ce": 2.1564021153608337e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 196519872, + "step": 2153 + }, + { + "epoch": 8.975, + "grad_norm": 2.8515738371990134, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 196611176, + "step": 2154 + }, + { + "epoch": 8.975, + "loss": 0.06914485991001129, + "loss_ce": 0.0012585069052875042, + "loss_iou": 0.1884765625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 196611176, + "step": 2154 + }, + { + "epoch": 8.979166666666666, + "grad_norm": 20.28384126551533, + "learning_rate": 5e-05, + "loss": 0.1043, + "num_input_tokens_seen": 196702356, + "step": 2155 + }, + { + "epoch": 8.979166666666666, + "loss": 0.1420624554157257, + "loss_ce": 1.8393449863651767e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 196702356, + "step": 2155 + }, + { + "epoch": 8.983333333333333, + "grad_norm": 1.48734902741768, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 196792268, + "step": 2156 + }, + { + "epoch": 8.983333333333333, + "loss": 0.06732428073883057, + "loss_ce": 2.5076310521399137e-06, + "loss_iou": 0.1630859375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 196792268, + "step": 2156 + }, + { + "epoch": 8.9875, + "grad_norm": 5.590013731005448, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 196883568, + "step": 2157 + }, + { + "epoch": 8.9875, + "loss": 0.10056240856647491, + "loss_ce": 0.0037148739211261272, + "loss_iou": 0.2734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 196883568, + "step": 2157 + }, + { + "epoch": 8.991666666666667, + "grad_norm": 5.6620871011814256, + "learning_rate": 5e-05, + "loss": 0.1169, + "num_input_tokens_seen": 196974492, + "step": 2158 + }, + { + "epoch": 8.991666666666667, + "loss": 0.1554386019706726, + "loss_ce": 0.00027197724557481706, + "loss_iou": 0.1748046875, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 196974492, + "step": 2158 + }, + { + "epoch": 8.995833333333334, + "grad_norm": 7.269429389298034, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 197066204, + "step": 2159 + }, + { + "epoch": 8.995833333333334, + "loss": 0.047470733523368835, + "loss_ce": 0.0006796590751037002, + "loss_iou": 0.2470703125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 197066204, + "step": 2159 + }, + { + "epoch": 9.0, + "grad_norm": 2.956852458352327, + "learning_rate": 5e-05, + "loss": 0.1225, + "num_input_tokens_seen": 197157384, + "step": 2160 + }, + { + "epoch": 9.0, + "loss": 0.039815984666347504, + "loss_ce": 0.0024777266662567854, + "loss_iou": 0.298828125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 197157384, + "step": 2160 + }, + { + "epoch": 9.004166666666666, + "grad_norm": 4.667322136523386, + "learning_rate": 5e-05, + "loss": 0.0947, + "num_input_tokens_seen": 197249284, + "step": 2161 + }, + { + "epoch": 9.004166666666666, + "loss": 0.07299579679965973, + "loss_ce": 0.0029732147231698036, + "loss_iou": 0.29296875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 197249284, + "step": 2161 + }, + { + "epoch": 9.008333333333333, + "grad_norm": 2.7944442905740963, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 197340308, + "step": 2162 + }, + { + "epoch": 9.008333333333333, + "loss": 0.04382137954235077, + "loss_ce": 0.002492950763553381, + "loss_iou": 0.1162109375, + "loss_num": 0.00830078125, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 197340308, + "step": 2162 + }, + { + "epoch": 9.0125, + "grad_norm": 3.390843847331497, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 197431232, + "step": 2163 + }, + { + "epoch": 9.0125, + "loss": 0.04338308051228523, + "loss_ce": 2.3423449420079123e-06, + "loss_iou": 0.2001953125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 197431232, + "step": 2163 + }, + { + "epoch": 9.016666666666667, + "grad_norm": 3.6245159569976284, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 197522612, + "step": 2164 + }, + { + "epoch": 9.016666666666667, + "loss": 0.0630885437130928, + "loss_ce": 0.00045121321454644203, + "loss_iou": 0.23046875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 197522612, + "step": 2164 + }, + { + "epoch": 9.020833333333334, + "grad_norm": 2.1136200405152223, + "learning_rate": 5e-05, + "loss": 0.102, + "num_input_tokens_seen": 197613884, + "step": 2165 + }, + { + "epoch": 9.020833333333334, + "loss": 0.09729062765836716, + "loss_ce": 0.00030576009885407984, + "loss_iou": 0.310546875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 197613884, + "step": 2165 + }, + { + "epoch": 9.025, + "grad_norm": 6.339762525414115, + "learning_rate": 5e-05, + "loss": 0.1056, + "num_input_tokens_seen": 197705376, + "step": 2166 + }, + { + "epoch": 9.025, + "loss": 0.07680866122245789, + "loss_ce": 0.00013324561587069184, + "loss_iou": 0.39453125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 197705376, + "step": 2166 + }, + { + "epoch": 9.029166666666667, + "grad_norm": 11.265367650244409, + "learning_rate": 5e-05, + "loss": 0.1051, + "num_input_tokens_seen": 197796280, + "step": 2167 + }, + { + "epoch": 9.029166666666667, + "loss": 0.04952111840248108, + "loss_ce": 0.00012841640273109078, + "loss_iou": 0.197265625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 197796280, + "step": 2167 + }, + { + "epoch": 9.033333333333333, + "grad_norm": 5.4515887725256045, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 197887884, + "step": 2168 + }, + { + "epoch": 9.033333333333333, + "loss": 0.03975671902298927, + "loss_ce": 7.574854407721432e-06, + "loss_iou": 0.17578125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 197887884, + "step": 2168 + }, + { + "epoch": 9.0375, + "grad_norm": 7.046759699386093, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 197979352, + "step": 2169 + }, + { + "epoch": 9.0375, + "loss": 0.12461234629154205, + "loss_ce": 0.0014586546458303928, + "loss_iou": 0.11083984375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 197979352, + "step": 2169 + }, + { + "epoch": 9.041666666666666, + "grad_norm": 2.3299737528421494, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 198071212, + "step": 2170 + }, + { + "epoch": 9.041666666666666, + "loss": 0.07513460516929626, + "loss_ce": 0.000549639982637018, + "loss_iou": 0.359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 198071212, + "step": 2170 + }, + { + "epoch": 9.045833333333333, + "grad_norm": 2.700252359047943, + "learning_rate": 5e-05, + "loss": 0.1007, + "num_input_tokens_seen": 198162104, + "step": 2171 + }, + { + "epoch": 9.045833333333333, + "loss": 0.07018221169710159, + "loss_ce": 9.859356214292347e-05, + "loss_iou": 0.3125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 198162104, + "step": 2171 + }, + { + "epoch": 9.05, + "grad_norm": 3.98467720280014, + "learning_rate": 5e-05, + "loss": 0.1, + "num_input_tokens_seen": 198253448, + "step": 2172 + }, + { + "epoch": 9.05, + "loss": 0.12369519472122192, + "loss_ce": 0.0005033643683418632, + "loss_iou": 0.2490234375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 198253448, + "step": 2172 + }, + { + "epoch": 9.054166666666667, + "grad_norm": 4.505368366392643, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 198344568, + "step": 2173 + }, + { + "epoch": 9.054166666666667, + "loss": 0.0655064806342125, + "loss_ce": 1.575993883307092e-05, + "loss_iou": 0.328125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 198344568, + "step": 2173 + }, + { + "epoch": 9.058333333333334, + "grad_norm": 4.808297095727311, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 198435648, + "step": 2174 + }, + { + "epoch": 9.058333333333334, + "loss": 0.05769220367074013, + "loss_ce": 1.3981023585074581e-05, + "loss_iou": 0.39453125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 198435648, + "step": 2174 + }, + { + "epoch": 9.0625, + "grad_norm": 1.808092681487952, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 198527572, + "step": 2175 + }, + { + "epoch": 9.0625, + "loss": 0.05472852289676666, + "loss_ce": 7.154025661293417e-05, + "loss_iou": 0.330078125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 198527572, + "step": 2175 + }, + { + "epoch": 9.066666666666666, + "grad_norm": 4.707635649040197, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 198618784, + "step": 2176 + }, + { + "epoch": 9.066666666666666, + "loss": 0.045978933572769165, + "loss_ce": 0.0002636008430272341, + "loss_iou": 0.359375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 198618784, + "step": 2176 + }, + { + "epoch": 9.070833333333333, + "grad_norm": 7.944324392423701, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 198710052, + "step": 2177 + }, + { + "epoch": 9.070833333333333, + "loss": 0.13008880615234375, + "loss_ce": 0.0007705655298195779, + "loss_iou": 0.3359375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 198710052, + "step": 2177 + }, + { + "epoch": 9.075, + "grad_norm": 4.735021600903335, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 198800676, + "step": 2178 + }, + { + "epoch": 9.075, + "loss": 0.06908006966114044, + "loss_ce": 6.456708069890738e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 198800676, + "step": 2178 + }, + { + "epoch": 9.079166666666667, + "grad_norm": 3.1598471577438465, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 198892164, + "step": 2179 + }, + { + "epoch": 9.079166666666667, + "loss": 0.06824992597103119, + "loss_ce": 2.7882007998414338e-05, + "loss_iou": 0.263671875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 198892164, + "step": 2179 + }, + { + "epoch": 9.083333333333334, + "grad_norm": 7.784147543964003, + "learning_rate": 5e-05, + "loss": 0.0941, + "num_input_tokens_seen": 198983280, + "step": 2180 + }, + { + "epoch": 9.083333333333334, + "loss": 0.08407273143529892, + "loss_ce": 0.00025620521046221256, + "loss_iou": 0.228515625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 198983280, + "step": 2180 + }, + { + "epoch": 9.0875, + "grad_norm": 4.691636733756171, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 199074576, + "step": 2181 + }, + { + "epoch": 9.0875, + "loss": 0.09998394548892975, + "loss_ce": 8.361989785043988e-06, + "loss_iou": 0.314453125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 199074576, + "step": 2181 + }, + { + "epoch": 9.091666666666667, + "grad_norm": 21.62193954692702, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 199165880, + "step": 2182 + }, + { + "epoch": 9.091666666666667, + "loss": 0.03593273088335991, + "loss_ce": 0.00021190733241382986, + "loss_iou": 0.240234375, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 199165880, + "step": 2182 + }, + { + "epoch": 9.095833333333333, + "grad_norm": 4.401089957899346, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 199257320, + "step": 2183 + }, + { + "epoch": 9.095833333333333, + "loss": 0.06900110840797424, + "loss_ce": 1.6123740351758897e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 199257320, + "step": 2183 + }, + { + "epoch": 9.1, + "grad_norm": 6.244533248984985, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 199348444, + "step": 2184 + }, + { + "epoch": 9.1, + "loss": 0.05675504356622696, + "loss_ce": 0.0001067883349605836, + "loss_iou": 0.1689453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 199348444, + "step": 2184 + }, + { + "epoch": 9.104166666666666, + "grad_norm": 2.6506570582332705, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 199440092, + "step": 2185 + }, + { + "epoch": 9.104166666666666, + "loss": 0.05698202922940254, + "loss_ce": 2.096708158205729e-05, + "loss_iou": 0.30859375, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 199440092, + "step": 2185 + }, + { + "epoch": 9.108333333333333, + "grad_norm": 1.819478487887473, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 199531432, + "step": 2186 + }, + { + "epoch": 9.108333333333333, + "loss": 0.054636985063552856, + "loss_ce": 0.0002394043403910473, + "loss_iou": 0.216796875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 199531432, + "step": 2186 + }, + { + "epoch": 9.1125, + "grad_norm": 3.826257550497812, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 199623184, + "step": 2187 + }, + { + "epoch": 9.1125, + "loss": 0.0903702974319458, + "loss_ce": 0.0007859497563913465, + "loss_iou": 0.2216796875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 199623184, + "step": 2187 + }, + { + "epoch": 9.116666666666667, + "grad_norm": 3.7921324553123066, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 199715248, + "step": 2188 + }, + { + "epoch": 9.116666666666667, + "loss": 0.06321577727794647, + "loss_ce": 7.490571442758664e-05, + "loss_iou": 0.4453125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 199715248, + "step": 2188 + }, + { + "epoch": 9.120833333333334, + "grad_norm": 6.619413944901631, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 199806600, + "step": 2189 + }, + { + "epoch": 9.120833333333334, + "loss": 0.1414223164319992, + "loss_ce": 0.0012398207327350974, + "loss_iou": 0.2490234375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 199806600, + "step": 2189 + }, + { + "epoch": 9.125, + "grad_norm": 0.6229946226192945, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 199898248, + "step": 2190 + }, + { + "epoch": 9.125, + "loss": 0.051244426518678665, + "loss_ce": 0.0004860666231252253, + "loss_iou": 0.2119140625, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 199898248, + "step": 2190 + }, + { + "epoch": 9.129166666666666, + "grad_norm": 2.7563084923358896, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 199989508, + "step": 2191 + }, + { + "epoch": 9.129166666666666, + "loss": 0.09039859473705292, + "loss_ce": 0.00017337693134322762, + "loss_iou": 0.259765625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 199989508, + "step": 2191 + }, + { + "epoch": 9.133333333333333, + "grad_norm": 3.372297274259936, + "learning_rate": 5e-05, + "loss": 0.0919, + "num_input_tokens_seen": 200080684, + "step": 2192 + }, + { + "epoch": 9.133333333333333, + "loss": 0.08768831193447113, + "loss_ce": 1.1310762602079194e-05, + "loss_iou": 0.421875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 200080684, + "step": 2192 + }, + { + "epoch": 9.1375, + "grad_norm": 2.30061741959669, + "learning_rate": 5e-05, + "loss": 0.1092, + "num_input_tokens_seen": 200170408, + "step": 2193 + }, + { + "epoch": 9.1375, + "loss": 0.10583975911140442, + "loss_ce": 0.0013780894223600626, + "loss_iou": 0.1962890625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 200170408, + "step": 2193 + }, + { + "epoch": 9.141666666666667, + "grad_norm": 3.7881185986113937, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 200262016, + "step": 2194 + }, + { + "epoch": 9.141666666666667, + "loss": 0.046697668731212616, + "loss_ce": 2.1029973140684888e-05, + "loss_iou": 0.31640625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 200262016, + "step": 2194 + }, + { + "epoch": 9.145833333333334, + "grad_norm": 10.356708455363178, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 200352756, + "step": 2195 + }, + { + "epoch": 9.145833333333334, + "loss": 0.08656169474124908, + "loss_ce": 1.384494225931121e-05, + "loss_iou": 0.302734375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 200352756, + "step": 2195 + }, + { + "epoch": 9.15, + "grad_norm": 1.4011253078496293, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 200444596, + "step": 2196 + }, + { + "epoch": 9.15, + "loss": 0.10002411901950836, + "loss_ce": 0.0002926679444499314, + "loss_iou": 0.33984375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 200444596, + "step": 2196 + }, + { + "epoch": 9.154166666666667, + "grad_norm": 4.355472283675736, + "learning_rate": 5e-05, + "loss": 0.138, + "num_input_tokens_seen": 200534124, + "step": 2197 + }, + { + "epoch": 9.154166666666667, + "loss": 0.17626036703586578, + "loss_ce": 3.661260416265577e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 200534124, + "step": 2197 + }, + { + "epoch": 9.158333333333333, + "grad_norm": 3.2374131152319343, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 200625164, + "step": 2198 + }, + { + "epoch": 9.158333333333333, + "loss": 0.05326056480407715, + "loss_ce": 0.001289131585508585, + "loss_iou": 0.146484375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 200625164, + "step": 2198 + }, + { + "epoch": 9.1625, + "grad_norm": 5.774485586525706, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 200715712, + "step": 2199 + }, + { + "epoch": 9.1625, + "loss": 0.061350684612989426, + "loss_ce": 0.0002239774912595749, + "loss_iou": 0.24609375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 200715712, + "step": 2199 + }, + { + "epoch": 9.166666666666666, + "grad_norm": 6.673424562477215, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 200806972, + "step": 2200 + }, + { + "epoch": 9.166666666666666, + "loss": 0.07667578756809235, + "loss_ce": 0.0026554071810096502, + "loss_iou": 0.1806640625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 200806972, + "step": 2200 + }, + { + "epoch": 9.170833333333333, + "grad_norm": 4.918866754467832, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 200898196, + "step": 2201 + }, + { + "epoch": 9.170833333333333, + "loss": 0.08252261579036713, + "loss_ce": 0.010867348872125149, + "loss_iou": 0.232421875, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 200898196, + "step": 2201 + }, + { + "epoch": 9.175, + "grad_norm": 3.3958933070224075, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 200989876, + "step": 2202 + }, + { + "epoch": 9.175, + "loss": 0.06549730896949768, + "loss_ce": 0.00015917142445687205, + "loss_iou": 0.1435546875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 200989876, + "step": 2202 + }, + { + "epoch": 9.179166666666667, + "grad_norm": 2.2801243360680954, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 201080428, + "step": 2203 + }, + { + "epoch": 9.179166666666667, + "loss": 0.08679927885532379, + "loss_ce": 7.290714165719692e-06, + "loss_iou": 0.46484375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 201080428, + "step": 2203 + }, + { + "epoch": 9.183333333333334, + "grad_norm": 2.455656751871585, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 201171728, + "step": 2204 + }, + { + "epoch": 9.183333333333334, + "loss": 0.1050679013133049, + "loss_ce": 0.0014454597840085626, + "loss_iou": 0.267578125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 201171728, + "step": 2204 + }, + { + "epoch": 9.1875, + "grad_norm": 6.308043081686157, + "learning_rate": 5e-05, + "loss": 0.1348, + "num_input_tokens_seen": 201262216, + "step": 2205 + }, + { + "epoch": 9.1875, + "loss": 0.1272621750831604, + "loss_ce": 0.0002937901590485126, + "loss_iou": 0.263671875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 201262216, + "step": 2205 + }, + { + "epoch": 9.191666666666666, + "grad_norm": 24.558690680981883, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 201353840, + "step": 2206 + }, + { + "epoch": 9.191666666666666, + "loss": 0.067722387611866, + "loss_ce": 0.004337374120950699, + "loss_iou": 0.2373046875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 201353840, + "step": 2206 + }, + { + "epoch": 9.195833333333333, + "grad_norm": 2.437776954669518, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 201444832, + "step": 2207 + }, + { + "epoch": 9.195833333333333, + "loss": 0.11072726547718048, + "loss_ce": 2.475509791111108e-05, + "loss_iou": 0.140625, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 201444832, + "step": 2207 + }, + { + "epoch": 9.2, + "grad_norm": 1.3908832751169862, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 201534856, + "step": 2208 + }, + { + "epoch": 9.2, + "loss": 0.03558644652366638, + "loss_ce": 0.000109762855572626, + "loss_iou": 0.09912109375, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 201534856, + "step": 2208 + }, + { + "epoch": 9.204166666666667, + "grad_norm": 3.6286645530410246, + "learning_rate": 5e-05, + "loss": 0.0759, + "num_input_tokens_seen": 201626312, + "step": 2209 + }, + { + "epoch": 9.204166666666667, + "loss": 0.07812509685754776, + "loss_ce": 7.639089017175138e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 201626312, + "step": 2209 + }, + { + "epoch": 9.208333333333334, + "grad_norm": 2.7511377380974533, + "learning_rate": 5e-05, + "loss": 0.116, + "num_input_tokens_seen": 201716604, + "step": 2210 + }, + { + "epoch": 9.208333333333334, + "loss": 0.17026406526565552, + "loss_ce": 0.000128566927742213, + "loss_iou": 0.37890625, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 201716604, + "step": 2210 + }, + { + "epoch": 9.2125, + "grad_norm": 9.112774236165663, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 201808304, + "step": 2211 + }, + { + "epoch": 9.2125, + "loss": 0.051056019961833954, + "loss_ce": 0.00045787671115249395, + "loss_iou": 0.1923828125, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 201808304, + "step": 2211 + }, + { + "epoch": 9.216666666666667, + "grad_norm": 4.895428761300017, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 201899716, + "step": 2212 + }, + { + "epoch": 9.216666666666667, + "loss": 0.08767993748188019, + "loss_ce": 7.922661461634561e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 201899716, + "step": 2212 + }, + { + "epoch": 9.220833333333333, + "grad_norm": 4.647479505670012, + "learning_rate": 5e-05, + "loss": 0.1367, + "num_input_tokens_seen": 201990580, + "step": 2213 + }, + { + "epoch": 9.220833333333333, + "loss": 0.21614643931388855, + "loss_ce": 0.00089642294915393, + "loss_iou": 0.171875, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 201990580, + "step": 2213 + }, + { + "epoch": 9.225, + "grad_norm": 3.508364549794977, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 202082188, + "step": 2214 + }, + { + "epoch": 9.225, + "loss": 0.07295480370521545, + "loss_ce": 0.0007044363301247358, + "loss_iou": 0.283203125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 202082188, + "step": 2214 + }, + { + "epoch": 9.229166666666666, + "grad_norm": 2.0454420409331497, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 202174192, + "step": 2215 + }, + { + "epoch": 9.229166666666666, + "loss": 0.03788067027926445, + "loss_ce": 0.00013042415957897902, + "loss_iou": 0.2451171875, + "loss_num": 0.007537841796875, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 202174192, + "step": 2215 + }, + { + "epoch": 9.233333333333333, + "grad_norm": 2.0495107961366528, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 202265876, + "step": 2216 + }, + { + "epoch": 9.233333333333333, + "loss": 0.04935688525438309, + "loss_ce": 0.0004219515831209719, + "loss_iou": 0.32421875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 202265876, + "step": 2216 + }, + { + "epoch": 9.2375, + "grad_norm": 2.517464451287442, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 202357740, + "step": 2217 + }, + { + "epoch": 9.2375, + "loss": 0.04978005215525627, + "loss_ce": 0.0004407581582199782, + "loss_iou": 0.201171875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 202357740, + "step": 2217 + }, + { + "epoch": 9.241666666666667, + "grad_norm": 3.3097343702421282, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 202447096, + "step": 2218 + }, + { + "epoch": 9.241666666666667, + "loss": 0.05562786012887955, + "loss_ce": 0.00014690443640574813, + "loss_iou": 0.1982421875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 202447096, + "step": 2218 + }, + { + "epoch": 9.245833333333334, + "grad_norm": 2.6334267837027703, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 202538308, + "step": 2219 + }, + { + "epoch": 9.245833333333334, + "loss": 0.054739732295274734, + "loss_ce": 5.2235387556720525e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 202538308, + "step": 2219 + }, + { + "epoch": 9.25, + "grad_norm": 9.03035066197087, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 202629564, + "step": 2220 + }, + { + "epoch": 9.25, + "loss": 0.06619110703468323, + "loss_ce": 4.425767110660672e-05, + "loss_iou": 0.33203125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 202629564, + "step": 2220 + }, + { + "epoch": 9.254166666666666, + "grad_norm": 3.3762716990766264, + "learning_rate": 5e-05, + "loss": 0.1171, + "num_input_tokens_seen": 202720532, + "step": 2221 + }, + { + "epoch": 9.254166666666666, + "loss": 0.12677684426307678, + "loss_ce": 6.820567705290159e-06, + "loss_iou": 0.259765625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 202720532, + "step": 2221 + }, + { + "epoch": 9.258333333333333, + "grad_norm": 10.875326064101861, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 202811636, + "step": 2222 + }, + { + "epoch": 9.258333333333333, + "loss": 0.07245151698589325, + "loss_ce": 2.7908758966077585e-06, + "loss_iou": 0.265625, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 202811636, + "step": 2222 + }, + { + "epoch": 9.2625, + "grad_norm": 3.7498775049362583, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 202903444, + "step": 2223 + }, + { + "epoch": 9.2625, + "loss": 0.07416309416294098, + "loss_ce": 9.693222818896174e-05, + "loss_iou": 0.267578125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 202903444, + "step": 2223 + }, + { + "epoch": 9.266666666666667, + "grad_norm": 4.332373597067934, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 202994716, + "step": 2224 + }, + { + "epoch": 9.266666666666667, + "loss": 0.07626857608556747, + "loss_ce": 3.566586383385584e-05, + "loss_iou": 0.23046875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 202994716, + "step": 2224 + }, + { + "epoch": 9.270833333333334, + "grad_norm": 2.2371257085799505, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 203086340, + "step": 2225 + }, + { + "epoch": 9.270833333333334, + "loss": 0.08375569432973862, + "loss_ce": 3.0716892069904134e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 203086340, + "step": 2225 + }, + { + "epoch": 9.275, + "grad_norm": 1.7012267752678156, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 203177744, + "step": 2226 + }, + { + "epoch": 9.275, + "loss": 0.14392778277397156, + "loss_ce": 0.00018999181338585913, + "loss_iou": 0.26953125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 203177744, + "step": 2226 + }, + { + "epoch": 9.279166666666667, + "grad_norm": 1.865567681651061, + "learning_rate": 5e-05, + "loss": 0.0983, + "num_input_tokens_seen": 203270012, + "step": 2227 + }, + { + "epoch": 9.279166666666667, + "loss": 0.11878585815429688, + "loss_ce": 0.0067253089509904385, + "loss_iou": 0.2275390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 203270012, + "step": 2227 + }, + { + "epoch": 9.283333333333333, + "grad_norm": 2.2906264696681062, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 203361336, + "step": 2228 + }, + { + "epoch": 9.283333333333333, + "loss": 0.11910998076200485, + "loss_ce": 0.004455443471670151, + "loss_iou": 0.19921875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 203361336, + "step": 2228 + }, + { + "epoch": 9.2875, + "grad_norm": 3.3780007096420315, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 203451800, + "step": 2229 + }, + { + "epoch": 9.2875, + "loss": 0.08809817582368851, + "loss_ce": 0.00048221100587397814, + "loss_iou": 0.216796875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 203451800, + "step": 2229 + }, + { + "epoch": 9.291666666666666, + "grad_norm": 3.513922891850055, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 203543576, + "step": 2230 + }, + { + "epoch": 9.291666666666666, + "loss": 0.07334207743406296, + "loss_ce": 0.002266637748107314, + "loss_iou": 0.2490234375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 203543576, + "step": 2230 + }, + { + "epoch": 9.295833333333333, + "grad_norm": 46.55263213397429, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 203634712, + "step": 2231 + }, + { + "epoch": 9.295833333333333, + "loss": 0.10764908045530319, + "loss_ce": 0.0007154846098273993, + "loss_iou": 0.2001953125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 203634712, + "step": 2231 + }, + { + "epoch": 9.3, + "grad_norm": 4.245117723090011, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 203725908, + "step": 2232 + }, + { + "epoch": 9.3, + "loss": 0.10209492594003677, + "loss_ce": 4.4143747800262645e-05, + "loss_iou": 0.1640625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 203725908, + "step": 2232 + }, + { + "epoch": 9.304166666666667, + "grad_norm": 4.580751775042531, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 203817044, + "step": 2233 + }, + { + "epoch": 9.304166666666667, + "loss": 0.07633492350578308, + "loss_ce": 2.5714654839248396e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 203817044, + "step": 2233 + }, + { + "epoch": 9.308333333333334, + "grad_norm": 5.19491098728478, + "learning_rate": 5e-05, + "loss": 0.1068, + "num_input_tokens_seen": 203908056, + "step": 2234 + }, + { + "epoch": 9.308333333333334, + "loss": 0.10972404479980469, + "loss_ce": 4.387239459902048e-05, + "loss_iou": 0.296875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 203908056, + "step": 2234 + }, + { + "epoch": 9.3125, + "grad_norm": 3.127703276953714, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 203999944, + "step": 2235 + }, + { + "epoch": 9.3125, + "loss": 0.058495134115219116, + "loss_ce": 0.0009847574401646852, + "loss_iou": 0.208984375, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 203999944, + "step": 2235 + }, + { + "epoch": 9.316666666666666, + "grad_norm": 5.97333492039574, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 204091188, + "step": 2236 + }, + { + "epoch": 9.316666666666666, + "loss": 0.06130867451429367, + "loss_ce": 2.9378070394159295e-05, + "loss_iou": 0.263671875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 204091188, + "step": 2236 + }, + { + "epoch": 9.320833333333333, + "grad_norm": 3.4424382346355586, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 204182404, + "step": 2237 + }, + { + "epoch": 9.320833333333333, + "loss": 0.07460271567106247, + "loss_ce": 2.5382661988260224e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 204182404, + "step": 2237 + }, + { + "epoch": 9.325, + "grad_norm": 2.138306168086027, + "learning_rate": 5e-05, + "loss": 0.1213, + "num_input_tokens_seen": 204273752, + "step": 2238 + }, + { + "epoch": 9.325, + "loss": 0.11179852485656738, + "loss_ce": 0.0001957409840542823, + "loss_iou": 0.32421875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 204273752, + "step": 2238 + }, + { + "epoch": 9.329166666666667, + "grad_norm": 1.6961706423169747, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 204365528, + "step": 2239 + }, + { + "epoch": 9.329166666666667, + "loss": 0.06735285371541977, + "loss_ce": 0.0017400594661012292, + "loss_iou": 0.240234375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 204365528, + "step": 2239 + }, + { + "epoch": 9.333333333333334, + "grad_norm": 6.645834701948286, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 204457388, + "step": 2240 + }, + { + "epoch": 9.333333333333334, + "loss": 0.11958567798137665, + "loss_ce": 0.00017039466183632612, + "loss_iou": 0.26171875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 204457388, + "step": 2240 + }, + { + "epoch": 9.3375, + "grad_norm": 3.59784310282802, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 204548732, + "step": 2241 + }, + { + "epoch": 9.3375, + "loss": 0.0499962717294693, + "loss_ce": 0.0001458074984839186, + "loss_iou": 0.154296875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 204548732, + "step": 2241 + }, + { + "epoch": 9.341666666666667, + "grad_norm": 2.2014844455334837, + "learning_rate": 5e-05, + "loss": 0.1281, + "num_input_tokens_seen": 204639824, + "step": 2242 + }, + { + "epoch": 9.341666666666667, + "loss": 0.07682133466005325, + "loss_ce": 0.00016117449558805674, + "loss_iou": 0.291015625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 204639824, + "step": 2242 + }, + { + "epoch": 9.345833333333333, + "grad_norm": 3.077157982106598, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 204730496, + "step": 2243 + }, + { + "epoch": 9.345833333333333, + "loss": 0.07074464857578278, + "loss_ce": 2.015555219259113e-05, + "loss_iou": 0.31640625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 204730496, + "step": 2243 + }, + { + "epoch": 9.35, + "grad_norm": 2.4578862173343317, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 204821520, + "step": 2244 + }, + { + "epoch": 9.35, + "loss": 0.05760511755943298, + "loss_ce": 3.1891840990283526e-06, + "loss_iou": 0.291015625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 204821520, + "step": 2244 + }, + { + "epoch": 9.354166666666666, + "grad_norm": 7.810310706978085, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 204913148, + "step": 2245 + }, + { + "epoch": 9.354166666666666, + "loss": 0.09471295028924942, + "loss_ce": 1.6910182239371352e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 204913148, + "step": 2245 + }, + { + "epoch": 9.358333333333333, + "grad_norm": 3.393904127948915, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 205004668, + "step": 2246 + }, + { + "epoch": 9.358333333333333, + "loss": 0.07558546960353851, + "loss_ce": 0.005860439967364073, + "loss_iou": 0.1533203125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 205004668, + "step": 2246 + }, + { + "epoch": 9.3625, + "grad_norm": 5.296393204114102, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 205096320, + "step": 2247 + }, + { + "epoch": 9.3625, + "loss": 0.10556471347808838, + "loss_ce": 0.003697034204378724, + "loss_iou": 0.244140625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 205096320, + "step": 2247 + }, + { + "epoch": 9.366666666666667, + "grad_norm": 3.98805561043107, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 205187728, + "step": 2248 + }, + { + "epoch": 9.366666666666667, + "loss": 0.07330742478370667, + "loss_ce": 4.2080778257513884e-06, + "loss_iou": 0.25390625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 205187728, + "step": 2248 + }, + { + "epoch": 9.370833333333334, + "grad_norm": 3.422412576639185, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 205279156, + "step": 2249 + }, + { + "epoch": 9.370833333333334, + "loss": 0.0616975836455822, + "loss_ce": 6.733382178936154e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 205279156, + "step": 2249 + }, + { + "epoch": 9.375, + "grad_norm": 3.952350471522899, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 205370052, + "step": 2250 + }, + { + "epoch": 9.375, + "eval_seeclick_CIoU": 0.2784217670559883, + "eval_seeclick_GIoU": 0.27900155633687973, + "eval_seeclick_IoU": 0.36821986734867096, + "eval_seeclick_MAE_all": 0.08717832714319229, + "eval_seeclick_MAE_h": 0.0708407387137413, + "eval_seeclick_MAE_w": 0.18288671225309372, + "eval_seeclick_MAE_x_boxes": 0.1732628047466278, + "eval_seeclick_MAE_y_boxes": 0.07997602596879005, + "eval_seeclick_NUM_probability": 0.9999985694885254, + "eval_seeclick_inside_bbox": 0.5397727340459824, + "eval_seeclick_loss": 0.5365479588508606, + "eval_seeclick_loss_ce": 0.136946901679039, + "eval_seeclick_loss_iou": 0.4412841796875, + "eval_seeclick_loss_num": 0.080352783203125, + "eval_seeclick_loss_xval": 0.40203857421875, + "eval_seeclick_runtime": 80.7149, + "eval_seeclick_samples_per_second": 0.533, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 205370052, + "step": 2250 + }, + { + "epoch": 9.375, + "eval_icons_CIoU": 0.3527114614844322, + "eval_icons_GIoU": 0.3811039477586746, + "eval_icons_IoU": 0.43005380034446716, + "eval_icons_MAE_all": 0.06621737964451313, + "eval_icons_MAE_h": 0.13642537221312523, + "eval_icons_MAE_w": 0.09623197466135025, + "eval_icons_MAE_x_boxes": 0.09396588802337646, + "eval_icons_MAE_y_boxes": 0.13910193741321564, + "eval_icons_NUM_probability": 0.9999993145465851, + "eval_icons_inside_bbox": 0.5503472238779068, + "eval_icons_loss": 0.31992390751838684, + "eval_icons_loss_ce": 1.8518157958169468e-06, + "eval_icons_loss_iou": 0.3302001953125, + "eval_icons_loss_num": 0.0670318603515625, + "eval_icons_loss_xval": 0.335205078125, + "eval_icons_runtime": 97.4337, + "eval_icons_samples_per_second": 0.513, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 205370052, + "step": 2250 + }, + { + "epoch": 9.375, + "eval_screenspot_CIoU": 0.39432772994041443, + "eval_screenspot_GIoU": 0.3775731921195984, + "eval_screenspot_IoU": 0.4619280795256297, + "eval_screenspot_MAE_all": 0.0979969451824824, + "eval_screenspot_MAE_h": 0.09322212388118108, + "eval_screenspot_MAE_w": 0.19270053009192148, + "eval_screenspot_MAE_x_boxes": 0.19473616778850555, + "eval_screenspot_MAE_y_boxes": 0.08440528810024261, + "eval_screenspot_NUM_probability": 0.9999984304110209, + "eval_screenspot_inside_bbox": 0.7058333357175192, + "eval_screenspot_loss": 0.4944130480289459, + "eval_screenspot_loss_ce": 9.930451142281527e-06, + "eval_screenspot_loss_iou": 0.4032389322916667, + "eval_screenspot_loss_num": 0.099884033203125, + "eval_screenspot_loss_xval": 0.4994303385416667, + "eval_screenspot_runtime": 150.0641, + "eval_screenspot_samples_per_second": 0.593, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 205370052, + "step": 2250 + }, + { + "epoch": 9.375, + "eval_compot_CIoU": 0.40679484605789185, + "eval_compot_GIoU": 0.3917129784822464, + "eval_compot_IoU": 0.48430830240249634, + "eval_compot_MAE_all": 0.06649945117533207, + "eval_compot_MAE_h": 0.09491590782999992, + "eval_compot_MAE_w": 0.1506771482527256, + "eval_compot_MAE_x_boxes": 0.14771829172968864, + "eval_compot_MAE_y_boxes": 0.09101338312029839, + "eval_compot_NUM_probability": 0.9999987185001373, + "eval_compot_inside_bbox": 0.6614583432674408, + "eval_compot_loss": 0.3391985595226288, + "eval_compot_loss_ce": 0.025836432352662086, + "eval_compot_loss_iou": 0.31207275390625, + "eval_compot_loss_num": 0.0590057373046875, + "eval_compot_loss_xval": 0.2950439453125, + "eval_compot_runtime": 86.1939, + "eval_compot_samples_per_second": 0.58, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 205370052, + "step": 2250 + }, + { + "epoch": 9.375, + "loss": 0.3358990252017975, + "loss_ce": 0.02400936558842659, + "loss_iou": 0.30859375, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 205370052, + "step": 2250 + }, + { + "epoch": 9.379166666666666, + "grad_norm": 6.726949224713207, + "learning_rate": 5e-05, + "loss": 0.1254, + "num_input_tokens_seen": 205462060, + "step": 2251 + }, + { + "epoch": 9.379166666666666, + "loss": 0.10401031374931335, + "loss_ce": 0.0011660760501399636, + "loss_iou": 0.37890625, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 205462060, + "step": 2251 + }, + { + "epoch": 9.383333333333333, + "grad_norm": 3.5720176536381114, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 205553276, + "step": 2252 + }, + { + "epoch": 9.383333333333333, + "loss": 0.10866258293390274, + "loss_ce": 0.0006456149276345968, + "loss_iou": 0.228515625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 205553276, + "step": 2252 + }, + { + "epoch": 9.3875, + "grad_norm": 4.580756312669723, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 205644856, + "step": 2253 + }, + { + "epoch": 9.3875, + "loss": 0.07860147953033447, + "loss_ce": 0.00017130102787632495, + "loss_iou": 0.28515625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 205644856, + "step": 2253 + }, + { + "epoch": 9.391666666666667, + "grad_norm": 2.528667604033413, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 205735956, + "step": 2254 + }, + { + "epoch": 9.391666666666667, + "loss": 0.12907829880714417, + "loss_ce": 4.210660335957073e-06, + "loss_iou": 0.3125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 205735956, + "step": 2254 + }, + { + "epoch": 9.395833333333334, + "grad_norm": 3.322085127164889, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 205827392, + "step": 2255 + }, + { + "epoch": 9.395833333333334, + "loss": 0.06169138848781586, + "loss_ce": 0.0031891947146505117, + "loss_iou": 0.2890625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 205827392, + "step": 2255 + }, + { + "epoch": 9.4, + "grad_norm": 3.450005752931726, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 205919032, + "step": 2256 + }, + { + "epoch": 9.4, + "loss": 0.03724807873368263, + "loss_ce": 0.0003904743352904916, + "loss_iou": 0.1484375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 205919032, + "step": 2256 + }, + { + "epoch": 9.404166666666667, + "grad_norm": 2.9762252585224616, + "learning_rate": 5e-05, + "loss": 0.1238, + "num_input_tokens_seen": 206010268, + "step": 2257 + }, + { + "epoch": 9.404166666666667, + "loss": 0.0545886866748333, + "loss_ce": 5.3776235290570185e-05, + "loss_iou": 0.171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 206010268, + "step": 2257 + }, + { + "epoch": 9.408333333333333, + "grad_norm": 2.8275104606861894, + "learning_rate": 5e-05, + "loss": 0.1069, + "num_input_tokens_seen": 206102272, + "step": 2258 + }, + { + "epoch": 9.408333333333333, + "loss": 0.11684298515319824, + "loss_ce": 0.0014712885022163391, + "loss_iou": 0.251953125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 206102272, + "step": 2258 + }, + { + "epoch": 9.4125, + "grad_norm": 3.639553308870559, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 206193836, + "step": 2259 + }, + { + "epoch": 9.4125, + "loss": 0.0892866849899292, + "loss_ce": 0.00020587486505974084, + "loss_iou": 0.427734375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 206193836, + "step": 2259 + }, + { + "epoch": 9.416666666666666, + "grad_norm": 2.973670802416314, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 206284796, + "step": 2260 + }, + { + "epoch": 9.416666666666666, + "loss": 0.13764168322086334, + "loss_ce": 0.0003278392250649631, + "loss_iou": 0.3046875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 206284796, + "step": 2260 + }, + { + "epoch": 9.420833333333333, + "grad_norm": 5.92163105063818, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 206376552, + "step": 2261 + }, + { + "epoch": 9.420833333333333, + "loss": 0.13112279772758484, + "loss_ce": 0.0009348143939860165, + "loss_iou": 0.265625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 206376552, + "step": 2261 + }, + { + "epoch": 9.425, + "grad_norm": 6.48320500481271, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 206467724, + "step": 2262 + }, + { + "epoch": 9.425, + "loss": 0.0606074333190918, + "loss_ce": 1.4783408914809115e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 206467724, + "step": 2262 + }, + { + "epoch": 9.429166666666667, + "grad_norm": 2.385781764686817, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 206558988, + "step": 2263 + }, + { + "epoch": 9.429166666666667, + "loss": 0.08593662828207016, + "loss_ce": 0.00013645211583934724, + "loss_iou": 0.1796875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 206558988, + "step": 2263 + }, + { + "epoch": 9.433333333333334, + "grad_norm": 4.987948990102972, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 206649740, + "step": 2264 + }, + { + "epoch": 9.433333333333334, + "loss": 0.03622180223464966, + "loss_ce": 0.002896607154980302, + "loss_iou": 0.21875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 206649740, + "step": 2264 + }, + { + "epoch": 9.4375, + "grad_norm": 4.131020051739761, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 206740656, + "step": 2265 + }, + { + "epoch": 9.4375, + "loss": 0.15009549260139465, + "loss_ce": 1.0045349881693255e-05, + "loss_iou": 0.35546875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 206740656, + "step": 2265 + }, + { + "epoch": 9.441666666666666, + "grad_norm": 3.589618825751963, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 206832280, + "step": 2266 + }, + { + "epoch": 9.441666666666666, + "loss": 0.09583691507577896, + "loss_ce": 0.001598631264641881, + "loss_iou": 0.31640625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 206832280, + "step": 2266 + }, + { + "epoch": 9.445833333333333, + "grad_norm": 2.959962675425685, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 206923680, + "step": 2267 + }, + { + "epoch": 9.445833333333333, + "loss": 0.07070231437683105, + "loss_ce": 0.00032878227648325264, + "loss_iou": 0.25390625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 206923680, + "step": 2267 + }, + { + "epoch": 9.45, + "grad_norm": 2.1554765527304856, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 207015212, + "step": 2268 + }, + { + "epoch": 9.45, + "loss": 0.09321004897356033, + "loss_ce": 0.0005968199693597853, + "loss_iou": 0.1708984375, + "loss_num": 0.0184326171875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 207015212, + "step": 2268 + }, + { + "epoch": 9.454166666666667, + "grad_norm": 1.687814277230721, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 207106856, + "step": 2269 + }, + { + "epoch": 9.454166666666667, + "loss": 0.05360877513885498, + "loss_ce": 0.00012672031880356371, + "loss_iou": 0.2890625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 207106856, + "step": 2269 + }, + { + "epoch": 9.458333333333334, + "grad_norm": 11.917138995240949, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 207198372, + "step": 2270 + }, + { + "epoch": 9.458333333333334, + "loss": 0.06492700427770615, + "loss_ce": 0.00011530078336363658, + "loss_iou": 0.162109375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 207198372, + "step": 2270 + }, + { + "epoch": 9.4625, + "grad_norm": 1.3942957040035504, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 207289668, + "step": 2271 + }, + { + "epoch": 9.4625, + "loss": 0.056693121790885925, + "loss_ce": 0.0005865513230673969, + "loss_iou": 0.09716796875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 207289668, + "step": 2271 + }, + { + "epoch": 9.466666666666667, + "grad_norm": 2.351656427742343, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 207380392, + "step": 2272 + }, + { + "epoch": 9.466666666666667, + "loss": 0.11181023716926575, + "loss_ce": 0.0016417772276327014, + "loss_iou": 0.349609375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 207380392, + "step": 2272 + }, + { + "epoch": 9.470833333333333, + "grad_norm": 4.158043720550659, + "learning_rate": 5e-05, + "loss": 0.0938, + "num_input_tokens_seen": 207471628, + "step": 2273 + }, + { + "epoch": 9.470833333333333, + "loss": 0.0949772447347641, + "loss_ce": 9.809455514186993e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 207471628, + "step": 2273 + }, + { + "epoch": 9.475, + "grad_norm": 4.92816733376496, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 207563016, + "step": 2274 + }, + { + "epoch": 9.475, + "loss": 0.06635133922100067, + "loss_ce": 0.0008301038178615272, + "loss_iou": 0.296875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 207563016, + "step": 2274 + }, + { + "epoch": 9.479166666666666, + "grad_norm": 2.790930691242936, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 207654544, + "step": 2275 + }, + { + "epoch": 9.479166666666666, + "loss": 0.03971107304096222, + "loss_ce": 0.0029679089784622192, + "loss_iou": 0.30078125, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 207654544, + "step": 2275 + }, + { + "epoch": 9.483333333333333, + "grad_norm": 9.756186645468425, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 207745812, + "step": 2276 + }, + { + "epoch": 9.483333333333333, + "loss": 0.05628746375441551, + "loss_ce": 7.408284727716818e-05, + "loss_iou": 0.37109375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 207745812, + "step": 2276 + }, + { + "epoch": 9.4875, + "grad_norm": 4.426166362680703, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 207836940, + "step": 2277 + }, + { + "epoch": 9.4875, + "loss": 0.11567720770835876, + "loss_ce": 4.6097604354145005e-05, + "loss_iou": 0.390625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 207836940, + "step": 2277 + }, + { + "epoch": 9.491666666666667, + "grad_norm": 4.623619751098643, + "learning_rate": 5e-05, + "loss": 0.0987, + "num_input_tokens_seen": 207928944, + "step": 2278 + }, + { + "epoch": 9.491666666666667, + "loss": 0.1042831540107727, + "loss_ce": 0.002354443771764636, + "loss_iou": 0.255859375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 207928944, + "step": 2278 + }, + { + "epoch": 9.495833333333334, + "grad_norm": 2.1600688546351123, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 208019900, + "step": 2279 + }, + { + "epoch": 9.495833333333334, + "loss": 0.06627009809017181, + "loss_ce": 0.0011303334031254053, + "loss_iou": 0.20703125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 208019900, + "step": 2279 + }, + { + "epoch": 9.5, + "grad_norm": 4.642681277020734, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 208111080, + "step": 2280 + }, + { + "epoch": 9.5, + "loss": 0.03994332253932953, + "loss_ce": 0.0023876256309449673, + "loss_iou": 0.1611328125, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 208111080, + "step": 2280 + }, + { + "epoch": 9.504166666666666, + "grad_norm": 2.2019849939713634, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 208202460, + "step": 2281 + }, + { + "epoch": 9.504166666666666, + "loss": 0.053781673312187195, + "loss_ce": 2.495828812243417e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 208202460, + "step": 2281 + }, + { + "epoch": 9.508333333333333, + "grad_norm": 6.0104846611295635, + "learning_rate": 5e-05, + "loss": 0.1031, + "num_input_tokens_seen": 208294084, + "step": 2282 + }, + { + "epoch": 9.508333333333333, + "loss": 0.14557743072509766, + "loss_ce": 8.48758063511923e-05, + "loss_iou": 0.26953125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 208294084, + "step": 2282 + }, + { + "epoch": 9.5125, + "grad_norm": 4.870647425843709, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 208385216, + "step": 2283 + }, + { + "epoch": 9.5125, + "loss": 0.06885615736246109, + "loss_ce": 3.90209024772048e-05, + "loss_iou": 0.34375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 208385216, + "step": 2283 + }, + { + "epoch": 9.516666666666667, + "grad_norm": 3.3629716139339156, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 208476716, + "step": 2284 + }, + { + "epoch": 9.516666666666667, + "loss": 0.04319828748703003, + "loss_ce": 0.0014044629642739892, + "loss_iou": 0.271484375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 208476716, + "step": 2284 + }, + { + "epoch": 9.520833333333334, + "grad_norm": 4.8645127234217105, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 208567748, + "step": 2285 + }, + { + "epoch": 9.520833333333334, + "loss": 0.07986044883728027, + "loss_ce": 0.00022483064094558358, + "loss_iou": 0.296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 208567748, + "step": 2285 + }, + { + "epoch": 9.525, + "grad_norm": 6.745878397974651, + "learning_rate": 5e-05, + "loss": 0.0979, + "num_input_tokens_seen": 208658852, + "step": 2286 + }, + { + "epoch": 9.525, + "loss": 0.09872304648160934, + "loss_ce": 1.3940521057520527e-05, + "loss_iou": 0.3828125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 208658852, + "step": 2286 + }, + { + "epoch": 9.529166666666667, + "grad_norm": 37.867432870339535, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 208750688, + "step": 2287 + }, + { + "epoch": 9.529166666666667, + "loss": 0.06559404730796814, + "loss_ce": 0.00016435694124083966, + "loss_iou": 0.3359375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 208750688, + "step": 2287 + }, + { + "epoch": 9.533333333333333, + "grad_norm": 5.478800482735125, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 208842404, + "step": 2288 + }, + { + "epoch": 9.533333333333333, + "loss": 0.1185697540640831, + "loss_ce": 0.00013103854143992066, + "loss_iou": 0.275390625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 208842404, + "step": 2288 + }, + { + "epoch": 9.5375, + "grad_norm": 3.186946107916803, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 208933540, + "step": 2289 + }, + { + "epoch": 9.5375, + "loss": 0.05266163498163223, + "loss_ce": 4.9333386414218694e-05, + "loss_iou": 0.30859375, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 208933540, + "step": 2289 + }, + { + "epoch": 9.541666666666666, + "grad_norm": 2.6175938993499805, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 209024860, + "step": 2290 + }, + { + "epoch": 9.541666666666666, + "loss": 0.06243140995502472, + "loss_ce": 0.0002823605027515441, + "loss_iou": 0.171875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 209024860, + "step": 2290 + }, + { + "epoch": 9.545833333333333, + "grad_norm": 1.8831217358853127, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 209116392, + "step": 2291 + }, + { + "epoch": 9.545833333333333, + "loss": 0.06283891201019287, + "loss_ce": 0.00021684322564397007, + "loss_iou": 0.34765625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 209116392, + "step": 2291 + }, + { + "epoch": 9.55, + "grad_norm": 1.9650237682346743, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 209207740, + "step": 2292 + }, + { + "epoch": 9.55, + "loss": 0.11798623949289322, + "loss_ce": 6.631258293054998e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 209207740, + "step": 2292 + }, + { + "epoch": 9.554166666666667, + "grad_norm": 1.0195349557343432, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 209299368, + "step": 2293 + }, + { + "epoch": 9.554166666666667, + "loss": 0.0581989549100399, + "loss_ce": 7.822553743608296e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 209299368, + "step": 2293 + }, + { + "epoch": 9.558333333333334, + "grad_norm": 2.0366843665613104, + "learning_rate": 5e-05, + "loss": 0.0889, + "num_input_tokens_seen": 209390852, + "step": 2294 + }, + { + "epoch": 9.558333333333334, + "loss": 0.0881442278623581, + "loss_ce": 0.0012835754314437509, + "loss_iou": 0.17578125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 209390852, + "step": 2294 + }, + { + "epoch": 9.5625, + "grad_norm": 2.8187444641894417, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 209482280, + "step": 2295 + }, + { + "epoch": 9.5625, + "loss": 0.08741619437932968, + "loss_ce": 1.3851060430170037e-05, + "loss_iou": 0.375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 209482280, + "step": 2295 + }, + { + "epoch": 9.566666666666666, + "grad_norm": 6.942068475760814, + "learning_rate": 5e-05, + "loss": 0.0902, + "num_input_tokens_seen": 209573388, + "step": 2296 + }, + { + "epoch": 9.566666666666666, + "loss": 0.04752349853515625, + "loss_ce": 2.288810719619505e-05, + "loss_iou": 0.3671875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 209573388, + "step": 2296 + }, + { + "epoch": 9.570833333333333, + "grad_norm": 3.919273820651139, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 209665200, + "step": 2297 + }, + { + "epoch": 9.570833333333333, + "loss": 0.09870068728923798, + "loss_ce": 0.00018994146375916898, + "loss_iou": 0.345703125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 209665200, + "step": 2297 + }, + { + "epoch": 9.575, + "grad_norm": 2.9869843848344875, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 209756420, + "step": 2298 + }, + { + "epoch": 9.575, + "loss": 0.09328018128871918, + "loss_ce": 0.001880028867162764, + "loss_iou": 0.25390625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 209756420, + "step": 2298 + }, + { + "epoch": 9.579166666666667, + "grad_norm": 3.168385742863224, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 209847368, + "step": 2299 + }, + { + "epoch": 9.579166666666667, + "loss": 0.05465655028820038, + "loss_ce": 0.00048784760292619467, + "loss_iou": 0.220703125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 209847368, + "step": 2299 + }, + { + "epoch": 9.583333333333334, + "grad_norm": 2.964641575652676, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 209938492, + "step": 2300 + }, + { + "epoch": 9.583333333333334, + "loss": 0.07439778745174408, + "loss_ce": 1.118627824325813e-05, + "loss_iou": 0.3203125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 209938492, + "step": 2300 + }, + { + "epoch": 9.5875, + "grad_norm": 3.3481929061012217, + "learning_rate": 5e-05, + "loss": 0.1061, + "num_input_tokens_seen": 210029232, + "step": 2301 + }, + { + "epoch": 9.5875, + "loss": 0.07980884611606598, + "loss_ce": 0.0019737626425921917, + "loss_iou": 0.2392578125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 210029232, + "step": 2301 + }, + { + "epoch": 9.591666666666667, + "grad_norm": 3.0061991395338636, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 210120564, + "step": 2302 + }, + { + "epoch": 9.591666666666667, + "loss": 0.05892244726419449, + "loss_ce": 2.3518419766332954e-05, + "loss_iou": 0.263671875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 210120564, + "step": 2302 + }, + { + "epoch": 9.595833333333333, + "grad_norm": 3.001238624970012, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 210212024, + "step": 2303 + }, + { + "epoch": 9.595833333333333, + "loss": 0.04750463739037514, + "loss_ce": 0.0016672349302098155, + "loss_iou": 0.310546875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 210212024, + "step": 2303 + }, + { + "epoch": 9.6, + "grad_norm": 3.1328769634590325, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 210303912, + "step": 2304 + }, + { + "epoch": 9.6, + "loss": 0.027168650180101395, + "loss_ce": 0.0002902920823544264, + "loss_iou": 0.2392578125, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 210303912, + "step": 2304 + }, + { + "epoch": 9.604166666666666, + "grad_norm": 2.9176307605044727, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 210395604, + "step": 2305 + }, + { + "epoch": 9.604166666666666, + "loss": 0.06715308129787445, + "loss_ce": 0.001814942224882543, + "loss_iou": 0.26171875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 210395604, + "step": 2305 + }, + { + "epoch": 9.608333333333333, + "grad_norm": 3.8731669151747314, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 210487352, + "step": 2306 + }, + { + "epoch": 9.608333333333333, + "loss": 0.06469616293907166, + "loss_ce": 0.0007770942756906152, + "loss_iou": 0.267578125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 210487352, + "step": 2306 + }, + { + "epoch": 9.6125, + "grad_norm": 1.7329658587218324, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 210578192, + "step": 2307 + }, + { + "epoch": 9.6125, + "loss": 0.06999445706605911, + "loss_ce": 0.0007653271895833313, + "loss_iou": 0.181640625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 210578192, + "step": 2307 + }, + { + "epoch": 9.616666666666667, + "grad_norm": 2.633133076304407, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 210669848, + "step": 2308 + }, + { + "epoch": 9.616666666666667, + "loss": 0.07915782928466797, + "loss_ce": 0.00040722257108427584, + "loss_iou": 0.1865234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 210669848, + "step": 2308 + }, + { + "epoch": 9.620833333333334, + "grad_norm": 3.2415885083087583, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 210761176, + "step": 2309 + }, + { + "epoch": 9.620833333333334, + "loss": 0.06210331618785858, + "loss_ce": 1.5305309716495685e-05, + "loss_iou": 0.30859375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 210761176, + "step": 2309 + }, + { + "epoch": 9.625, + "grad_norm": 10.46700965607754, + "learning_rate": 5e-05, + "loss": 0.1214, + "num_input_tokens_seen": 210851532, + "step": 2310 + }, + { + "epoch": 9.625, + "loss": 0.20068180561065674, + "loss_ce": 0.00024236088211182505, + "loss_iou": 0.28515625, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 210851532, + "step": 2310 + }, + { + "epoch": 9.629166666666666, + "grad_norm": 9.77035819381619, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 210942648, + "step": 2311 + }, + { + "epoch": 9.629166666666666, + "loss": 0.06990273296833038, + "loss_ce": 0.0010855919681489468, + "loss_iou": 0.279296875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 210942648, + "step": 2311 + }, + { + "epoch": 9.633333333333333, + "grad_norm": 2.268298287488433, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 211033656, + "step": 2312 + }, + { + "epoch": 9.633333333333333, + "loss": 0.08489096164703369, + "loss_ce": 6.3188363128574565e-06, + "loss_iou": 0.31640625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 211033656, + "step": 2312 + }, + { + "epoch": 9.6375, + "grad_norm": 2.7690051300003744, + "learning_rate": 5e-05, + "loss": 0.1414, + "num_input_tokens_seen": 211125328, + "step": 2313 + }, + { + "epoch": 9.6375, + "loss": 0.17821158468723297, + "loss_ce": 0.0017894639167934656, + "loss_iou": 0.216796875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 211125328, + "step": 2313 + }, + { + "epoch": 9.641666666666667, + "grad_norm": 2.403599267282493, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 211216188, + "step": 2314 + }, + { + "epoch": 9.641666666666667, + "loss": 0.09419400244951248, + "loss_ce": 1.4980028026911896e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 211216188, + "step": 2314 + }, + { + "epoch": 9.645833333333334, + "grad_norm": 2.024190396115235, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 211307148, + "step": 2315 + }, + { + "epoch": 9.645833333333334, + "loss": 0.08138015121221542, + "loss_ce": 6.606592796742916e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 211307148, + "step": 2315 + }, + { + "epoch": 9.65, + "grad_norm": 7.034624926758856, + "learning_rate": 5e-05, + "loss": 0.1537, + "num_input_tokens_seen": 211398560, + "step": 2316 + }, + { + "epoch": 9.65, + "loss": 0.11996078491210938, + "loss_ce": 0.0004081761871930212, + "loss_iou": 0.2392578125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 211398560, + "step": 2316 + }, + { + "epoch": 9.654166666666667, + "grad_norm": 3.433862137729614, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 211488688, + "step": 2317 + }, + { + "epoch": 9.654166666666667, + "loss": 0.07582279294729233, + "loss_ce": 1.7128168110502884e-05, + "loss_iou": 0.3125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 211488688, + "step": 2317 + }, + { + "epoch": 9.658333333333333, + "grad_norm": 4.1438214713878585, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 211579856, + "step": 2318 + }, + { + "epoch": 9.658333333333333, + "loss": 0.07438762485980988, + "loss_ce": 0.00027568096993491054, + "loss_iou": 0.134765625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 211579856, + "step": 2318 + }, + { + "epoch": 9.6625, + "grad_norm": 3.0284126105680227, + "learning_rate": 5e-05, + "loss": 0.1231, + "num_input_tokens_seen": 211670124, + "step": 2319 + }, + { + "epoch": 9.6625, + "loss": 0.05692708119750023, + "loss_ce": 0.00017964384460356086, + "loss_iou": 0.259765625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 211670124, + "step": 2319 + }, + { + "epoch": 9.666666666666666, + "grad_norm": 6.624790027554082, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 211761708, + "step": 2320 + }, + { + "epoch": 9.666666666666666, + "loss": 0.10713419318199158, + "loss_ce": 9.378503455081955e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 211761708, + "step": 2320 + }, + { + "epoch": 9.670833333333333, + "grad_norm": 3.8665033268834113, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 211852468, + "step": 2321 + }, + { + "epoch": 9.670833333333333, + "loss": 0.05054951831698418, + "loss_ce": 0.0011568169575184584, + "loss_iou": 0.27734375, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 211852468, + "step": 2321 + }, + { + "epoch": 9.675, + "grad_norm": 4.1451423402198895, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 211943768, + "step": 2322 + }, + { + "epoch": 9.675, + "loss": 0.06782028824090958, + "loss_ce": 1.023104414343834e-05, + "loss_iou": 0.21484375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 211943768, + "step": 2322 + }, + { + "epoch": 9.679166666666667, + "grad_norm": 6.308823290773596, + "learning_rate": 5e-05, + "loss": 0.1002, + "num_input_tokens_seen": 212031848, + "step": 2323 + }, + { + "epoch": 9.679166666666667, + "loss": 0.05495281517505646, + "loss_ce": 2.1176834707148373e-05, + "loss_iou": 0.28515625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 212031848, + "step": 2323 + }, + { + "epoch": 9.683333333333334, + "grad_norm": 2.2750972936487774, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 212123040, + "step": 2324 + }, + { + "epoch": 9.683333333333334, + "loss": 0.04357624053955078, + "loss_ce": 0.00026417168555781245, + "loss_iou": 0.2431640625, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 212123040, + "step": 2324 + }, + { + "epoch": 9.6875, + "grad_norm": 2.5181649970463935, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 212214276, + "step": 2325 + }, + { + "epoch": 9.6875, + "loss": 0.0611778125166893, + "loss_ce": 0.0005698998575098813, + "loss_iou": 0.33984375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 212214276, + "step": 2325 + }, + { + "epoch": 9.691666666666666, + "grad_norm": 47.32888864378727, + "learning_rate": 5e-05, + "loss": 0.1016, + "num_input_tokens_seen": 212306284, + "step": 2326 + }, + { + "epoch": 9.691666666666666, + "loss": 0.11467814445495605, + "loss_ce": 0.000801810179837048, + "loss_iou": 0.26171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 212306284, + "step": 2326 + }, + { + "epoch": 9.695833333333333, + "grad_norm": 4.5252150406005125, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 212396928, + "step": 2327 + }, + { + "epoch": 9.695833333333333, + "loss": 0.07074105739593506, + "loss_ce": 1.3064613995084073e-06, + "loss_iou": 0.267578125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 212396928, + "step": 2327 + }, + { + "epoch": 9.7, + "grad_norm": 5.0140978725153955, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 212487820, + "step": 2328 + }, + { + "epoch": 9.7, + "loss": 0.0942697674036026, + "loss_ce": 9.707949857329368e-07, + "loss_iou": 0.33984375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 212487820, + "step": 2328 + }, + { + "epoch": 9.704166666666667, + "grad_norm": 1.9934241094260983, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 212579456, + "step": 2329 + }, + { + "epoch": 9.704166666666667, + "loss": 0.09087227284908295, + "loss_ce": 0.0002198061702074483, + "loss_iou": 0.279296875, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 212579456, + "step": 2329 + }, + { + "epoch": 9.708333333333334, + "grad_norm": 7.854491895558061, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 212670164, + "step": 2330 + }, + { + "epoch": 9.708333333333334, + "loss": 0.06276330351829529, + "loss_ce": 3.900450792571064e-06, + "loss_iou": 0.3515625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 212670164, + "step": 2330 + }, + { + "epoch": 9.7125, + "grad_norm": 3.9516540639993845, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 212761568, + "step": 2331 + }, + { + "epoch": 9.7125, + "loss": 0.06600821018218994, + "loss_ce": 1.3949293133919127e-05, + "loss_iou": 0.27734375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 212761568, + "step": 2331 + }, + { + "epoch": 9.716666666666667, + "grad_norm": 2.8755187332405554, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 212852804, + "step": 2332 + }, + { + "epoch": 9.716666666666667, + "loss": 0.07823988795280457, + "loss_ce": 2.3335524019785225e-05, + "loss_iou": 0.287109375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 212852804, + "step": 2332 + }, + { + "epoch": 9.720833333333333, + "grad_norm": 4.2736728794931125, + "learning_rate": 5e-05, + "loss": 0.1083, + "num_input_tokens_seen": 212943956, + "step": 2333 + }, + { + "epoch": 9.720833333333333, + "loss": 0.13756805658340454, + "loss_ce": 1.0072521035908721e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 212943956, + "step": 2333 + }, + { + "epoch": 9.725, + "grad_norm": 3.335770224120245, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 213034888, + "step": 2334 + }, + { + "epoch": 9.725, + "loss": 0.15872299671173096, + "loss_ce": 0.002099153818562627, + "loss_iou": 0.28515625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 213034888, + "step": 2334 + }, + { + "epoch": 9.729166666666666, + "grad_norm": 12.569562571201159, + "learning_rate": 5e-05, + "loss": 0.1035, + "num_input_tokens_seen": 213126568, + "step": 2335 + }, + { + "epoch": 9.729166666666666, + "loss": 0.13710667192935944, + "loss_ce": 0.001364489900879562, + "loss_iou": 0.287109375, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 213126568, + "step": 2335 + }, + { + "epoch": 9.733333333333333, + "grad_norm": 4.464492026423258, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 213217640, + "step": 2336 + }, + { + "epoch": 9.733333333333333, + "loss": 0.07574643939733505, + "loss_ce": 0.0007952642044983804, + "loss_iou": 0.1787109375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 213217640, + "step": 2336 + }, + { + "epoch": 9.7375, + "grad_norm": 3.6168676118183, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 213309268, + "step": 2337 + }, + { + "epoch": 9.7375, + "loss": 0.05703987181186676, + "loss_ce": 0.0003382100840099156, + "loss_iou": 0.419921875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 213309268, + "step": 2337 + }, + { + "epoch": 9.741666666666667, + "grad_norm": 9.748831522555728, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 213400104, + "step": 2338 + }, + { + "epoch": 9.741666666666667, + "loss": 0.09555191546678543, + "loss_ce": 1.3787457646685652e-06, + "loss_iou": 0.251953125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 213400104, + "step": 2338 + }, + { + "epoch": 9.745833333333334, + "grad_norm": 2.19442959998253, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 213491216, + "step": 2339 + }, + { + "epoch": 9.745833333333334, + "loss": 0.031318824738264084, + "loss_ce": 0.00029770893161185086, + "loss_iou": 0.1533203125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 213491216, + "step": 2339 + }, + { + "epoch": 9.75, + "grad_norm": 15.327886100065887, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 213582480, + "step": 2340 + }, + { + "epoch": 9.75, + "loss": 0.09976033121347427, + "loss_ce": 0.0010664837900549173, + "loss_iou": 0.140625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 213582480, + "step": 2340 + }, + { + "epoch": 9.754166666666666, + "grad_norm": 3.3946902286936345, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 213673028, + "step": 2341 + }, + { + "epoch": 9.754166666666666, + "loss": 0.0917673110961914, + "loss_ce": 9.517025318928063e-07, + "loss_iou": 0.2265625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 213673028, + "step": 2341 + }, + { + "epoch": 9.758333333333333, + "grad_norm": 2.8068820704716306, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 213764164, + "step": 2342 + }, + { + "epoch": 9.758333333333333, + "loss": 0.06515133380889893, + "loss_ce": 1.1558730875549372e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 213764164, + "step": 2342 + }, + { + "epoch": 9.7625, + "grad_norm": 1.7118709907416432, + "learning_rate": 5e-05, + "loss": 0.1154, + "num_input_tokens_seen": 213855484, + "step": 2343 + }, + { + "epoch": 9.7625, + "loss": 0.15148527920246124, + "loss_ce": 2.6543737476458773e-05, + "loss_iou": 0.4609375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 213855484, + "step": 2343 + }, + { + "epoch": 9.766666666666667, + "grad_norm": 1.7780286715792515, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 213947032, + "step": 2344 + }, + { + "epoch": 9.766666666666667, + "loss": 0.0518774576485157, + "loss_ce": 0.0020727699156850576, + "loss_iou": 0.1259765625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 213947032, + "step": 2344 + }, + { + "epoch": 9.770833333333334, + "grad_norm": 1.9309264666783899, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 214038300, + "step": 2345 + }, + { + "epoch": 9.770833333333334, + "loss": 0.1719518005847931, + "loss_ce": 0.0018162998603656888, + "loss_iou": 0.154296875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 214038300, + "step": 2345 + }, + { + "epoch": 9.775, + "grad_norm": 1.712701511235156, + "learning_rate": 5e-05, + "loss": 0.0456, + "num_input_tokens_seen": 214129548, + "step": 2346 + }, + { + "epoch": 9.775, + "loss": 0.06391113996505737, + "loss_ce": 0.00010651241609593853, + "loss_iou": 0.15234375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 214129548, + "step": 2346 + }, + { + "epoch": 9.779166666666667, + "grad_norm": 5.577947780444153, + "learning_rate": 5e-05, + "loss": 0.1246, + "num_input_tokens_seen": 214220536, + "step": 2347 + }, + { + "epoch": 9.779166666666667, + "loss": 0.12966987490653992, + "loss_ce": 0.0036322667729109526, + "loss_iou": 0.287109375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 214220536, + "step": 2347 + }, + { + "epoch": 9.783333333333333, + "grad_norm": 3.5191047619911178, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 214312616, + "step": 2348 + }, + { + "epoch": 9.783333333333333, + "loss": 0.06377019733190536, + "loss_ce": 0.0005988088087178767, + "loss_iou": 0.27734375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 214312616, + "step": 2348 + }, + { + "epoch": 9.7875, + "grad_norm": 2.1775888706689095, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 214404052, + "step": 2349 + }, + { + "epoch": 9.7875, + "loss": 0.05960691720247269, + "loss_ce": 0.0003570413973648101, + "loss_iou": 0.234375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 214404052, + "step": 2349 + }, + { + "epoch": 9.791666666666666, + "grad_norm": 2.8138942849599156, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 214495420, + "step": 2350 + }, + { + "epoch": 9.791666666666666, + "loss": 0.0710035040974617, + "loss_ce": 3.4879423765232787e-05, + "loss_iou": 0.244140625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 214495420, + "step": 2350 + }, + { + "epoch": 9.795833333333333, + "grad_norm": 4.1030473929463405, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 214586960, + "step": 2351 + }, + { + "epoch": 9.795833333333333, + "loss": 0.06355182826519012, + "loss_ce": 0.0002125888568116352, + "loss_iou": 0.34375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 214586960, + "step": 2351 + }, + { + "epoch": 9.8, + "grad_norm": 2.679079200599434, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 214677932, + "step": 2352 + }, + { + "epoch": 9.8, + "loss": 0.05392606183886528, + "loss_ce": 1.5020291357359383e-06, + "loss_iou": 0.294921875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 214677932, + "step": 2352 + }, + { + "epoch": 9.804166666666667, + "grad_norm": 5.000155119702331, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 214769072, + "step": 2353 + }, + { + "epoch": 9.804166666666667, + "loss": 0.1154058575630188, + "loss_ce": 3.632328343883273e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 214769072, + "step": 2353 + }, + { + "epoch": 9.808333333333334, + "grad_norm": 5.068399377116598, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 214860520, + "step": 2354 + }, + { + "epoch": 9.808333333333334, + "loss": 0.03795129433274269, + "loss_ce": 4.0833965613273904e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.007598876953125, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 214860520, + "step": 2354 + }, + { + "epoch": 9.8125, + "grad_norm": 2.9163140624956725, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 214951808, + "step": 2355 + }, + { + "epoch": 9.8125, + "loss": 0.11772525310516357, + "loss_ce": 0.0017584576271474361, + "loss_iou": 0.263671875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 214951808, + "step": 2355 + }, + { + "epoch": 9.816666666666666, + "grad_norm": 4.650004655183908, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 215042492, + "step": 2356 + }, + { + "epoch": 9.816666666666666, + "loss": 0.1099902018904686, + "loss_ce": 4.852836809732253e-06, + "loss_iou": 0.23828125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 215042492, + "step": 2356 + }, + { + "epoch": 9.820833333333333, + "grad_norm": 3.6950360384389236, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 215133576, + "step": 2357 + }, + { + "epoch": 9.820833333333333, + "loss": 0.07307278364896774, + "loss_ce": 5.947624231339432e-05, + "loss_iou": 0.30078125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 215133576, + "step": 2357 + }, + { + "epoch": 9.825, + "grad_norm": 4.495764643011328, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 215224968, + "step": 2358 + }, + { + "epoch": 9.825, + "loss": 0.13321489095687866, + "loss_ce": 5.671513918059645e-06, + "loss_iou": 0.29296875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 215224968, + "step": 2358 + }, + { + "epoch": 9.829166666666667, + "grad_norm": 2.9290558110499694, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 215316080, + "step": 2359 + }, + { + "epoch": 9.829166666666667, + "loss": 0.06919078528881073, + "loss_ce": 0.0004346802306827158, + "loss_iou": 0.32421875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 215316080, + "step": 2359 + }, + { + "epoch": 9.833333333333334, + "grad_norm": 4.526856507028311, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 215407492, + "step": 2360 + }, + { + "epoch": 9.833333333333334, + "loss": 0.11653508245944977, + "loss_ce": 0.0003546580846887082, + "loss_iou": 0.29296875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 215407492, + "step": 2360 + }, + { + "epoch": 9.8375, + "grad_norm": 3.022980742192059, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 215498740, + "step": 2361 + }, + { + "epoch": 9.8375, + "loss": 0.07529893517494202, + "loss_ce": 0.0025374058168381453, + "loss_iou": 0.2412109375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 215498740, + "step": 2361 + }, + { + "epoch": 9.841666666666667, + "grad_norm": 3.0630692547168064, + "learning_rate": 5e-05, + "loss": 0.1162, + "num_input_tokens_seen": 215589256, + "step": 2362 + }, + { + "epoch": 9.841666666666667, + "loss": 0.1477106511592865, + "loss_ce": 5.569358563661808e-06, + "loss_iou": 0.2099609375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 215589256, + "step": 2362 + }, + { + "epoch": 9.845833333333333, + "grad_norm": 2.3315894574014413, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 215680120, + "step": 2363 + }, + { + "epoch": 9.845833333333333, + "loss": 0.0856175646185875, + "loss_ce": 5.013221766603237e-07, + "loss_iou": 0.1845703125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 215680120, + "step": 2363 + }, + { + "epoch": 9.85, + "grad_norm": 2.7572666404328, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 215771780, + "step": 2364 + }, + { + "epoch": 9.85, + "loss": 0.03435160592198372, + "loss_ce": 0.00021388079039752483, + "loss_iou": 0.2373046875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 215771780, + "step": 2364 + }, + { + "epoch": 9.854166666666666, + "grad_norm": 3.2252818942955037, + "learning_rate": 5e-05, + "loss": 0.0996, + "num_input_tokens_seen": 215863700, + "step": 2365 + }, + { + "epoch": 9.854166666666666, + "loss": 0.08640223741531372, + "loss_ce": 0.0008614607504568994, + "loss_iou": 0.251953125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 215863700, + "step": 2365 + }, + { + "epoch": 9.858333333333333, + "grad_norm": 2.7280946601298734, + "learning_rate": 5e-05, + "loss": 0.0904, + "num_input_tokens_seen": 215954660, + "step": 2366 + }, + { + "epoch": 9.858333333333333, + "loss": 0.12923277914524078, + "loss_ce": 0.0001891975407488644, + "loss_iou": 0.212890625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 215954660, + "step": 2366 + }, + { + "epoch": 9.8625, + "grad_norm": 2.7140980275793356, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 216046256, + "step": 2367 + }, + { + "epoch": 9.8625, + "loss": 0.0715366005897522, + "loss_ce": 6.443824531743303e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 216046256, + "step": 2367 + }, + { + "epoch": 9.866666666666667, + "grad_norm": 2.8434266178903465, + "learning_rate": 5e-05, + "loss": 0.0873, + "num_input_tokens_seen": 216137476, + "step": 2368 + }, + { + "epoch": 9.866666666666667, + "loss": 0.08151215314865112, + "loss_ce": 0.00033539917785674334, + "loss_iou": 0.349609375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 216137476, + "step": 2368 + }, + { + "epoch": 9.870833333333334, + "grad_norm": 3.4262262928827703, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 216229316, + "step": 2369 + }, + { + "epoch": 9.870833333333334, + "loss": 0.07249397784471512, + "loss_ce": 0.0011438806541264057, + "loss_iou": 0.3359375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 216229316, + "step": 2369 + }, + { + "epoch": 9.875, + "grad_norm": 2.777978724374111, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 216320344, + "step": 2370 + }, + { + "epoch": 9.875, + "loss": 0.11107797920703888, + "loss_ce": 8.55453617987223e-05, + "loss_iou": 0.240234375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 216320344, + "step": 2370 + }, + { + "epoch": 9.879166666666666, + "grad_norm": 4.830728497464694, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 216411052, + "step": 2371 + }, + { + "epoch": 9.879166666666666, + "loss": 0.09860274195671082, + "loss_ce": 1.57094000314828e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 216411052, + "step": 2371 + }, + { + "epoch": 9.883333333333333, + "grad_norm": 4.156025898707058, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 216502236, + "step": 2372 + }, + { + "epoch": 9.883333333333333, + "loss": 0.052403755486011505, + "loss_ce": 0.000912973249796778, + "loss_iou": 0.255859375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 216502236, + "step": 2372 + }, + { + "epoch": 9.8875, + "grad_norm": 3.158859098815327, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 216593540, + "step": 2373 + }, + { + "epoch": 9.8875, + "loss": 0.03290058299899101, + "loss_ce": 0.0008342385408468544, + "loss_iou": 0.216796875, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 216593540, + "step": 2373 + }, + { + "epoch": 9.891666666666667, + "grad_norm": 2.3270527447832663, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 216684660, + "step": 2374 + }, + { + "epoch": 9.891666666666667, + "loss": 0.04258023202419281, + "loss_ce": 0.0002981259021908045, + "loss_iou": 0.16015625, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 216684660, + "step": 2374 + }, + { + "epoch": 9.895833333333334, + "grad_norm": 4.043780359087197, + "learning_rate": 5e-05, + "loss": 0.1232, + "num_input_tokens_seen": 216776372, + "step": 2375 + }, + { + "epoch": 9.895833333333334, + "loss": 0.1797737032175064, + "loss_ce": 0.00026931928005069494, + "loss_iou": 0.224609375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 216776372, + "step": 2375 + }, + { + "epoch": 9.9, + "grad_norm": 1.631130023892962, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 216868132, + "step": 2376 + }, + { + "epoch": 9.9, + "loss": 0.062147513031959534, + "loss_ce": 9.001667058328167e-05, + "loss_iou": 0.296875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 216868132, + "step": 2376 + }, + { + "epoch": 9.904166666666667, + "grad_norm": 1.6375843259787302, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 216959320, + "step": 2377 + }, + { + "epoch": 9.904166666666667, + "loss": 0.05292154848575592, + "loss_ce": 4.064571839990094e-06, + "loss_iou": 0.30078125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 216959320, + "step": 2377 + }, + { + "epoch": 9.908333333333333, + "grad_norm": 3.789057407933149, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 217050848, + "step": 2378 + }, + { + "epoch": 9.908333333333333, + "loss": 0.09067431837320328, + "loss_ce": 0.0018223864026367664, + "loss_iou": 0.07958984375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 217050848, + "step": 2378 + }, + { + "epoch": 9.9125, + "grad_norm": 3.407605174263242, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 217141616, + "step": 2379 + }, + { + "epoch": 9.9125, + "loss": 0.09490478038787842, + "loss_ce": 3.325934085296467e-05, + "loss_iou": 0.296875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 217141616, + "step": 2379 + }, + { + "epoch": 9.916666666666666, + "grad_norm": 5.964912675949317, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 217233004, + "step": 2380 + }, + { + "epoch": 9.916666666666666, + "loss": 0.09552451968193054, + "loss_ce": 4.49842082161922e-06, + "loss_iou": 0.26953125, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 217233004, + "step": 2380 + }, + { + "epoch": 9.920833333333333, + "grad_norm": 5.675693701891781, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 217325000, + "step": 2381 + }, + { + "epoch": 9.920833333333333, + "loss": 0.05819493532180786, + "loss_ce": 2.8428947189240716e-05, + "loss_iou": 0.3515625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 217325000, + "step": 2381 + }, + { + "epoch": 9.925, + "grad_norm": 1.9800756013243537, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 217415812, + "step": 2382 + }, + { + "epoch": 9.925, + "loss": 0.06221006438136101, + "loss_ce": 0.0009307658183388412, + "loss_iou": 0.25, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 217415812, + "step": 2382 + }, + { + "epoch": 9.929166666666667, + "grad_norm": 7.826859746114933, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 217506640, + "step": 2383 + }, + { + "epoch": 9.929166666666667, + "loss": 0.08806528151035309, + "loss_ce": 0.0002509501646272838, + "loss_iou": 0.2138671875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 217506640, + "step": 2383 + }, + { + "epoch": 9.933333333333334, + "grad_norm": 2.9482377996316744, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 217597960, + "step": 2384 + }, + { + "epoch": 9.933333333333334, + "loss": 0.04804066941142082, + "loss_ce": 6.000054327159887e-06, + "loss_iou": 0.3125, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 217597960, + "step": 2384 + }, + { + "epoch": 9.9375, + "grad_norm": 8.245569509710974, + "learning_rate": 5e-05, + "loss": 0.1383, + "num_input_tokens_seen": 217688896, + "step": 2385 + }, + { + "epoch": 9.9375, + "loss": 0.11219567805528641, + "loss_ce": 2.8321906938799657e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 217688896, + "step": 2385 + }, + { + "epoch": 9.941666666666666, + "grad_norm": 3.428894122120708, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 217780696, + "step": 2386 + }, + { + "epoch": 9.941666666666666, + "loss": 0.06880239397287369, + "loss_ce": 0.0025792501401156187, + "loss_iou": 0.279296875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 217780696, + "step": 2386 + }, + { + "epoch": 9.945833333333333, + "grad_norm": 4.2734668776189375, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 217872132, + "step": 2387 + }, + { + "epoch": 9.945833333333333, + "loss": 0.13083070516586304, + "loss_ce": 0.0021075578406453133, + "loss_iou": 0.3359375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 217872132, + "step": 2387 + }, + { + "epoch": 9.95, + "grad_norm": 4.8866090440778285, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 217963468, + "step": 2388 + }, + { + "epoch": 9.95, + "loss": 0.06608153879642487, + "loss_ce": 0.0019030753755941987, + "loss_iou": 0.22265625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 217963468, + "step": 2388 + }, + { + "epoch": 9.954166666666667, + "grad_norm": 2.0819408855510817, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 218055508, + "step": 2389 + }, + { + "epoch": 9.954166666666667, + "loss": 0.10832661390304565, + "loss_ce": 0.0008589604403823614, + "loss_iou": 0.2392578125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 218055508, + "step": 2389 + }, + { + "epoch": 9.958333333333334, + "grad_norm": 1.7007160550678941, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 218146548, + "step": 2390 + }, + { + "epoch": 9.958333333333334, + "loss": 0.09715352952480316, + "loss_ce": 0.00013815786223858595, + "loss_iou": 0.30078125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 218146548, + "step": 2390 + }, + { + "epoch": 9.9625, + "grad_norm": 2.832922464483346, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 218237904, + "step": 2391 + }, + { + "epoch": 9.9625, + "loss": 0.07945854961872101, + "loss_ce": 0.0006087534129619598, + "loss_iou": 0.306640625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 218237904, + "step": 2391 + }, + { + "epoch": 9.966666666666667, + "grad_norm": 2.27150866322334, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 218329788, + "step": 2392 + }, + { + "epoch": 9.966666666666667, + "loss": 0.06808695942163467, + "loss_ce": 0.00036845580325461924, + "loss_iou": 0.1513671875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 218329788, + "step": 2392 + }, + { + "epoch": 9.970833333333333, + "grad_norm": 3.3544890310831064, + "learning_rate": 5e-05, + "loss": 0.1286, + "num_input_tokens_seen": 218421056, + "step": 2393 + }, + { + "epoch": 9.970833333333333, + "loss": 0.1268257051706314, + "loss_ce": 0.00011672836990328506, + "loss_iou": 0.33203125, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 218421056, + "step": 2393 + }, + { + "epoch": 9.975, + "grad_norm": 15.697672226925908, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 218513060, + "step": 2394 + }, + { + "epoch": 9.975, + "loss": 0.09975279122591019, + "loss_ce": 0.004324328154325485, + "loss_iou": 0.134765625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 218513060, + "step": 2394 + }, + { + "epoch": 9.979166666666666, + "grad_norm": 13.437865154392682, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 218604308, + "step": 2395 + }, + { + "epoch": 9.979166666666666, + "loss": 0.1018124371767044, + "loss_ce": 0.0006314095808193088, + "loss_iou": 0.263671875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 218604308, + "step": 2395 + }, + { + "epoch": 9.983333333333333, + "grad_norm": 2.1136336116403878, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 218694532, + "step": 2396 + }, + { + "epoch": 9.983333333333333, + "loss": 0.03833974897861481, + "loss_ce": 2.492999192327261e-05, + "loss_iou": 0.23046875, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 218694532, + "step": 2396 + }, + { + "epoch": 9.9875, + "grad_norm": 4.0907594526213265, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 218786656, + "step": 2397 + }, + { + "epoch": 9.9875, + "loss": 0.047209057956933975, + "loss_ce": 0.000761302886530757, + "loss_iou": 0.3515625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 218786656, + "step": 2397 + }, + { + "epoch": 9.991666666666667, + "grad_norm": 3.173601061282437, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 218877772, + "step": 2398 + }, + { + "epoch": 9.991666666666667, + "loss": 0.0674939751625061, + "loss_ce": 1.1977379472227767e-05, + "loss_iou": 0.201171875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 218877772, + "step": 2398 + }, + { + "epoch": 9.995833333333334, + "grad_norm": 4.917086807008557, + "learning_rate": 5e-05, + "loss": 0.1076, + "num_input_tokens_seen": 218969860, + "step": 2399 + }, + { + "epoch": 9.995833333333334, + "loss": 0.08607882261276245, + "loss_ce": 0.0025216902140527964, + "loss_iou": 0.310546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 218969860, + "step": 2399 + }, + { + "epoch": 10.0, + "grad_norm": 5.4402695278723305, + "learning_rate": 5e-05, + "loss": 0.1207, + "num_input_tokens_seen": 219060548, + "step": 2400 + }, + { + "epoch": 10.0, + "loss": 0.09793904423713684, + "loss_ce": 8.13117094367044e-06, + "loss_iou": 0.34765625, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 219060548, + "step": 2400 + }, + { + "epoch": 10.004166666666666, + "grad_norm": 3.0268333764807838, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 219150468, + "step": 2401 + }, + { + "epoch": 10.004166666666666, + "loss": 0.07914966344833374, + "loss_ce": 0.0003380189591553062, + "loss_iou": 0.40234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 219150468, + "step": 2401 + }, + { + "epoch": 10.008333333333333, + "grad_norm": 3.55544659192017, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 219242128, + "step": 2402 + }, + { + "epoch": 10.008333333333333, + "loss": 0.11007954180240631, + "loss_ce": 0.00013996948837302625, + "loss_iou": 0.26171875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 219242128, + "step": 2402 + }, + { + "epoch": 10.0125, + "grad_norm": 2.8460970426000416, + "learning_rate": 5e-05, + "loss": 0.0988, + "num_input_tokens_seen": 219333524, + "step": 2403 + }, + { + "epoch": 10.0125, + "loss": 0.057223014533519745, + "loss_ce": 3.307330553070642e-05, + "loss_iou": 0.359375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 219333524, + "step": 2403 + }, + { + "epoch": 10.016666666666667, + "grad_norm": 1.801013103785767, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 219424884, + "step": 2404 + }, + { + "epoch": 10.016666666666667, + "loss": 0.054716162383556366, + "loss_ce": 2.1029693016316742e-05, + "loss_iou": 0.255859375, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 219424884, + "step": 2404 + }, + { + "epoch": 10.020833333333334, + "grad_norm": 3.1932161522855353, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 219515724, + "step": 2405 + }, + { + "epoch": 10.020833333333334, + "loss": 0.09836345911026001, + "loss_ce": 5.310107098921435e-06, + "loss_iou": 0.0, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 219515724, + "step": 2405 + }, + { + "epoch": 10.025, + "grad_norm": 2.7569867058474657, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 219606804, + "step": 2406 + }, + { + "epoch": 10.025, + "loss": 0.04205465316772461, + "loss_ce": 8.535310189472511e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 219606804, + "step": 2406 + }, + { + "epoch": 10.029166666666667, + "grad_norm": 3.201256579939997, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 219697536, + "step": 2407 + }, + { + "epoch": 10.029166666666667, + "loss": 0.06935551762580872, + "loss_ce": 3.484290209598839e-05, + "loss_iou": 0.2890625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 219697536, + "step": 2407 + }, + { + "epoch": 10.033333333333333, + "grad_norm": 3.1797158746431307, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 219789392, + "step": 2408 + }, + { + "epoch": 10.033333333333333, + "loss": 0.08047311007976532, + "loss_ce": 0.00012033308303216472, + "loss_iou": 0.3359375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 219789392, + "step": 2408 + }, + { + "epoch": 10.0375, + "grad_norm": 3.494274889843191, + "learning_rate": 5e-05, + "loss": 0.1097, + "num_input_tokens_seen": 219880764, + "step": 2409 + }, + { + "epoch": 10.0375, + "loss": 0.1517402082681656, + "loss_ce": 0.0002814657927956432, + "loss_iou": 0.220703125, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 219880764, + "step": 2409 + }, + { + "epoch": 10.041666666666666, + "grad_norm": 4.472146277590022, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 219972004, + "step": 2410 + }, + { + "epoch": 10.041666666666666, + "loss": 0.10514138638973236, + "loss_ce": 3.884359830408357e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 219972004, + "step": 2410 + }, + { + "epoch": 10.045833333333333, + "grad_norm": 2.2622531373039223, + "learning_rate": 5e-05, + "loss": 0.1177, + "num_input_tokens_seen": 220062736, + "step": 2411 + }, + { + "epoch": 10.045833333333333, + "loss": 0.054421041160821915, + "loss_ce": 0.0005575146642513573, + "loss_iou": 0.21484375, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 220062736, + "step": 2411 + }, + { + "epoch": 10.05, + "grad_norm": 2.0006320317028243, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 220154128, + "step": 2412 + }, + { + "epoch": 10.05, + "loss": 0.035746023058891296, + "loss_ce": 0.0013221934204921126, + "loss_iou": 0.296875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 220154128, + "step": 2412 + }, + { + "epoch": 10.054166666666667, + "grad_norm": 2.8703796099596146, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 220245464, + "step": 2413 + }, + { + "epoch": 10.054166666666667, + "loss": 0.12158460915088654, + "loss_ce": 1.7835860489867628e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 220245464, + "step": 2413 + }, + { + "epoch": 10.058333333333334, + "grad_norm": 3.156257832441717, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 220336884, + "step": 2414 + }, + { + "epoch": 10.058333333333334, + "loss": 0.10016626864671707, + "loss_ce": 0.002448986517265439, + "loss_iou": 0.263671875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 220336884, + "step": 2414 + }, + { + "epoch": 10.0625, + "grad_norm": 1.5155234429258053, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 220428424, + "step": 2415 + }, + { + "epoch": 10.0625, + "loss": 0.09825599193572998, + "loss_ce": 0.000370861409464851, + "loss_iou": 0.283203125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 220428424, + "step": 2415 + }, + { + "epoch": 10.066666666666666, + "grad_norm": 3.9094783921140146, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 220520116, + "step": 2416 + }, + { + "epoch": 10.066666666666666, + "loss": 0.06594032049179077, + "loss_ce": 0.0002893781056627631, + "loss_iou": 0.12255859375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 220520116, + "step": 2416 + }, + { + "epoch": 10.070833333333333, + "grad_norm": 5.7386299952124045, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 220611108, + "step": 2417 + }, + { + "epoch": 10.070833333333333, + "loss": 0.04351577162742615, + "loss_ce": 1.2962243999936618e-05, + "loss_iou": 0.2578125, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 220611108, + "step": 2417 + }, + { + "epoch": 10.075, + "grad_norm": 13.471698733873914, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 220702400, + "step": 2418 + }, + { + "epoch": 10.075, + "loss": 0.05986550450325012, + "loss_ce": 0.00011208564683329314, + "loss_iou": 0.23046875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 220702400, + "step": 2418 + }, + { + "epoch": 10.079166666666667, + "grad_norm": 2.2063943344886954, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 220792056, + "step": 2419 + }, + { + "epoch": 10.079166666666667, + "loss": 0.11690068989992142, + "loss_ce": 0.0017883825348690152, + "loss_iou": 0.18359375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 220792056, + "step": 2419 + }, + { + "epoch": 10.083333333333334, + "grad_norm": 2.1165109910760656, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 220883616, + "step": 2420 + }, + { + "epoch": 10.083333333333334, + "loss": 0.073494553565979, + "loss_ce": 0.0004659837868530303, + "loss_iou": 0.3203125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 220883616, + "step": 2420 + }, + { + "epoch": 10.0875, + "grad_norm": 4.4274156190618745, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 220974820, + "step": 2421 + }, + { + "epoch": 10.0875, + "loss": 0.10872413218021393, + "loss_ce": 0.0009665669058449566, + "loss_iou": 0.306640625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 220974820, + "step": 2421 + }, + { + "epoch": 10.091666666666667, + "grad_norm": 14.36812894515794, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 221066324, + "step": 2422 + }, + { + "epoch": 10.091666666666667, + "loss": 0.04289761185646057, + "loss_ce": 0.002934845397248864, + "loss_iou": 0.375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 221066324, + "step": 2422 + }, + { + "epoch": 10.095833333333333, + "grad_norm": 2.791532517943939, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 221157812, + "step": 2423 + }, + { + "epoch": 10.095833333333333, + "loss": 0.10510668158531189, + "loss_ce": 0.0006297547952271998, + "loss_iou": 0.36328125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 221157812, + "step": 2423 + }, + { + "epoch": 10.1, + "grad_norm": 4.795963465485498, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 221248892, + "step": 2424 + }, + { + "epoch": 10.1, + "loss": 0.09126611053943634, + "loss_ce": 0.0003542409685906023, + "loss_iou": 0.361328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 221248892, + "step": 2424 + }, + { + "epoch": 10.104166666666666, + "grad_norm": 1.3895802639770385, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 221340344, + "step": 2425 + }, + { + "epoch": 10.104166666666666, + "loss": 0.04654834046959877, + "loss_ce": 0.00019213651830796152, + "loss_iou": 0.3125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 221340344, + "step": 2425 + }, + { + "epoch": 10.108333333333333, + "grad_norm": 1.9300053468786738, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 221430728, + "step": 2426 + }, + { + "epoch": 10.108333333333333, + "loss": 0.0767325758934021, + "loss_ce": 0.00025553052546456456, + "loss_iou": 0.287109375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 221430728, + "step": 2426 + }, + { + "epoch": 10.1125, + "grad_norm": 6.7978455117570205, + "learning_rate": 5e-05, + "loss": 0.1173, + "num_input_tokens_seen": 221522208, + "step": 2427 + }, + { + "epoch": 10.1125, + "loss": 0.12275524437427521, + "loss_ce": 1.3544628927775193e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 221522208, + "step": 2427 + }, + { + "epoch": 10.116666666666667, + "grad_norm": 3.729614802969419, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 221612324, + "step": 2428 + }, + { + "epoch": 10.116666666666667, + "loss": 0.07310568541288376, + "loss_ce": 0.00018393303616903722, + "loss_iou": 0.376953125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 221612324, + "step": 2428 + }, + { + "epoch": 10.120833333333334, + "grad_norm": 2.520190672936384, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 221703620, + "step": 2429 + }, + { + "epoch": 10.120833333333334, + "loss": 0.06499192863702774, + "loss_ce": 0.0001725930196698755, + "loss_iou": 0.255859375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 221703620, + "step": 2429 + }, + { + "epoch": 10.125, + "grad_norm": 2.4152431639328533, + "learning_rate": 5e-05, + "loss": 0.0932, + "num_input_tokens_seen": 221795388, + "step": 2430 + }, + { + "epoch": 10.125, + "loss": 0.05093023553490639, + "loss_ce": 0.00010320887668058276, + "loss_iou": 0.3046875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 221795388, + "step": 2430 + }, + { + "epoch": 10.129166666666666, + "grad_norm": 5.861639988839476, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 221886460, + "step": 2431 + }, + { + "epoch": 10.129166666666666, + "loss": 0.08886352926492691, + "loss_ce": 3.969657882407773e-06, + "loss_iou": 0.232421875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 221886460, + "step": 2431 + }, + { + "epoch": 10.133333333333333, + "grad_norm": 2.749109833223461, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 221978136, + "step": 2432 + }, + { + "epoch": 10.133333333333333, + "loss": 0.07125408947467804, + "loss_ce": 7.183963316492736e-05, + "loss_iou": 0.28125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 221978136, + "step": 2432 + }, + { + "epoch": 10.1375, + "grad_norm": 4.697700481651981, + "learning_rate": 5e-05, + "loss": 0.0989, + "num_input_tokens_seen": 222069384, + "step": 2433 + }, + { + "epoch": 10.1375, + "loss": 0.05989304929971695, + "loss_ce": 4.8080084525281563e-05, + "loss_iou": 0.287109375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 222069384, + "step": 2433 + }, + { + "epoch": 10.141666666666667, + "grad_norm": 1.8668463225797145, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 222160292, + "step": 2434 + }, + { + "epoch": 10.141666666666667, + "loss": 0.14080928266048431, + "loss_ce": 1.1779565056713182e-06, + "loss_iou": 0.31640625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 222160292, + "step": 2434 + }, + { + "epoch": 10.145833333333334, + "grad_norm": 4.570146064664159, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 222251872, + "step": 2435 + }, + { + "epoch": 10.145833333333334, + "loss": 0.08597946166992188, + "loss_ce": 0.0011863745748996735, + "loss_iou": 0.365234375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 222251872, + "step": 2435 + }, + { + "epoch": 10.15, + "grad_norm": 5.6751823791908285, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 222343836, + "step": 2436 + }, + { + "epoch": 10.15, + "loss": 0.10517530143260956, + "loss_ce": 1.1719241229002364e-05, + "loss_iou": 0.279296875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 222343836, + "step": 2436 + }, + { + "epoch": 10.154166666666667, + "grad_norm": 1.972712912402272, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 222435432, + "step": 2437 + }, + { + "epoch": 10.154166666666667, + "loss": 0.10965215414762497, + "loss_ce": 4.063854430569336e-05, + "loss_iou": 0.291015625, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 222435432, + "step": 2437 + }, + { + "epoch": 10.158333333333333, + "grad_norm": 4.3981004493047635, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 222526416, + "step": 2438 + }, + { + "epoch": 10.158333333333333, + "loss": 0.04471606761217117, + "loss_ce": 0.0001909200509544462, + "loss_iou": 0.3046875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 222526416, + "step": 2438 + }, + { + "epoch": 10.1625, + "grad_norm": 1.7735384153770875, + "learning_rate": 5e-05, + "loss": 0.0924, + "num_input_tokens_seen": 222617280, + "step": 2439 + }, + { + "epoch": 10.1625, + "loss": 0.09482355415821075, + "loss_ce": 0.00012750302266795188, + "loss_iou": 0.09814453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 222617280, + "step": 2439 + }, + { + "epoch": 10.166666666666666, + "grad_norm": 12.698027434976936, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 222709132, + "step": 2440 + }, + { + "epoch": 10.166666666666666, + "loss": 0.11245200037956238, + "loss_ce": 4.050401912536472e-05, + "loss_iou": 0.0537109375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 222709132, + "step": 2440 + }, + { + "epoch": 10.170833333333333, + "grad_norm": 3.190471324233273, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 222800892, + "step": 2441 + }, + { + "epoch": 10.170833333333333, + "loss": 0.05578252300620079, + "loss_ce": 0.0001947572163771838, + "loss_iou": 0.1806640625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 222800892, + "step": 2441 + }, + { + "epoch": 10.175, + "grad_norm": 2.2801811064612574, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 222892336, + "step": 2442 + }, + { + "epoch": 10.175, + "loss": 0.08900895714759827, + "loss_ce": 0.0002638417645357549, + "loss_iou": 0.390625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 222892336, + "step": 2442 + }, + { + "epoch": 10.179166666666667, + "grad_norm": 2.4598503645782115, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 222983220, + "step": 2443 + }, + { + "epoch": 10.179166666666667, + "loss": 0.13636144995689392, + "loss_ce": 2.417125870124437e-05, + "loss_iou": 0.296875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 222983220, + "step": 2443 + }, + { + "epoch": 10.183333333333334, + "grad_norm": 4.214439285952087, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 223074292, + "step": 2444 + }, + { + "epoch": 10.183333333333334, + "loss": 0.054007645696401596, + "loss_ce": 0.00014411890879273415, + "loss_iou": 0.400390625, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 223074292, + "step": 2444 + }, + { + "epoch": 10.1875, + "grad_norm": 3.0707843729279003, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 223165536, + "step": 2445 + }, + { + "epoch": 10.1875, + "loss": 0.06411126255989075, + "loss_ce": 0.0001616842782823369, + "loss_iou": 0.255859375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 223165536, + "step": 2445 + }, + { + "epoch": 10.191666666666666, + "grad_norm": 5.227605457388482, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 223256980, + "step": 2446 + }, + { + "epoch": 10.191666666666666, + "loss": 0.07259244471788406, + "loss_ce": 0.0001131939425249584, + "loss_iou": 0.361328125, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 223256980, + "step": 2446 + }, + { + "epoch": 10.195833333333333, + "grad_norm": 2.310431922091145, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 223348508, + "step": 2447 + }, + { + "epoch": 10.195833333333333, + "loss": 0.07903735339641571, + "loss_ce": 0.00019518463523127139, + "loss_iou": 0.197265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 223348508, + "step": 2447 + }, + { + "epoch": 10.2, + "grad_norm": 8.548814866250257, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 223439892, + "step": 2448 + }, + { + "epoch": 10.2, + "loss": 0.06143535301089287, + "loss_ce": 3.467700935289031e-06, + "loss_iou": 0.11376953125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 223439892, + "step": 2448 + }, + { + "epoch": 10.204166666666667, + "grad_norm": 1.3559549692124349, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 223531256, + "step": 2449 + }, + { + "epoch": 10.204166666666667, + "loss": 0.0661230981349945, + "loss_ce": 6.771959760953905e-06, + "loss_iou": 0.326171875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 223531256, + "step": 2449 + }, + { + "epoch": 10.208333333333334, + "grad_norm": 1.1895310174092164, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 223622460, + "step": 2450 + }, + { + "epoch": 10.208333333333334, + "loss": 0.06462086737155914, + "loss_ce": 1.515869917056989e-05, + "loss_iou": 0.27734375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 223622460, + "step": 2450 + }, + { + "epoch": 10.2125, + "grad_norm": 2.7657554203330577, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 223713960, + "step": 2451 + }, + { + "epoch": 10.2125, + "loss": 0.07661904394626617, + "loss_ce": 0.0004929460119456053, + "loss_iou": 0.38671875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 223713960, + "step": 2451 + }, + { + "epoch": 10.216666666666667, + "grad_norm": 3.2893812369272637, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 223805260, + "step": 2452 + }, + { + "epoch": 10.216666666666667, + "loss": 0.1292020082473755, + "loss_ce": 6.686578126391396e-05, + "loss_iou": 0.22265625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 223805260, + "step": 2452 + }, + { + "epoch": 10.220833333333333, + "grad_norm": 1.2475696406825638, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 223896604, + "step": 2453 + }, + { + "epoch": 10.220833333333333, + "loss": 0.05976950749754906, + "loss_ce": 9.238149505108595e-05, + "loss_iou": 0.3046875, + "loss_num": 0.011962890625, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 223896604, + "step": 2453 + }, + { + "epoch": 10.225, + "grad_norm": 2.364589120670526, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 223987908, + "step": 2454 + }, + { + "epoch": 10.225, + "loss": 0.040394507348537445, + "loss_ce": 8.84165710886009e-05, + "loss_iou": 0.19140625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 223987908, + "step": 2454 + }, + { + "epoch": 10.229166666666666, + "grad_norm": 1.537721777952214, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 224079124, + "step": 2455 + }, + { + "epoch": 10.229166666666666, + "loss": 0.04819488525390625, + "loss_ce": 0.0006408722838386893, + "loss_iou": 0.2490234375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 224079124, + "step": 2455 + }, + { + "epoch": 10.233333333333333, + "grad_norm": 4.249976349145265, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 224170684, + "step": 2456 + }, + { + "epoch": 10.233333333333333, + "loss": 0.043090589344501495, + "loss_ce": 0.00037361119757406414, + "loss_iou": 0.265625, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 224170684, + "step": 2456 + }, + { + "epoch": 10.2375, + "grad_norm": 3.0823394236162, + "learning_rate": 5e-05, + "loss": 0.1423, + "num_input_tokens_seen": 224261828, + "step": 2457 + }, + { + "epoch": 10.2375, + "loss": 0.13820458948612213, + "loss_ce": 0.0003261679084971547, + "loss_iou": 0.30078125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 224261828, + "step": 2457 + }, + { + "epoch": 10.241666666666667, + "grad_norm": 1.923597296697882, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 224353200, + "step": 2458 + }, + { + "epoch": 10.241666666666667, + "loss": 0.05683402344584465, + "loss_ce": 4.081109000253491e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 224353200, + "step": 2458 + }, + { + "epoch": 10.245833333333334, + "grad_norm": 3.913499891437732, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 224444804, + "step": 2459 + }, + { + "epoch": 10.245833333333334, + "loss": 0.026809057220816612, + "loss_ce": 6.039956497261301e-05, + "loss_iou": 0.220703125, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 224444804, + "step": 2459 + }, + { + "epoch": 10.25, + "grad_norm": 1.0660942413214372, + "learning_rate": 5e-05, + "loss": 0.1453, + "num_input_tokens_seen": 224535972, + "step": 2460 + }, + { + "epoch": 10.25, + "loss": 0.1777527630329132, + "loss_ce": 0.00019386685744393617, + "loss_iou": 0.2373046875, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 224535972, + "step": 2460 + }, + { + "epoch": 10.254166666666666, + "grad_norm": 5.531253479696742, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 224627484, + "step": 2461 + }, + { + "epoch": 10.254166666666666, + "loss": 0.04134564474225044, + "loss_ce": 0.0007114895852282643, + "loss_iou": 0.2041015625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 224627484, + "step": 2461 + }, + { + "epoch": 10.258333333333333, + "grad_norm": 1.8363294736874196, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 224718824, + "step": 2462 + }, + { + "epoch": 10.258333333333333, + "loss": 0.06277695298194885, + "loss_ce": 2.2918047761777416e-06, + "loss_iou": 0.310546875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 224718824, + "step": 2462 + }, + { + "epoch": 10.2625, + "grad_norm": 1.8460751885950988, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 224810796, + "step": 2463 + }, + { + "epoch": 10.2625, + "loss": 0.10428975522518158, + "loss_ce": 0.00013325779582373798, + "loss_iou": 0.1708984375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 224810796, + "step": 2463 + }, + { + "epoch": 10.266666666666667, + "grad_norm": 1.4976747720517578, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 224901868, + "step": 2464 + }, + { + "epoch": 10.266666666666667, + "loss": 0.07676523923873901, + "loss_ce": 0.0001432291028322652, + "loss_iou": 0.23828125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 224901868, + "step": 2464 + }, + { + "epoch": 10.270833333333334, + "grad_norm": 3.112973875767646, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 224993660, + "step": 2465 + }, + { + "epoch": 10.270833333333334, + "loss": 0.07043145596981049, + "loss_ce": 0.00016473176947329193, + "loss_iou": 0.2314453125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 224993660, + "step": 2465 + }, + { + "epoch": 10.275, + "grad_norm": 7.317607041578287, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 225084528, + "step": 2466 + }, + { + "epoch": 10.275, + "loss": 0.05415572226047516, + "loss_ce": 4.8060854169307277e-05, + "loss_iou": 0.384765625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 225084528, + "step": 2466 + }, + { + "epoch": 10.279166666666667, + "grad_norm": 3.3420650983507394, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 225175876, + "step": 2467 + }, + { + "epoch": 10.279166666666667, + "loss": 0.10633950680494308, + "loss_ce": 1.6262883946183138e-05, + "loss_iou": 0.341796875, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 225175876, + "step": 2467 + }, + { + "epoch": 10.283333333333333, + "grad_norm": 3.436964712975802, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 225266960, + "step": 2468 + }, + { + "epoch": 10.283333333333333, + "loss": 0.09859128296375275, + "loss_ce": 1.9506447642925195e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 225266960, + "step": 2468 + }, + { + "epoch": 10.2875, + "grad_norm": 3.1825697171052942, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 225358192, + "step": 2469 + }, + { + "epoch": 10.2875, + "loss": 0.05997881665825844, + "loss_ce": 7.280804857145995e-05, + "loss_iou": 0.296875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 225358192, + "step": 2469 + }, + { + "epoch": 10.291666666666666, + "grad_norm": 4.272989950784549, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 225449832, + "step": 2470 + }, + { + "epoch": 10.291666666666666, + "loss": 0.08155052363872528, + "loss_ce": 0.0001906609977595508, + "loss_iou": 0.279296875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 225449832, + "step": 2470 + }, + { + "epoch": 10.295833333333333, + "grad_norm": 34.74430851314654, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 225540424, + "step": 2471 + }, + { + "epoch": 10.295833333333333, + "loss": 0.1386980563402176, + "loss_ce": 1.0921379725914448e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 225540424, + "step": 2471 + }, + { + "epoch": 10.3, + "grad_norm": 3.0368183871807917, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 225630200, + "step": 2472 + }, + { + "epoch": 10.3, + "loss": 0.04973556473851204, + "loss_ce": 0.0013957209885120392, + "loss_iou": 0.3125, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 225630200, + "step": 2472 + }, + { + "epoch": 10.304166666666667, + "grad_norm": 3.523903342921853, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 225720748, + "step": 2473 + }, + { + "epoch": 10.304166666666667, + "loss": 0.05381819233298302, + "loss_ce": 0.000320877181366086, + "loss_iou": 0.34765625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 225720748, + "step": 2473 + }, + { + "epoch": 10.308333333333334, + "grad_norm": 4.619375355597199, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 225811212, + "step": 2474 + }, + { + "epoch": 10.308333333333334, + "loss": 0.06829918175935745, + "loss_ce": 8.393885764235165e-07, + "loss_iou": 0.35546875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 225811212, + "step": 2474 + }, + { + "epoch": 10.3125, + "grad_norm": 2.4553281904077906, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 225902520, + "step": 2475 + }, + { + "epoch": 10.3125, + "loss": 0.046314314007759094, + "loss_ce": 3.887810635205824e-06, + "loss_iou": 0.28125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 225902520, + "step": 2475 + }, + { + "epoch": 10.316666666666666, + "grad_norm": 5.3015376076212775, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 225993824, + "step": 2476 + }, + { + "epoch": 10.316666666666666, + "loss": 0.05360734462738037, + "loss_ce": 0.0008271909318864346, + "loss_iou": 0.203125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 225993824, + "step": 2476 + }, + { + "epoch": 10.320833333333333, + "grad_norm": 4.167800578454227, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 226085280, + "step": 2477 + }, + { + "epoch": 10.320833333333333, + "loss": 0.12834444642066956, + "loss_ce": 2.764263854260207e-06, + "loss_iou": 0.3046875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 226085280, + "step": 2477 + }, + { + "epoch": 10.325, + "grad_norm": 5.162117634743744, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 226177060, + "step": 2478 + }, + { + "epoch": 10.325, + "loss": 0.05269046127796173, + "loss_ce": 0.00023074712953530252, + "loss_iou": 0.302734375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 226177060, + "step": 2478 + }, + { + "epoch": 10.329166666666667, + "grad_norm": 2.6086503652713957, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 226267776, + "step": 2479 + }, + { + "epoch": 10.329166666666667, + "loss": 0.03804173693060875, + "loss_ce": 1.683563277765643e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 226267776, + "step": 2479 + }, + { + "epoch": 10.333333333333334, + "grad_norm": 4.493073901021718, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 226358440, + "step": 2480 + }, + { + "epoch": 10.333333333333334, + "loss": 0.05348392203450203, + "loss_ce": 1.865989361249376e-06, + "loss_iou": 0.26953125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 226358440, + "step": 2480 + }, + { + "epoch": 10.3375, + "grad_norm": 2.343907113482161, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 226449808, + "step": 2481 + }, + { + "epoch": 10.3375, + "loss": 0.03878547623753548, + "loss_ce": 1.2893915481981821e-05, + "loss_iou": 0.380859375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 226449808, + "step": 2481 + }, + { + "epoch": 10.341666666666667, + "grad_norm": 6.73918095196411, + "learning_rate": 5e-05, + "loss": 0.1366, + "num_input_tokens_seen": 226541812, + "step": 2482 + }, + { + "epoch": 10.341666666666667, + "loss": 0.1784840226173401, + "loss_ce": 0.00026137454551644623, + "loss_iou": 0.35546875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 226541812, + "step": 2482 + }, + { + "epoch": 10.345833333333333, + "grad_norm": 2.8465636354751473, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 226632944, + "step": 2483 + }, + { + "epoch": 10.345833333333333, + "loss": 0.08737257122993469, + "loss_ce": 7.506472456952906e-07, + "loss_iou": 0.30078125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 226632944, + "step": 2483 + }, + { + "epoch": 10.35, + "grad_norm": 3.0168112348538316, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 226724220, + "step": 2484 + }, + { + "epoch": 10.35, + "loss": 0.04172979295253754, + "loss_ce": 0.00010381372703704983, + "loss_iou": 0.294921875, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 226724220, + "step": 2484 + }, + { + "epoch": 10.354166666666666, + "grad_norm": 7.525334521925941, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 226815480, + "step": 2485 + }, + { + "epoch": 10.354166666666666, + "loss": 0.048039909452199936, + "loss_ce": 5.101898204884492e-05, + "loss_iou": 0.302734375, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 226815480, + "step": 2485 + }, + { + "epoch": 10.358333333333333, + "grad_norm": 4.079229634153809, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 226906372, + "step": 2486 + }, + { + "epoch": 10.358333333333333, + "loss": 0.09189343452453613, + "loss_ce": 5.005502316635102e-06, + "loss_iou": 0.275390625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 226906372, + "step": 2486 + }, + { + "epoch": 10.3625, + "grad_norm": 2.3354944781338545, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 226998504, + "step": 2487 + }, + { + "epoch": 10.3625, + "loss": 0.05715341866016388, + "loss_ce": 0.0005738280597142875, + "loss_iou": 0.18359375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 226998504, + "step": 2487 + }, + { + "epoch": 10.366666666666667, + "grad_norm": 3.8303454841396722, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 227089372, + "step": 2488 + }, + { + "epoch": 10.366666666666667, + "loss": 0.08700613677501678, + "loss_ce": 0.003235388780012727, + "loss_iou": 0.263671875, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 227089372, + "step": 2488 + }, + { + "epoch": 10.370833333333334, + "grad_norm": 3.429327852601159, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 227180756, + "step": 2489 + }, + { + "epoch": 10.370833333333334, + "loss": 0.03858550265431404, + "loss_ce": 1.128264375438448e-05, + "loss_iou": 0.33203125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 227180756, + "step": 2489 + }, + { + "epoch": 10.375, + "grad_norm": 3.16423758573065, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 227271620, + "step": 2490 + }, + { + "epoch": 10.375, + "loss": 0.0924580842256546, + "loss_ce": 5.079701168142492e-06, + "loss_iou": 0.216796875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 227271620, + "step": 2490 + }, + { + "epoch": 10.379166666666666, + "grad_norm": 4.138720832179852, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 227362088, + "step": 2491 + }, + { + "epoch": 10.379166666666666, + "loss": 0.09778620302677155, + "loss_ce": 7.887525498517789e-06, + "loss_iou": 0.447265625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 227362088, + "step": 2491 + }, + { + "epoch": 10.383333333333333, + "grad_norm": 4.817565170683859, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 227453448, + "step": 2492 + }, + { + "epoch": 10.383333333333333, + "loss": 0.08528520911931992, + "loss_ce": 3.835527422779705e-06, + "loss_iou": 0.1748046875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 227453448, + "step": 2492 + }, + { + "epoch": 10.3875, + "grad_norm": 3.2928127220446988, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 227544780, + "step": 2493 + }, + { + "epoch": 10.3875, + "loss": 0.07505002617835999, + "loss_ce": 0.004905372392386198, + "loss_iou": 0.2392578125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 227544780, + "step": 2493 + }, + { + "epoch": 10.391666666666667, + "grad_norm": 4.6372402442600515, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 227636708, + "step": 2494 + }, + { + "epoch": 10.391666666666667, + "loss": 0.09854992479085922, + "loss_ce": 0.0006647917907685041, + "loss_iou": 0.150390625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 227636708, + "step": 2494 + }, + { + "epoch": 10.395833333333334, + "grad_norm": 1.6189678476279272, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 227728100, + "step": 2495 + }, + { + "epoch": 10.395833333333334, + "loss": 0.07708020508289337, + "loss_ce": 2.331818177481182e-05, + "loss_iou": 0.28515625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 227728100, + "step": 2495 + }, + { + "epoch": 10.4, + "grad_norm": 1.3578423056075073, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 227818952, + "step": 2496 + }, + { + "epoch": 10.4, + "loss": 0.07700711488723755, + "loss_ce": 2.6529296519584022e-05, + "loss_iou": 0.12890625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 227818952, + "step": 2496 + }, + { + "epoch": 10.404166666666667, + "grad_norm": 1.8049258585400736, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 227909404, + "step": 2497 + }, + { + "epoch": 10.404166666666667, + "loss": 0.05526716262102127, + "loss_ce": 7.46006662666332e-06, + "loss_iou": 0.244140625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 227909404, + "step": 2497 + }, + { + "epoch": 10.408333333333333, + "grad_norm": 2.9045362810221476, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 228000976, + "step": 2498 + }, + { + "epoch": 10.408333333333333, + "loss": 0.07178732752799988, + "loss_ce": 0.00047538039507344365, + "loss_iou": 0.271484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 228000976, + "step": 2498 + }, + { + "epoch": 10.4125, + "grad_norm": 2.2211804392233496, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 228092024, + "step": 2499 + }, + { + "epoch": 10.4125, + "loss": 0.04628128185868263, + "loss_ce": 1.3733673540627933e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 228092024, + "step": 2499 + }, + { + "epoch": 10.416666666666666, + "grad_norm": 2.9487757030361825, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 228183328, + "step": 2500 + }, + { + "epoch": 10.416666666666666, + "eval_seeclick_CIoU": 0.3149303123354912, + "eval_seeclick_GIoU": 0.29233773797750473, + "eval_seeclick_IoU": 0.3987307697534561, + "eval_seeclick_MAE_all": 0.08514390140771866, + "eval_seeclick_MAE_h": 0.07179565727710724, + "eval_seeclick_MAE_w": 0.15848005563020706, + "eval_seeclick_MAE_x_boxes": 0.16587074100971222, + "eval_seeclick_MAE_y_boxes": 0.07483186945319176, + "eval_seeclick_NUM_probability": 0.999997466802597, + "eval_seeclick_inside_bbox": 0.6321022808551788, + "eval_seeclick_loss": 0.5096176862716675, + "eval_seeclick_loss_ce": 0.1360682100057602, + "eval_seeclick_loss_iou": 0.504638671875, + "eval_seeclick_loss_num": 0.07415771484375, + "eval_seeclick_loss_xval": 0.3709716796875, + "eval_seeclick_runtime": 75.6845, + "eval_seeclick_samples_per_second": 0.568, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 228183328, + "step": 2500 + }, + { + "epoch": 10.416666666666666, + "eval_icons_CIoU": 0.4136456251144409, + "eval_icons_GIoU": 0.41529805958271027, + "eval_icons_IoU": 0.47623029351234436, + "eval_icons_MAE_all": 0.06310141086578369, + "eval_icons_MAE_h": 0.11154510080814362, + "eval_icons_MAE_w": 0.09058283641934395, + "eval_icons_MAE_x_boxes": 0.09419732540845871, + "eval_icons_MAE_y_boxes": 0.12008867785334587, + "eval_icons_NUM_probability": 0.9999987185001373, + "eval_icons_inside_bbox": 0.7170138955116272, + "eval_icons_loss": 0.32223400473594666, + "eval_icons_loss_ce": 9.500573696641368e-06, + "eval_icons_loss_iou": 0.3427734375, + "eval_icons_loss_num": 0.06640625, + "eval_icons_loss_xval": 0.3321533203125, + "eval_icons_runtime": 98.1797, + "eval_icons_samples_per_second": 0.509, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 228183328, + "step": 2500 + }, + { + "epoch": 10.416666666666666, + "eval_screenspot_CIoU": 0.39047037561734516, + "eval_screenspot_GIoU": 0.3841150104999542, + "eval_screenspot_IoU": 0.46335774660110474, + "eval_screenspot_MAE_all": 0.09988050907850266, + "eval_screenspot_MAE_h": 0.09078333526849747, + "eval_screenspot_MAE_w": 0.20148720343907675, + "eval_screenspot_MAE_x_boxes": 0.19923935333887735, + "eval_screenspot_MAE_y_boxes": 0.08086183667182922, + "eval_screenspot_NUM_probability": 0.9999897480010986, + "eval_screenspot_inside_bbox": 0.7295833428700765, + "eval_screenspot_loss": 0.4993094205856323, + "eval_screenspot_loss_ce": 0.004859724128133773, + "eval_screenspot_loss_iou": 0.3956705729166667, + "eval_screenspot_loss_num": 0.09987894694010417, + "eval_screenspot_loss_xval": 0.4995524088541667, + "eval_screenspot_runtime": 151.0375, + "eval_screenspot_samples_per_second": 0.589, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 228183328, + "step": 2500 + }, + { + "epoch": 10.416666666666666, + "eval_compot_CIoU": 0.45688967406749725, + "eval_compot_GIoU": 0.44939421117305756, + "eval_compot_IoU": 0.5281907916069031, + "eval_compot_MAE_all": 0.05896926671266556, + "eval_compot_MAE_h": 0.07026198133826256, + "eval_compot_MAE_w": 0.1460394188761711, + "eval_compot_MAE_x_boxes": 0.14645987004041672, + "eval_compot_MAE_y_boxes": 0.07101576030254364, + "eval_compot_NUM_probability": 0.9999970197677612, + "eval_compot_inside_bbox": 0.7361111044883728, + "eval_compot_loss": 0.30137625336647034, + "eval_compot_loss_ce": 0.028895296156406403, + "eval_compot_loss_iou": 0.29559326171875, + "eval_compot_loss_num": 0.052188873291015625, + "eval_compot_loss_xval": 0.2608489990234375, + "eval_compot_runtime": 86.7894, + "eval_compot_samples_per_second": 0.576, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 228183328, + "step": 2500 + }, + { + "epoch": 10.416666666666666, + "loss": 0.3029475212097168, + "loss_ce": 0.025023944675922394, + "loss_iou": 0.275390625, + "loss_num": 0.0556640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 228183328, + "step": 2500 + }, + { + "epoch": 10.420833333333333, + "grad_norm": 2.5303109849448733, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 228274200, + "step": 2501 + }, + { + "epoch": 10.420833333333333, + "loss": 0.07500009983778, + "loss_ce": 3.149094027321553e-06, + "loss_iou": 0.4375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 228274200, + "step": 2501 + }, + { + "epoch": 10.425, + "grad_norm": 2.5967014863549074, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 228364664, + "step": 2502 + }, + { + "epoch": 10.425, + "loss": 0.09338469803333282, + "loss_ce": 9.118302841670811e-07, + "loss_iou": 0.25390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 228364664, + "step": 2502 + }, + { + "epoch": 10.429166666666667, + "grad_norm": 7.713983434423728, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 228455736, + "step": 2503 + }, + { + "epoch": 10.429166666666667, + "loss": 0.12286948412656784, + "loss_ce": 5.714358849218115e-06, + "loss_iou": 0.4765625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 228455736, + "step": 2503 + }, + { + "epoch": 10.433333333333334, + "grad_norm": 3.87512890789201, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 228547240, + "step": 2504 + }, + { + "epoch": 10.433333333333334, + "loss": 0.07037439942359924, + "loss_ce": 8.583416502006003e-07, + "loss_iou": 0.373046875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 228547240, + "step": 2504 + }, + { + "epoch": 10.4375, + "grad_norm": 4.133812777201116, + "learning_rate": 5e-05, + "loss": 0.05, + "num_input_tokens_seen": 228639228, + "step": 2505 + }, + { + "epoch": 10.4375, + "loss": 0.04950854182243347, + "loss_ce": 0.0004667960456572473, + "loss_iou": 0.34375, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 228639228, + "step": 2505 + }, + { + "epoch": 10.441666666666666, + "grad_norm": 7.732185340749662, + "learning_rate": 5e-05, + "loss": 0.0786, + "num_input_tokens_seen": 228730848, + "step": 2506 + }, + { + "epoch": 10.441666666666666, + "loss": 0.05839722603559494, + "loss_ce": 1.835384864534717e-06, + "loss_iou": 0.2001953125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 228730848, + "step": 2506 + }, + { + "epoch": 10.445833333333333, + "grad_norm": 3.433041817943037, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 228821788, + "step": 2507 + }, + { + "epoch": 10.445833333333333, + "loss": 0.03542107343673706, + "loss_ce": 0.0001351251994492486, + "loss_iou": 0.09716796875, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 228821788, + "step": 2507 + }, + { + "epoch": 10.45, + "grad_norm": 3.121765436554312, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 228912956, + "step": 2508 + }, + { + "epoch": 10.45, + "loss": 0.07435625791549683, + "loss_ce": 4.214227374177426e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 228912956, + "step": 2508 + }, + { + "epoch": 10.454166666666667, + "grad_norm": 2.0324462426108636, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 229004264, + "step": 2509 + }, + { + "epoch": 10.454166666666667, + "loss": 0.044322483241558075, + "loss_ce": 0.00011014081974280998, + "loss_iou": 0.1826171875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 229004264, + "step": 2509 + }, + { + "epoch": 10.458333333333334, + "grad_norm": 2.1366102938245723, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 229095928, + "step": 2510 + }, + { + "epoch": 10.458333333333334, + "loss": 0.07690685987472534, + "loss_ce": 0.002535521052777767, + "loss_iou": 0.26953125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 229095928, + "step": 2510 + }, + { + "epoch": 10.4625, + "grad_norm": 2.19711807686499, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 229186916, + "step": 2511 + }, + { + "epoch": 10.4625, + "loss": 0.0904402881860733, + "loss_ce": 3.196924808435142e-05, + "loss_iou": 0.28125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 229186916, + "step": 2511 + }, + { + "epoch": 10.466666666666667, + "grad_norm": 4.7434556280046865, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 229278420, + "step": 2512 + }, + { + "epoch": 10.466666666666667, + "loss": 0.10490299016237259, + "loss_ce": 0.0016315041575580835, + "loss_iou": 0.3671875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 229278420, + "step": 2512 + }, + { + "epoch": 10.470833333333333, + "grad_norm": 4.876042891988366, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 229370024, + "step": 2513 + }, + { + "epoch": 10.470833333333333, + "loss": 0.06508772075176239, + "loss_ce": 0.0031675598584115505, + "loss_iou": 0.224609375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 229370024, + "step": 2513 + }, + { + "epoch": 10.475, + "grad_norm": 3.5778712305029354, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 229461440, + "step": 2514 + }, + { + "epoch": 10.475, + "loss": 0.10127786546945572, + "loss_ce": 0.000813998281955719, + "loss_iou": 0.38671875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 229461440, + "step": 2514 + }, + { + "epoch": 10.479166666666666, + "grad_norm": 2.5220713836612223, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 229552776, + "step": 2515 + }, + { + "epoch": 10.479166666666666, + "loss": 0.04373849928379059, + "loss_ce": 0.0002585825277492404, + "loss_iou": 0.11376953125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 229552776, + "step": 2515 + }, + { + "epoch": 10.483333333333333, + "grad_norm": 1.26617865145252, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 229643696, + "step": 2516 + }, + { + "epoch": 10.483333333333333, + "loss": 0.08795313537120819, + "loss_ce": 1.478557919654122e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 229643696, + "step": 2516 + }, + { + "epoch": 10.4875, + "grad_norm": 1.1901244706406917, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 229734900, + "step": 2517 + }, + { + "epoch": 10.4875, + "loss": 0.04605482146143913, + "loss_ce": 0.0002479375689290464, + "loss_iou": 0.2392578125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 229734900, + "step": 2517 + }, + { + "epoch": 10.491666666666667, + "grad_norm": 2.702939547986177, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 229826620, + "step": 2518 + }, + { + "epoch": 10.491666666666667, + "loss": 0.06707486510276794, + "loss_ce": 0.0004855117294937372, + "loss_iou": 0.33203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 229826620, + "step": 2518 + }, + { + "epoch": 10.495833333333334, + "grad_norm": 3.7483403240961337, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 229917808, + "step": 2519 + }, + { + "epoch": 10.495833333333334, + "loss": 0.0458785817027092, + "loss_ce": 2.5918541723513044e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 229917808, + "step": 2519 + }, + { + "epoch": 10.5, + "grad_norm": 2.40528501657669, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 230009924, + "step": 2520 + }, + { + "epoch": 10.5, + "loss": 0.03555392846465111, + "loss_ce": 0.0005693387938663363, + "loss_iou": 0.197265625, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 230009924, + "step": 2520 + }, + { + "epoch": 10.504166666666666, + "grad_norm": 3.4734031929175573, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 230100984, + "step": 2521 + }, + { + "epoch": 10.504166666666666, + "loss": 0.044868022203445435, + "loss_ce": 5.295852679410018e-05, + "loss_iou": 0.25390625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 230100984, + "step": 2521 + }, + { + "epoch": 10.508333333333333, + "grad_norm": 5.234999676194138, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 230192776, + "step": 2522 + }, + { + "epoch": 10.508333333333333, + "loss": 0.054706450551748276, + "loss_ce": 0.00014102361456025392, + "loss_iou": 0.173828125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 230192776, + "step": 2522 + }, + { + "epoch": 10.5125, + "grad_norm": 3.749565842887734, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 230284256, + "step": 2523 + }, + { + "epoch": 10.5125, + "loss": 0.06063781678676605, + "loss_ce": 4.516471744864248e-05, + "loss_iou": 0.146484375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 230284256, + "step": 2523 + }, + { + "epoch": 10.516666666666667, + "grad_norm": 3.3831922261258116, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 230375284, + "step": 2524 + }, + { + "epoch": 10.516666666666667, + "loss": 0.05358021706342697, + "loss_ce": 2.186834899475798e-05, + "loss_iou": 0.328125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 230375284, + "step": 2524 + }, + { + "epoch": 10.520833333333334, + "grad_norm": 3.6643316429433046, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 230466500, + "step": 2525 + }, + { + "epoch": 10.520833333333334, + "loss": 0.10528016090393066, + "loss_ce": 0.0007498256163671613, + "loss_iou": 0.154296875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 230466500, + "step": 2525 + }, + { + "epoch": 10.525, + "grad_norm": 2.9672800808077473, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 230557956, + "step": 2526 + }, + { + "epoch": 10.525, + "loss": 0.07855658233165741, + "loss_ce": 4.3351515159884e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 230557956, + "step": 2526 + }, + { + "epoch": 10.529166666666667, + "grad_norm": 5.584652279705522, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 230649600, + "step": 2527 + }, + { + "epoch": 10.529166666666667, + "loss": 0.03324050456285477, + "loss_ce": 2.2119897039374337e-05, + "loss_iou": 0.244140625, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 230649600, + "step": 2527 + }, + { + "epoch": 10.533333333333333, + "grad_norm": 2.918245547540015, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 230740764, + "step": 2528 + }, + { + "epoch": 10.533333333333333, + "loss": 0.04487369954586029, + "loss_ce": 2.8118285626987927e-05, + "loss_iou": 0.28515625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 230740764, + "step": 2528 + }, + { + "epoch": 10.5375, + "grad_norm": 2.0239125246620118, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 230832676, + "step": 2529 + }, + { + "epoch": 10.5375, + "loss": 0.04169199988245964, + "loss_ce": 0.0004474924935493618, + "loss_iou": 0.29296875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 230832676, + "step": 2529 + }, + { + "epoch": 10.541666666666666, + "grad_norm": 1.7093839443831509, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 230924080, + "step": 2530 + }, + { + "epoch": 10.541666666666666, + "loss": 0.03623117879033089, + "loss_ce": 3.7330690247472376e-05, + "loss_iou": 0.193359375, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 230924080, + "step": 2530 + }, + { + "epoch": 10.545833333333333, + "grad_norm": 5.344313472098823, + "learning_rate": 5e-05, + "loss": 0.1128, + "num_input_tokens_seen": 231015492, + "step": 2531 + }, + { + "epoch": 10.545833333333333, + "loss": 0.12598028779029846, + "loss_ce": 0.0008887368021532893, + "loss_iou": 0.23046875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 231015492, + "step": 2531 + }, + { + "epoch": 10.55, + "grad_norm": 2.217578905840367, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 231107208, + "step": 2532 + }, + { + "epoch": 10.55, + "loss": 0.09489748626947403, + "loss_ce": 0.0016052497085183859, + "loss_iou": 0.236328125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 231107208, + "step": 2532 + }, + { + "epoch": 10.554166666666667, + "grad_norm": 13.061473664615402, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 231198376, + "step": 2533 + }, + { + "epoch": 10.554166666666667, + "loss": 0.06628895550966263, + "loss_ce": 4.775848992721876e-06, + "loss_iou": 0.357421875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 231198376, + "step": 2533 + }, + { + "epoch": 10.558333333333334, + "grad_norm": 4.530044256683953, + "learning_rate": 5e-05, + "loss": 0.0964, + "num_input_tokens_seen": 231288708, + "step": 2534 + }, + { + "epoch": 10.558333333333334, + "loss": 0.10373524576425552, + "loss_ce": 0.0003416899999137968, + "loss_iou": 0.27734375, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 231288708, + "step": 2534 + }, + { + "epoch": 10.5625, + "grad_norm": 3.1504933757286473, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 231379416, + "step": 2535 + }, + { + "epoch": 10.5625, + "loss": 0.08838987350463867, + "loss_ce": 2.6227504349662922e-05, + "loss_iou": 0.318359375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 231379416, + "step": 2535 + }, + { + "epoch": 10.566666666666666, + "grad_norm": 2.0429632457317712, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 231469732, + "step": 2536 + }, + { + "epoch": 10.566666666666666, + "loss": 0.05402039363980293, + "loss_ce": 8.057022205321118e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 231469732, + "step": 2536 + }, + { + "epoch": 10.570833333333333, + "grad_norm": 7.041689939197253, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 231561780, + "step": 2537 + }, + { + "epoch": 10.570833333333333, + "loss": 0.12750005722045898, + "loss_ce": 0.0014472047332674265, + "loss_iou": 0.267578125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 231561780, + "step": 2537 + }, + { + "epoch": 10.575, + "grad_norm": 5.025073454078799, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 231653080, + "step": 2538 + }, + { + "epoch": 10.575, + "loss": 0.045722320675849915, + "loss_ce": 0.00022060942137613893, + "loss_iou": 0.22265625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 231653080, + "step": 2538 + }, + { + "epoch": 10.579166666666667, + "grad_norm": 4.000437161547661, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 231744568, + "step": 2539 + }, + { + "epoch": 10.579166666666667, + "loss": 0.09481997787952423, + "loss_ce": 0.0008105772431008518, + "loss_iou": 0.296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 231744568, + "step": 2539 + }, + { + "epoch": 10.583333333333334, + "grad_norm": 4.126768940670154, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 231836040, + "step": 2540 + }, + { + "epoch": 10.583333333333334, + "loss": 0.0900723934173584, + "loss_ce": 0.002563235815614462, + "loss_iou": 0.30859375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 231836040, + "step": 2540 + }, + { + "epoch": 10.5875, + "grad_norm": 3.0471114034022024, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 231927320, + "step": 2541 + }, + { + "epoch": 10.5875, + "loss": 0.06366278976202011, + "loss_ce": 0.00046088872477412224, + "loss_iou": 0.330078125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 231927320, + "step": 2541 + }, + { + "epoch": 10.591666666666667, + "grad_norm": 4.389275583769407, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 232018744, + "step": 2542 + }, + { + "epoch": 10.591666666666667, + "loss": 0.09230601787567139, + "loss_ce": 0.0013178624212741852, + "loss_iou": 0.296875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 232018744, + "step": 2542 + }, + { + "epoch": 10.595833333333333, + "grad_norm": 4.083220168684195, + "learning_rate": 5e-05, + "loss": 0.107, + "num_input_tokens_seen": 232110568, + "step": 2543 + }, + { + "epoch": 10.595833333333333, + "loss": 0.127569779753685, + "loss_ce": 0.00014362900401465595, + "loss_iou": 0.224609375, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 232110568, + "step": 2543 + }, + { + "epoch": 10.6, + "grad_norm": 2.2540952928617104, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 232202672, + "step": 2544 + }, + { + "epoch": 10.6, + "loss": 0.07920961081981659, + "loss_ce": 0.0016644495772197843, + "loss_iou": 0.1884765625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 232202672, + "step": 2544 + }, + { + "epoch": 10.604166666666666, + "grad_norm": 1.7401042439735654, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 232293812, + "step": 2545 + }, + { + "epoch": 10.604166666666666, + "loss": 0.08084672689437866, + "loss_ce": 0.0001735099358484149, + "loss_iou": 0.25, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 232293812, + "step": 2545 + }, + { + "epoch": 10.608333333333333, + "grad_norm": 2.5874497229374778, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 232384532, + "step": 2546 + }, + { + "epoch": 10.608333333333333, + "loss": 0.09803829342126846, + "loss_ce": 3.1087609386304393e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 232384532, + "step": 2546 + }, + { + "epoch": 10.6125, + "grad_norm": 3.1909477246298508, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 232475536, + "step": 2547 + }, + { + "epoch": 10.6125, + "loss": 0.09706706553697586, + "loss_ce": 5.904821591684595e-06, + "loss_iou": 0.3125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 232475536, + "step": 2547 + }, + { + "epoch": 10.616666666666667, + "grad_norm": 11.624037688005709, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 232566972, + "step": 2548 + }, + { + "epoch": 10.616666666666667, + "loss": 0.08421897143125534, + "loss_ce": 5.714677172363736e-06, + "loss_iou": 0.302734375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 232566972, + "step": 2548 + }, + { + "epoch": 10.620833333333334, + "grad_norm": 2.889980560632474, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 232658416, + "step": 2549 + }, + { + "epoch": 10.620833333333334, + "loss": 0.07642048597335815, + "loss_ce": 4.466603968467098e-06, + "loss_iou": 0.380859375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 232658416, + "step": 2549 + }, + { + "epoch": 10.625, + "grad_norm": 2.3197193197672, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 232749400, + "step": 2550 + }, + { + "epoch": 10.625, + "loss": 0.045643728226423264, + "loss_ce": 0.0016984152607619762, + "loss_iou": 0.212890625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 232749400, + "step": 2550 + }, + { + "epoch": 10.629166666666666, + "grad_norm": 2.9763418010603013, + "learning_rate": 5e-05, + "loss": 0.0382, + "num_input_tokens_seen": 232839592, + "step": 2551 + }, + { + "epoch": 10.629166666666666, + "loss": 0.025699859485030174, + "loss_ce": 7.8733210102655e-06, + "loss_iou": 0.2099609375, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 232839592, + "step": 2551 + }, + { + "epoch": 10.633333333333333, + "grad_norm": 3.2701773771201594, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 232932300, + "step": 2552 + }, + { + "epoch": 10.633333333333333, + "loss": 0.08045702427625656, + "loss_ce": 0.0002110503555741161, + "loss_iou": 0.263671875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 232932300, + "step": 2552 + }, + { + "epoch": 10.6375, + "grad_norm": 2.3879359360601127, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 233023152, + "step": 2553 + }, + { + "epoch": 10.6375, + "loss": 0.06964591145515442, + "loss_ce": 0.00015738507499918342, + "loss_iou": 0.15234375, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 233023152, + "step": 2553 + }, + { + "epoch": 10.641666666666667, + "grad_norm": 5.26547456458262, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 233114204, + "step": 2554 + }, + { + "epoch": 10.641666666666667, + "loss": 0.04762497916817665, + "loss_ce": 0.00011673916014842689, + "loss_iou": 0.1875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 233114204, + "step": 2554 + }, + { + "epoch": 10.645833333333334, + "grad_norm": 2.687133977675234, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 233205552, + "step": 2555 + }, + { + "epoch": 10.645833333333334, + "loss": 0.05727348476648331, + "loss_ce": 7.248850124597084e-06, + "loss_iou": 0.21484375, + "loss_num": 0.01141357421875, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 233205552, + "step": 2555 + }, + { + "epoch": 10.65, + "grad_norm": 2.9639326833082666, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 233296880, + "step": 2556 + }, + { + "epoch": 10.65, + "loss": 0.04553859680891037, + "loss_ce": 0.00023525467258878052, + "loss_iou": 0.279296875, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 233296880, + "step": 2556 + }, + { + "epoch": 10.654166666666667, + "grad_norm": 2.45748708235402, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 233387816, + "step": 2557 + }, + { + "epoch": 10.654166666666667, + "loss": 0.0732211172580719, + "loss_ce": 0.00042143999598920345, + "loss_iou": 0.2294921875, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 233387816, + "step": 2557 + }, + { + "epoch": 10.658333333333333, + "grad_norm": 5.809434756244132, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 233478620, + "step": 2558 + }, + { + "epoch": 10.658333333333333, + "loss": 0.06174005568027496, + "loss_ce": 2.998542186105624e-06, + "loss_iou": 0.310546875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 233478620, + "step": 2558 + }, + { + "epoch": 10.6625, + "grad_norm": 13.545347879170736, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 233570304, + "step": 2559 + }, + { + "epoch": 10.6625, + "loss": 0.07798929512500763, + "loss_ce": 0.00036783432005904615, + "loss_iou": 0.1728515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 233570304, + "step": 2559 + }, + { + "epoch": 10.666666666666666, + "grad_norm": 3.0516130652912024, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 233661480, + "step": 2560 + }, + { + "epoch": 10.666666666666666, + "loss": 0.041023723781108856, + "loss_ce": 5.387671626522206e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 233661480, + "step": 2560 + }, + { + "epoch": 10.670833333333333, + "grad_norm": 2.5559688986889144, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 233753020, + "step": 2561 + }, + { + "epoch": 10.670833333333333, + "loss": 0.03713707625865936, + "loss_ce": 0.00019554520258679986, + "loss_iou": 0.13671875, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 233753020, + "step": 2561 + }, + { + "epoch": 10.675, + "grad_norm": 4.498740216899864, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 233844764, + "step": 2562 + }, + { + "epoch": 10.675, + "loss": 0.14017170667648315, + "loss_ce": 4.261473077349365e-05, + "loss_iou": 0.15234375, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 233844764, + "step": 2562 + }, + { + "epoch": 10.679166666666667, + "grad_norm": 4.092962491430164, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 233936216, + "step": 2563 + }, + { + "epoch": 10.679166666666667, + "loss": 0.06921578198671341, + "loss_ce": 0.0005436029750853777, + "loss_iou": 0.1181640625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 233936216, + "step": 2563 + }, + { + "epoch": 10.683333333333334, + "grad_norm": 1.8805188688713392, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 234027008, + "step": 2564 + }, + { + "epoch": 10.683333333333334, + "loss": 0.09404385089874268, + "loss_ce": 3.933888820029097e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 234027008, + "step": 2564 + }, + { + "epoch": 10.6875, + "grad_norm": 2.359065599566822, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 234117888, + "step": 2565 + }, + { + "epoch": 10.6875, + "loss": 0.07438018172979355, + "loss_ce": 0.0009396261302754283, + "loss_iou": 0.21875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 234117888, + "step": 2565 + }, + { + "epoch": 10.691666666666666, + "grad_norm": 11.401119252449865, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 234209816, + "step": 2566 + }, + { + "epoch": 10.691666666666666, + "loss": 0.045751944184303284, + "loss_ce": 5.949885962763801e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 234209816, + "step": 2566 + }, + { + "epoch": 10.695833333333333, + "grad_norm": 5.266923879339174, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 234301648, + "step": 2567 + }, + { + "epoch": 10.695833333333333, + "loss": 0.0653052031993866, + "loss_ce": 0.00030275885364972055, + "loss_iou": 0.298828125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 234301648, + "step": 2567 + }, + { + "epoch": 10.7, + "grad_norm": 5.586196928970968, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 234392848, + "step": 2568 + }, + { + "epoch": 10.7, + "loss": 0.09796257317066193, + "loss_ce": 0.0005962373688817024, + "loss_iou": 0.373046875, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 234392848, + "step": 2568 + }, + { + "epoch": 10.704166666666667, + "grad_norm": 2.099581042369191, + "learning_rate": 5e-05, + "loss": 0.1066, + "num_input_tokens_seen": 234483628, + "step": 2569 + }, + { + "epoch": 10.704166666666667, + "loss": 0.11219096183776855, + "loss_ce": 7.143721632019151e-07, + "loss_iou": 0.21484375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 234483628, + "step": 2569 + }, + { + "epoch": 10.708333333333334, + "grad_norm": 3.739938443996037, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 234574800, + "step": 2570 + }, + { + "epoch": 10.708333333333334, + "loss": 0.1270475685596466, + "loss_ce": 3.3405965950805694e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 234574800, + "step": 2570 + }, + { + "epoch": 10.7125, + "grad_norm": 5.373325813911932, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 234666544, + "step": 2571 + }, + { + "epoch": 10.7125, + "loss": 0.05830381438136101, + "loss_ce": 0.0001296793925575912, + "loss_iou": 0.240234375, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 234666544, + "step": 2571 + }, + { + "epoch": 10.716666666666667, + "grad_norm": 1.9381368497514062, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 234757532, + "step": 2572 + }, + { + "epoch": 10.716666666666667, + "loss": 0.08373898267745972, + "loss_ce": 1.4005401681060903e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 234757532, + "step": 2572 + }, + { + "epoch": 10.720833333333333, + "grad_norm": 15.191232019692768, + "learning_rate": 5e-05, + "loss": 0.1624, + "num_input_tokens_seen": 234848952, + "step": 2573 + }, + { + "epoch": 10.720833333333333, + "loss": 0.21807676553726196, + "loss_ce": 0.00047117145732045174, + "loss_iou": 0.1240234375, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 234848952, + "step": 2573 + }, + { + "epoch": 10.725, + "grad_norm": 5.944073703575605, + "learning_rate": 5e-05, + "loss": 0.106, + "num_input_tokens_seen": 234940592, + "step": 2574 + }, + { + "epoch": 10.725, + "loss": 0.06124042719602585, + "loss_ce": 2.2165504560689442e-05, + "loss_iou": 0.3359375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 234940592, + "step": 2574 + }, + { + "epoch": 10.729166666666666, + "grad_norm": 4.891518135126141, + "learning_rate": 5e-05, + "loss": 0.0982, + "num_input_tokens_seen": 235032148, + "step": 2575 + }, + { + "epoch": 10.729166666666666, + "loss": 0.13088181614875793, + "loss_ce": 0.019523173570632935, + "loss_iou": 0.2734375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 235032148, + "step": 2575 + }, + { + "epoch": 10.733333333333333, + "grad_norm": 6.031822907931793, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 235123412, + "step": 2576 + }, + { + "epoch": 10.733333333333333, + "loss": 0.11574000120162964, + "loss_ce": 0.002123055746778846, + "loss_iou": 0.2265625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 235123412, + "step": 2576 + }, + { + "epoch": 10.7375, + "grad_norm": 7.517324480847781, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 235214944, + "step": 2577 + }, + { + "epoch": 10.7375, + "loss": 0.09054480493068695, + "loss_ce": 0.00038061931263655424, + "loss_iou": 0.06640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 235214944, + "step": 2577 + }, + { + "epoch": 10.741666666666667, + "grad_norm": 2.871107651385034, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 235306616, + "step": 2578 + }, + { + "epoch": 10.741666666666667, + "loss": 0.08062739670276642, + "loss_ce": 3.0476576284854673e-05, + "loss_iou": 0.28515625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 235306616, + "step": 2578 + }, + { + "epoch": 10.745833333333334, + "grad_norm": 2.5856460886512402, + "learning_rate": 5e-05, + "loss": 0.1758, + "num_input_tokens_seen": 235397184, + "step": 2579 + }, + { + "epoch": 10.745833333333334, + "loss": 0.24237322807312012, + "loss_ce": 2.62036132880894e-06, + "loss_iou": 0.185546875, + "loss_num": 0.048583984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 235397184, + "step": 2579 + }, + { + "epoch": 10.75, + "grad_norm": 5.199548367122784, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 235488592, + "step": 2580 + }, + { + "epoch": 10.75, + "loss": 0.09395498037338257, + "loss_ce": 0.0002583862515166402, + "loss_iou": 0.064453125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 235488592, + "step": 2580 + }, + { + "epoch": 10.754166666666666, + "grad_norm": 2.1957185686027225, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 235579488, + "step": 2581 + }, + { + "epoch": 10.754166666666666, + "loss": 0.07799072563648224, + "loss_ce": 3.053093678317964e-06, + "loss_iou": 0.1435546875, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 235579488, + "step": 2581 + }, + { + "epoch": 10.758333333333333, + "grad_norm": 6.386006493395034, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 235670732, + "step": 2582 + }, + { + "epoch": 10.758333333333333, + "loss": 0.05041830986738205, + "loss_ce": 3.2693253615434514e-06, + "loss_iou": 0.35546875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 235670732, + "step": 2582 + }, + { + "epoch": 10.7625, + "grad_norm": 3.321241011083686, + "learning_rate": 5e-05, + "loss": 0.0945, + "num_input_tokens_seen": 235762244, + "step": 2583 + }, + { + "epoch": 10.7625, + "loss": 0.09518767893314362, + "loss_ce": 0.00030090424115769565, + "loss_iou": 0.2236328125, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 235762244, + "step": 2583 + }, + { + "epoch": 10.766666666666667, + "grad_norm": 1.7715456393543487, + "learning_rate": 5e-05, + "loss": 0.075, + "num_input_tokens_seen": 235853820, + "step": 2584 + }, + { + "epoch": 10.766666666666667, + "loss": 0.07396815717220306, + "loss_ce": 0.0009090721141546965, + "loss_iou": 0.25390625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 235853820, + "step": 2584 + }, + { + "epoch": 10.770833333333334, + "grad_norm": 21.63454670478571, + "learning_rate": 5e-05, + "loss": 0.1392, + "num_input_tokens_seen": 235944788, + "step": 2585 + }, + { + "epoch": 10.770833333333334, + "loss": 0.18823395669460297, + "loss_ce": 1.532650117042067e-06, + "loss_iou": 0.34375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 235944788, + "step": 2585 + }, + { + "epoch": 10.775, + "grad_norm": 2.631021488424049, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 236036360, + "step": 2586 + }, + { + "epoch": 10.775, + "loss": 0.07843972742557526, + "loss_ce": 0.0016269797924906015, + "loss_iou": 0.158203125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 236036360, + "step": 2586 + }, + { + "epoch": 10.779166666666667, + "grad_norm": 3.2757981534867824, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 236127020, + "step": 2587 + }, + { + "epoch": 10.779166666666667, + "loss": 0.11449539661407471, + "loss_ce": 0.0005198677536100149, + "loss_iou": 0.234375, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 236127020, + "step": 2587 + }, + { + "epoch": 10.783333333333333, + "grad_norm": 4.218455808026256, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 236218416, + "step": 2588 + }, + { + "epoch": 10.783333333333333, + "loss": 0.05689278990030289, + "loss_ce": 2.3287324438570067e-05, + "loss_iou": 0.28125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 236218416, + "step": 2588 + }, + { + "epoch": 10.7875, + "grad_norm": 2.7576194479682266, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 236309272, + "step": 2589 + }, + { + "epoch": 10.7875, + "loss": 0.03268555551767349, + "loss_ce": 1.229744611919159e-06, + "loss_iou": 0.1689453125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 236309272, + "step": 2589 + }, + { + "epoch": 10.791666666666666, + "grad_norm": 2.956816374332155, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 236400560, + "step": 2590 + }, + { + "epoch": 10.791666666666666, + "loss": 0.09306585043668747, + "loss_ce": 1.7757985915523022e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 236400560, + "step": 2590 + }, + { + "epoch": 10.795833333333333, + "grad_norm": 2.098715879741373, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 236492296, + "step": 2591 + }, + { + "epoch": 10.795833333333333, + "loss": 0.08957656472921371, + "loss_ce": 0.0009382657590322196, + "loss_iou": 0.3515625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 236492296, + "step": 2591 + }, + { + "epoch": 10.8, + "grad_norm": 3.2751263712768846, + "learning_rate": 5e-05, + "loss": 0.1033, + "num_input_tokens_seen": 236584492, + "step": 2592 + }, + { + "epoch": 10.8, + "loss": 0.1428438276052475, + "loss_ce": 0.0007082168012857437, + "loss_iou": 0.3359375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 236584492, + "step": 2592 + }, + { + "epoch": 10.804166666666667, + "grad_norm": 4.761788300939806, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 236676088, + "step": 2593 + }, + { + "epoch": 10.804166666666667, + "loss": 0.08923020958900452, + "loss_ce": 8.836462075123563e-05, + "loss_iou": 0.310546875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 236676088, + "step": 2593 + }, + { + "epoch": 10.808333333333334, + "grad_norm": 2.6994689974716977, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 236767300, + "step": 2594 + }, + { + "epoch": 10.808333333333334, + "loss": 0.0671512633562088, + "loss_ce": 2.0224437321303412e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 236767300, + "step": 2594 + }, + { + "epoch": 10.8125, + "grad_norm": 5.731491341541353, + "learning_rate": 5e-05, + "loss": 0.0899, + "num_input_tokens_seen": 236859044, + "step": 2595 + }, + { + "epoch": 10.8125, + "loss": 0.0962974950671196, + "loss_ce": 0.0019981807563453913, + "loss_iou": 0.322265625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 236859044, + "step": 2595 + }, + { + "epoch": 10.816666666666666, + "grad_norm": 4.80794396287632, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 236950460, + "step": 2596 + }, + { + "epoch": 10.816666666666666, + "loss": 0.06012987345457077, + "loss_ce": 4.0764378354651853e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 236950460, + "step": 2596 + }, + { + "epoch": 10.820833333333333, + "grad_norm": 3.2646915505200527, + "learning_rate": 5e-05, + "loss": 0.1233, + "num_input_tokens_seen": 237041240, + "step": 2597 + }, + { + "epoch": 10.820833333333333, + "loss": 0.1575869917869568, + "loss_ce": 9.482464520260692e-06, + "loss_iou": 0.26953125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 237041240, + "step": 2597 + }, + { + "epoch": 10.825, + "grad_norm": 3.8923479859874397, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 237132320, + "step": 2598 + }, + { + "epoch": 10.825, + "loss": 0.07181952893733978, + "loss_ce": 1.1675666428345721e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 237132320, + "step": 2598 + }, + { + "epoch": 10.829166666666667, + "grad_norm": 3.9108713110099256, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 237223864, + "step": 2599 + }, + { + "epoch": 10.829166666666667, + "loss": 0.05638699233531952, + "loss_ce": 0.0020809650886803865, + "loss_iou": 0.267578125, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 237223864, + "step": 2599 + }, + { + "epoch": 10.833333333333334, + "grad_norm": 2.547917471524506, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 237315552, + "step": 2600 + }, + { + "epoch": 10.833333333333334, + "loss": 0.048316840082407, + "loss_ce": 0.0009230421273969114, + "loss_iou": 0.275390625, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 237315552, + "step": 2600 + }, + { + "epoch": 10.8375, + "grad_norm": 7.022061095017465, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 237407428, + "step": 2601 + }, + { + "epoch": 10.8375, + "loss": 0.05036468803882599, + "loss_ce": 0.00019378944125492126, + "loss_iou": 0.3515625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 237407428, + "step": 2601 + }, + { + "epoch": 10.841666666666667, + "grad_norm": 7.585959599877752, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 237498360, + "step": 2602 + }, + { + "epoch": 10.841666666666667, + "loss": 0.08449165523052216, + "loss_ce": 1.8997769075213e-05, + "loss_iou": 0.314453125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 237498360, + "step": 2602 + }, + { + "epoch": 10.845833333333333, + "grad_norm": 7.524249545336901, + "learning_rate": 5e-05, + "loss": 0.1283, + "num_input_tokens_seen": 237589592, + "step": 2603 + }, + { + "epoch": 10.845833333333333, + "loss": 0.1548469066619873, + "loss_ce": 0.002121679950505495, + "loss_iou": 0.3203125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 237589592, + "step": 2603 + }, + { + "epoch": 10.85, + "grad_norm": 5.884465054260644, + "learning_rate": 5e-05, + "loss": 0.0835, + "num_input_tokens_seen": 237679388, + "step": 2604 + }, + { + "epoch": 10.85, + "loss": 0.08232827484607697, + "loss_ce": 6.813806248828769e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 237679388, + "step": 2604 + }, + { + "epoch": 10.854166666666666, + "grad_norm": 6.344010520408848, + "learning_rate": 5e-05, + "loss": 0.0872, + "num_input_tokens_seen": 237770712, + "step": 2605 + }, + { + "epoch": 10.854166666666666, + "loss": 0.12124098837375641, + "loss_ce": 0.00011671679385472089, + "loss_iou": 0.224609375, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 237770712, + "step": 2605 + }, + { + "epoch": 10.858333333333333, + "grad_norm": 2.1007016750079925, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 237862096, + "step": 2606 + }, + { + "epoch": 10.858333333333333, + "loss": 0.1339530646800995, + "loss_ce": 4.193175118416548e-05, + "loss_iou": 0.181640625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 237862096, + "step": 2606 + }, + { + "epoch": 10.8625, + "grad_norm": 2.0791420139647303, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 237953916, + "step": 2607 + }, + { + "epoch": 10.8625, + "loss": 0.05738446116447449, + "loss_ce": 4.956373595632613e-05, + "loss_iou": 0.220703125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 237953916, + "step": 2607 + }, + { + "epoch": 10.866666666666667, + "grad_norm": 3.145243872953806, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 238045100, + "step": 2608 + }, + { + "epoch": 10.866666666666667, + "loss": 0.05611884593963623, + "loss_ce": 6.568455864908174e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 238045100, + "step": 2608 + }, + { + "epoch": 10.870833333333334, + "grad_norm": 3.5832832158308277, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 238136528, + "step": 2609 + }, + { + "epoch": 10.870833333333334, + "loss": 0.08797188103199005, + "loss_ce": 0.0010883386712521315, + "loss_iou": 0.1435546875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 238136528, + "step": 2609 + }, + { + "epoch": 10.875, + "grad_norm": 5.225832555854499, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 238228552, + "step": 2610 + }, + { + "epoch": 10.875, + "loss": 0.06515424698591232, + "loss_ce": 0.0020591537468135357, + "loss_iou": 0.197265625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 238228552, + "step": 2610 + }, + { + "epoch": 10.879166666666666, + "grad_norm": 3.5337897267468645, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 238319624, + "step": 2611 + }, + { + "epoch": 10.879166666666666, + "loss": 0.035280607640743256, + "loss_ce": 0.0001701373839750886, + "loss_iou": 0.27734375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 238319624, + "step": 2611 + }, + { + "epoch": 10.883333333333333, + "grad_norm": 2.669757033141276, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 238411064, + "step": 2612 + }, + { + "epoch": 10.883333333333333, + "loss": 0.07555267959833145, + "loss_ce": 2.1668691260856576e-05, + "loss_iou": 0.18359375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 238411064, + "step": 2612 + }, + { + "epoch": 10.8875, + "grad_norm": 24.944381665824572, + "learning_rate": 5e-05, + "loss": 0.0967, + "num_input_tokens_seen": 238502616, + "step": 2613 + }, + { + "epoch": 10.8875, + "loss": 0.10890813171863556, + "loss_ce": 5.193160177441314e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 238502616, + "step": 2613 + }, + { + "epoch": 10.891666666666667, + "grad_norm": 3.9529351305551583, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 238593976, + "step": 2614 + }, + { + "epoch": 10.891666666666667, + "loss": 0.15259791910648346, + "loss_ce": 2.5290042685810477e-05, + "loss_iou": 0.306640625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 238593976, + "step": 2614 + }, + { + "epoch": 10.895833333333334, + "grad_norm": 3.306602582204345, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 238685196, + "step": 2615 + }, + { + "epoch": 10.895833333333334, + "loss": 0.07463531196117401, + "loss_ce": 0.00047758998698554933, + "loss_iou": 0.373046875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 238685196, + "step": 2615 + }, + { + "epoch": 10.9, + "grad_norm": 11.791081179572238, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 238776636, + "step": 2616 + }, + { + "epoch": 10.9, + "loss": 0.05548732355237007, + "loss_ce": 0.00040309398900717497, + "loss_iou": 0.2265625, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 238776636, + "step": 2616 + }, + { + "epoch": 10.904166666666667, + "grad_norm": 3.243954880467529, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 238867920, + "step": 2617 + }, + { + "epoch": 10.904166666666667, + "loss": 0.0947527140378952, + "loss_ce": 4.1406026866752654e-05, + "loss_iou": 0.228515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 238867920, + "step": 2617 + }, + { + "epoch": 10.908333333333333, + "grad_norm": 3.0316435369779824, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 238959192, + "step": 2618 + }, + { + "epoch": 10.908333333333333, + "loss": 0.06768861413002014, + "loss_ce": 0.0001989835436688736, + "loss_iou": 0.1884765625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 238959192, + "step": 2618 + }, + { + "epoch": 10.9125, + "grad_norm": 4.897163052854971, + "learning_rate": 5e-05, + "loss": 0.128, + "num_input_tokens_seen": 239050212, + "step": 2619 + }, + { + "epoch": 10.9125, + "loss": 0.14370054006576538, + "loss_ce": 0.0013970638392493129, + "loss_iou": 0.21875, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 239050212, + "step": 2619 + }, + { + "epoch": 10.916666666666666, + "grad_norm": 7.104457595482057, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 239142140, + "step": 2620 + }, + { + "epoch": 10.916666666666666, + "loss": 0.14011695981025696, + "loss_ce": 0.002284326357766986, + "loss_iou": 0.1806640625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 239142140, + "step": 2620 + }, + { + "epoch": 10.920833333333333, + "grad_norm": 1.5029055909970825, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 239233036, + "step": 2621 + }, + { + "epoch": 10.920833333333333, + "loss": 0.048193223774433136, + "loss_ce": 5.966435310256202e-06, + "loss_iou": 0.1591796875, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 239233036, + "step": 2621 + }, + { + "epoch": 10.925, + "grad_norm": 3.191602960669044, + "learning_rate": 5e-05, + "loss": 0.1152, + "num_input_tokens_seen": 239323756, + "step": 2622 + }, + { + "epoch": 10.925, + "loss": 0.16263824701309204, + "loss_ce": 2.4303417376358993e-06, + "loss_iou": 0.26953125, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 239323756, + "step": 2622 + }, + { + "epoch": 10.929166666666667, + "grad_norm": 3.065510203947232, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 239415352, + "step": 2623 + }, + { + "epoch": 10.929166666666667, + "loss": 0.051864929497241974, + "loss_ce": 0.0011141971917822957, + "loss_iou": 0.19921875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 239415352, + "step": 2623 + }, + { + "epoch": 10.933333333333334, + "grad_norm": 2.946121885961593, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 239506196, + "step": 2624 + }, + { + "epoch": 10.933333333333334, + "loss": 0.06659112870693207, + "loss_ce": 0.00015435564273502678, + "loss_iou": 0.248046875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 239506196, + "step": 2624 + }, + { + "epoch": 10.9375, + "grad_norm": 2.685711695313958, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 239597536, + "step": 2625 + }, + { + "epoch": 10.9375, + "loss": 0.07138238847255707, + "loss_ce": 0.002153257606551051, + "loss_iou": 0.2001953125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 239597536, + "step": 2625 + }, + { + "epoch": 10.941666666666666, + "grad_norm": 9.989995815244335, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 239688564, + "step": 2626 + }, + { + "epoch": 10.941666666666666, + "loss": 0.04970329999923706, + "loss_ce": 0.0002648217196110636, + "loss_iou": 0.201171875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 239688564, + "step": 2626 + }, + { + "epoch": 10.945833333333333, + "grad_norm": 4.675366276387425, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 239779924, + "step": 2627 + }, + { + "epoch": 10.945833333333333, + "loss": 0.060884036123752594, + "loss_ce": 4.724512109532952e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 239779924, + "step": 2627 + }, + { + "epoch": 10.95, + "grad_norm": 8.176285506220234, + "learning_rate": 5e-05, + "loss": 0.1314, + "num_input_tokens_seen": 239871828, + "step": 2628 + }, + { + "epoch": 10.95, + "loss": 0.11220882833003998, + "loss_ce": 0.0012774209026247263, + "loss_iou": 0.2080078125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 239871828, + "step": 2628 + }, + { + "epoch": 10.954166666666667, + "grad_norm": 3.0610626513608596, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 239962868, + "step": 2629 + }, + { + "epoch": 10.954166666666667, + "loss": 0.02921304665505886, + "loss_ce": 0.00024423663853667676, + "loss_iou": 0.2470703125, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 239962868, + "step": 2629 + }, + { + "epoch": 10.958333333333334, + "grad_norm": 3.2222858925683404, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 240054444, + "step": 2630 + }, + { + "epoch": 10.958333333333334, + "loss": 0.1313033401966095, + "loss_ce": 1.4491429283225443e-06, + "loss_iou": 0.322265625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 240054444, + "step": 2630 + }, + { + "epoch": 10.9625, + "grad_norm": 3.3929942447700796, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 240146056, + "step": 2631 + }, + { + "epoch": 10.9625, + "loss": 0.06336264312267303, + "loss_ce": 0.0006490138475783169, + "loss_iou": 0.265625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 240146056, + "step": 2631 + }, + { + "epoch": 10.966666666666667, + "grad_norm": 2.5227606594562912, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 240237644, + "step": 2632 + }, + { + "epoch": 10.966666666666667, + "loss": 0.05070841312408447, + "loss_ce": 1.8717042621574365e-05, + "loss_iou": 0.318359375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 240237644, + "step": 2632 + }, + { + "epoch": 10.970833333333333, + "grad_norm": 2.520264622085186, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 240326952, + "step": 2633 + }, + { + "epoch": 10.970833333333333, + "loss": 0.0733042061328888, + "loss_ce": 9.8748421351047e-07, + "loss_iou": 0.1767578125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 240326952, + "step": 2633 + }, + { + "epoch": 10.975, + "grad_norm": 1.9563081150893862, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 240417816, + "step": 2634 + }, + { + "epoch": 10.975, + "loss": 0.11251506209373474, + "loss_ce": 0.001293750829063356, + "loss_iou": 0.125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 240417816, + "step": 2634 + }, + { + "epoch": 10.979166666666666, + "grad_norm": 5.065297860890833, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 240509576, + "step": 2635 + }, + { + "epoch": 10.979166666666666, + "loss": 0.028818506747484207, + "loss_ce": 4.042866930831224e-05, + "loss_iou": 0.240234375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 240509576, + "step": 2635 + }, + { + "epoch": 10.983333333333333, + "grad_norm": 3.16934427084196, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 240600848, + "step": 2636 + }, + { + "epoch": 10.983333333333333, + "loss": 0.0635094940662384, + "loss_ce": 2.4082135041680885e-06, + "loss_iou": 0.3203125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 240600848, + "step": 2636 + }, + { + "epoch": 10.9875, + "grad_norm": 2.495854510196729, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 240690652, + "step": 2637 + }, + { + "epoch": 10.9875, + "loss": 0.044383447617292404, + "loss_ce": 5.6665088777663186e-05, + "loss_iou": 0.322265625, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 240690652, + "step": 2637 + }, + { + "epoch": 10.991666666666667, + "grad_norm": 7.214089578682879, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 240782300, + "step": 2638 + }, + { + "epoch": 10.991666666666667, + "loss": 0.05883432552218437, + "loss_ce": 0.0020868880674242973, + "loss_iou": 0.388671875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 240782300, + "step": 2638 + }, + { + "epoch": 10.995833333333334, + "grad_norm": 2.7885933218806205, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 240872792, + "step": 2639 + }, + { + "epoch": 10.995833333333334, + "loss": 0.14906570315361023, + "loss_ce": 2.5991162146965507e-06, + "loss_iou": 0.279296875, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 240872792, + "step": 2639 + }, + { + "epoch": 11.0, + "grad_norm": 2.0176084409762773, + "learning_rate": 5e-05, + "loss": 0.1402, + "num_input_tokens_seen": 240963908, + "step": 2640 + }, + { + "epoch": 11.0, + "loss": 0.15255336463451385, + "loss_ce": 5.702380076400004e-05, + "loss_iou": 0.216796875, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 240963908, + "step": 2640 + }, + { + "epoch": 11.004166666666666, + "grad_norm": 2.553639431002751, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 241055888, + "step": 2641 + }, + { + "epoch": 11.004166666666666, + "loss": 0.07801361382007599, + "loss_ce": 0.0003005999606102705, + "loss_iou": 0.2412109375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 241055888, + "step": 2641 + }, + { + "epoch": 11.008333333333333, + "grad_norm": 1.587553055646591, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 241147336, + "step": 2642 + }, + { + "epoch": 11.008333333333333, + "loss": 0.03565359115600586, + "loss_ce": 1.6691326891304925e-05, + "loss_iou": 0.251953125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 241147336, + "step": 2642 + }, + { + "epoch": 11.0125, + "grad_norm": 2.8004207008971833, + "learning_rate": 5e-05, + "loss": 0.1247, + "num_input_tokens_seen": 241238736, + "step": 2643 + }, + { + "epoch": 11.0125, + "loss": 0.10212784260511398, + "loss_ce": 0.00034409199724905193, + "loss_iou": 0.251953125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 241238736, + "step": 2643 + }, + { + "epoch": 11.016666666666667, + "grad_norm": 5.175036951556304, + "learning_rate": 5e-05, + "loss": 0.0914, + "num_input_tokens_seen": 241328948, + "step": 2644 + }, + { + "epoch": 11.016666666666667, + "loss": 0.11754395812749863, + "loss_ce": 8.180072472896427e-05, + "loss_iou": 0.412109375, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 241328948, + "step": 2644 + }, + { + "epoch": 11.020833333333334, + "grad_norm": 4.164233302103024, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 241420064, + "step": 2645 + }, + { + "epoch": 11.020833333333334, + "loss": 0.10768741369247437, + "loss_ce": 8.243230695370585e-05, + "loss_iou": 0.21875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 241420064, + "step": 2645 + }, + { + "epoch": 11.025, + "grad_norm": 14.355901836167327, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 241511284, + "step": 2646 + }, + { + "epoch": 11.025, + "loss": 0.05288579314947128, + "loss_ce": 0.00034978328039869666, + "loss_iou": 0.3125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 241511284, + "step": 2646 + }, + { + "epoch": 11.029166666666667, + "grad_norm": 2.7846388045000774, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 241601988, + "step": 2647 + }, + { + "epoch": 11.029166666666667, + "loss": 0.0509650744497776, + "loss_ce": 3.123717033304274e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 241601988, + "step": 2647 + }, + { + "epoch": 11.033333333333333, + "grad_norm": 1.847015603045074, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 241693232, + "step": 2648 + }, + { + "epoch": 11.033333333333333, + "loss": 0.05165189504623413, + "loss_ce": 5.429990778793581e-05, + "loss_iou": 0.017578125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 241693232, + "step": 2648 + }, + { + "epoch": 11.0375, + "grad_norm": 3.794780395752318, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 241784292, + "step": 2649 + }, + { + "epoch": 11.0375, + "loss": 0.05502014979720116, + "loss_ce": 2.7472731744637713e-05, + "loss_iou": 0.27734375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 241784292, + "step": 2649 + }, + { + "epoch": 11.041666666666666, + "grad_norm": 4.257199059185294, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 241875824, + "step": 2650 + }, + { + "epoch": 11.041666666666666, + "loss": 0.058951519429683685, + "loss_ce": 2.207309807999991e-05, + "loss_iou": 0.16796875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 241875824, + "step": 2650 + }, + { + "epoch": 11.045833333333333, + "grad_norm": 4.761678759376161, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 241968188, + "step": 2651 + }, + { + "epoch": 11.045833333333333, + "loss": 0.04210842028260231, + "loss_ce": 0.0004442967183422297, + "loss_iou": 0.306640625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 241968188, + "step": 2651 + }, + { + "epoch": 11.05, + "grad_norm": 3.6609282092189925, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 242059468, + "step": 2652 + }, + { + "epoch": 11.05, + "loss": 0.0841054618358612, + "loss_ce": 1.4272000044002198e-05, + "loss_iou": 0.365234375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 242059468, + "step": 2652 + }, + { + "epoch": 11.054166666666667, + "grad_norm": 2.6215961232335614, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 242150620, + "step": 2653 + }, + { + "epoch": 11.054166666666667, + "loss": 0.12753871083259583, + "loss_ce": 5.753432560595684e-06, + "loss_iou": 0.408203125, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 242150620, + "step": 2653 + }, + { + "epoch": 11.058333333333334, + "grad_norm": 3.6705535538829057, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 242241964, + "step": 2654 + }, + { + "epoch": 11.058333333333334, + "loss": 0.04872170090675354, + "loss_ce": 8.017166692297906e-06, + "loss_iou": 0.296875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 242241964, + "step": 2654 + }, + { + "epoch": 11.0625, + "grad_norm": 3.096843310450073, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 242333540, + "step": 2655 + }, + { + "epoch": 11.0625, + "loss": 0.06332937628030777, + "loss_ce": 0.00015799149696249515, + "loss_iou": 0.265625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 242333540, + "step": 2655 + }, + { + "epoch": 11.066666666666666, + "grad_norm": 2.821347636168007, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 242424904, + "step": 2656 + }, + { + "epoch": 11.066666666666666, + "loss": 0.04395774379372597, + "loss_ce": 0.00011924280261155218, + "loss_iou": 0.2578125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 242424904, + "step": 2656 + }, + { + "epoch": 11.070833333333333, + "grad_norm": 1.9314475325144638, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 242516172, + "step": 2657 + }, + { + "epoch": 11.070833333333333, + "loss": 0.040782131254673004, + "loss_ce": 4.116656054975465e-05, + "loss_iou": 0.25390625, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 242516172, + "step": 2657 + }, + { + "epoch": 11.075, + "grad_norm": 3.1928357911856935, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 242607204, + "step": 2658 + }, + { + "epoch": 11.075, + "loss": 0.05490949749946594, + "loss_ce": 0.00016096464241854846, + "loss_iou": 0.2421875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 242607204, + "step": 2658 + }, + { + "epoch": 11.079166666666667, + "grad_norm": 3.403930510911585, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 242698216, + "step": 2659 + }, + { + "epoch": 11.079166666666667, + "loss": 0.06885186582803726, + "loss_ce": 0.0006450839573517442, + "loss_iou": 0.27734375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 242698216, + "step": 2659 + }, + { + "epoch": 11.083333333333334, + "grad_norm": 3.0371808321247102, + "learning_rate": 5e-05, + "loss": 0.1277, + "num_input_tokens_seen": 242789448, + "step": 2660 + }, + { + "epoch": 11.083333333333334, + "loss": 0.12627384066581726, + "loss_ce": 0.0011212533572688699, + "loss_iou": 0.375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 242789448, + "step": 2660 + }, + { + "epoch": 11.0875, + "grad_norm": 2.826800200963949, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 242881280, + "step": 2661 + }, + { + "epoch": 11.0875, + "loss": 0.039384517818689346, + "loss_ce": 0.00023809520644135773, + "loss_iou": 0.1826171875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 242881280, + "step": 2661 + }, + { + "epoch": 11.091666666666667, + "grad_norm": 1.9785576025845197, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 242972648, + "step": 2662 + }, + { + "epoch": 11.091666666666667, + "loss": 0.06952418386936188, + "loss_ce": 3.565673978300765e-05, + "loss_iou": 0.353515625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 242972648, + "step": 2662 + }, + { + "epoch": 11.095833333333333, + "grad_norm": 2.9877109491263703, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 243063848, + "step": 2663 + }, + { + "epoch": 11.095833333333333, + "loss": 0.03487911820411682, + "loss_ce": 0.00013485604722518474, + "loss_iou": 0.287109375, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 243063848, + "step": 2663 + }, + { + "epoch": 11.1, + "grad_norm": 5.189930396367434, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 243155072, + "step": 2664 + }, + { + "epoch": 11.1, + "loss": 0.04049532860517502, + "loss_ce": 1.3759843568550423e-05, + "loss_iou": 0.2578125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 243155072, + "step": 2664 + }, + { + "epoch": 11.104166666666666, + "grad_norm": 4.169496908002205, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 243245912, + "step": 2665 + }, + { + "epoch": 11.104166666666666, + "loss": 0.07048118859529495, + "loss_ce": 8.375644711122732e-07, + "loss_iou": 0.361328125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 243245912, + "step": 2665 + }, + { + "epoch": 11.108333333333333, + "grad_norm": 6.217395923068188, + "learning_rate": 5e-05, + "loss": 0.0828, + "num_input_tokens_seen": 243337164, + "step": 2666 + }, + { + "epoch": 11.108333333333333, + "loss": 0.07544789463281631, + "loss_ce": 0.00016102896188385785, + "loss_iou": 0.216796875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 243337164, + "step": 2666 + }, + { + "epoch": 11.1125, + "grad_norm": 2.7128551737207593, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 243427952, + "step": 2667 + }, + { + "epoch": 11.1125, + "loss": 0.0546993650496006, + "loss_ce": 1.186842655442888e-05, + "loss_iou": 0.3359375, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 243427952, + "step": 2667 + }, + { + "epoch": 11.116666666666667, + "grad_norm": 5.04724571226057, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 243518432, + "step": 2668 + }, + { + "epoch": 11.116666666666667, + "loss": 0.05978452041745186, + "loss_ce": 5.869951564818621e-07, + "loss_iou": 0.37109375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 243518432, + "step": 2668 + }, + { + "epoch": 11.120833333333334, + "grad_norm": 2.9978691447633525, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 243609316, + "step": 2669 + }, + { + "epoch": 11.120833333333334, + "loss": 0.048206619918346405, + "loss_ce": 6.514426786452532e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 243609316, + "step": 2669 + }, + { + "epoch": 11.125, + "grad_norm": 3.8070064442548017, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 243700540, + "step": 2670 + }, + { + "epoch": 11.125, + "loss": 0.04456840828061104, + "loss_ce": 2.8001604732708074e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 243700540, + "step": 2670 + }, + { + "epoch": 11.129166666666666, + "grad_norm": 2.5533515554126276, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 243792088, + "step": 2671 + }, + { + "epoch": 11.129166666666666, + "loss": 0.07489380240440369, + "loss_ce": 3.6675114643003326e-06, + "loss_iou": 0.228515625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 243792088, + "step": 2671 + }, + { + "epoch": 11.133333333333333, + "grad_norm": 8.76308530196958, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 243883440, + "step": 2672 + }, + { + "epoch": 11.133333333333333, + "loss": 0.029476849362254143, + "loss_ce": 1.2126994079153519e-05, + "loss_iou": 0.2578125, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 243883440, + "step": 2672 + }, + { + "epoch": 11.1375, + "grad_norm": 2.7411960830642803, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 243974148, + "step": 2673 + }, + { + "epoch": 11.1375, + "loss": 0.10514024645090103, + "loss_ce": 0.0005259868921712041, + "loss_iou": 0.2421875, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 243974148, + "step": 2673 + }, + { + "epoch": 11.141666666666667, + "grad_norm": 1.6147312553770174, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 244065188, + "step": 2674 + }, + { + "epoch": 11.141666666666667, + "loss": 0.06602882593870163, + "loss_ce": 2.6929230443784036e-05, + "loss_iou": 0.375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 244065188, + "step": 2674 + }, + { + "epoch": 11.145833333333334, + "grad_norm": 2.9834652924516982, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 244156980, + "step": 2675 + }, + { + "epoch": 11.145833333333334, + "loss": 0.061976782977581024, + "loss_ce": 3.3730986615410075e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 244156980, + "step": 2675 + }, + { + "epoch": 11.15, + "grad_norm": 14.041890501096185, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 244247708, + "step": 2676 + }, + { + "epoch": 11.15, + "loss": 0.03552209213376045, + "loss_ce": 4.5408145524561405e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.007110595703125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 244247708, + "step": 2676 + }, + { + "epoch": 11.154166666666667, + "grad_norm": 2.064877630878675, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 244339292, + "step": 2677 + }, + { + "epoch": 11.154166666666667, + "loss": 0.08990888297557831, + "loss_ce": 0.0033762939274311066, + "loss_iou": 0.37890625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 244339292, + "step": 2677 + }, + { + "epoch": 11.158333333333333, + "grad_norm": 2.708907347781802, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 244430316, + "step": 2678 + }, + { + "epoch": 11.158333333333333, + "loss": 0.04705560952425003, + "loss_ce": 1.276460534427315e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 244430316, + "step": 2678 + }, + { + "epoch": 11.1625, + "grad_norm": 6.248912733827709, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 244521460, + "step": 2679 + }, + { + "epoch": 11.1625, + "loss": 0.06119865924119949, + "loss_ce": 5.6695200328249484e-05, + "loss_iou": 0.216796875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 244521460, + "step": 2679 + }, + { + "epoch": 11.166666666666666, + "grad_norm": 4.014238936093042, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 244612076, + "step": 2680 + }, + { + "epoch": 11.166666666666666, + "loss": 0.08084017038345337, + "loss_ce": 1.4365771676239092e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 244612076, + "step": 2680 + }, + { + "epoch": 11.170833333333333, + "grad_norm": 2.365956557946269, + "learning_rate": 5e-05, + "loss": 0.0912, + "num_input_tokens_seen": 244703032, + "step": 2681 + }, + { + "epoch": 11.170833333333333, + "loss": 0.11085952818393707, + "loss_ce": 4.428637112141587e-06, + "loss_iou": 0.251953125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 244703032, + "step": 2681 + }, + { + "epoch": 11.175, + "grad_norm": 3.646809449283573, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 244794256, + "step": 2682 + }, + { + "epoch": 11.175, + "loss": 0.11017481982707977, + "loss_ce": 6.361942268995335e-06, + "loss_iou": 0.0595703125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 244794256, + "step": 2682 + }, + { + "epoch": 11.179166666666667, + "grad_norm": 1.5967211911593462, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 244884596, + "step": 2683 + }, + { + "epoch": 11.179166666666667, + "loss": 0.08981596678495407, + "loss_ce": 1.7995476810028777e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 244884596, + "step": 2683 + }, + { + "epoch": 11.183333333333334, + "grad_norm": 2.1982779748437338, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 244976148, + "step": 2684 + }, + { + "epoch": 11.183333333333334, + "loss": 0.04370079189538956, + "loss_ce": 0.0003200560749974102, + "loss_iou": 0.21484375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 244976148, + "step": 2684 + }, + { + "epoch": 11.1875, + "grad_norm": 2.4019570743051095, + "learning_rate": 5e-05, + "loss": 0.1104, + "num_input_tokens_seen": 245067880, + "step": 2685 + }, + { + "epoch": 11.1875, + "loss": 0.07037793844938278, + "loss_ce": 1.9662955310195684e-05, + "loss_iou": 0.2265625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 245067880, + "step": 2685 + }, + { + "epoch": 11.191666666666666, + "grad_norm": 3.500552920517526, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 245159316, + "step": 2686 + }, + { + "epoch": 11.191666666666666, + "loss": 0.1178303211927414, + "loss_ce": 1.950369096448412e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 245159316, + "step": 2686 + }, + { + "epoch": 11.195833333333333, + "grad_norm": 2.404240266456727, + "learning_rate": 5e-05, + "loss": 0.044, + "num_input_tokens_seen": 245251196, + "step": 2687 + }, + { + "epoch": 11.195833333333333, + "loss": 0.04409124702215195, + "loss_ce": 0.000130675412947312, + "loss_iou": 0.3671875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 245251196, + "step": 2687 + }, + { + "epoch": 11.2, + "grad_norm": 4.848620713762468, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 245341364, + "step": 2688 + }, + { + "epoch": 11.2, + "loss": 0.10234392434358597, + "loss_ce": 0.0017732457490637898, + "loss_iou": 0.2734375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 245341364, + "step": 2688 + }, + { + "epoch": 11.204166666666667, + "grad_norm": 3.2238690970385178, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 245432708, + "step": 2689 + }, + { + "epoch": 11.204166666666667, + "loss": 0.11461775749921799, + "loss_ce": 8.989145499072038e-06, + "loss_iou": 0.275390625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 245432708, + "step": 2689 + }, + { + "epoch": 11.208333333333334, + "grad_norm": 7.191269458248552, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 245523664, + "step": 2690 + }, + { + "epoch": 11.208333333333334, + "loss": 0.10043956339359283, + "loss_ce": 6.214509085111786e-06, + "loss_iou": 0.31640625, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 245523664, + "step": 2690 + }, + { + "epoch": 11.2125, + "grad_norm": 3.191748444668608, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 245614976, + "step": 2691 + }, + { + "epoch": 11.2125, + "loss": 0.039116621017456055, + "loss_ce": 1.597761183802504e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 245614976, + "step": 2691 + }, + { + "epoch": 11.216666666666667, + "grad_norm": 3.0015127581483783, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 245705944, + "step": 2692 + }, + { + "epoch": 11.216666666666667, + "loss": 0.05734315514564514, + "loss_ce": 6.260463578655617e-07, + "loss_iou": 0.2119140625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 245705944, + "step": 2692 + }, + { + "epoch": 11.220833333333333, + "grad_norm": 3.100443408978586, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 245797184, + "step": 2693 + }, + { + "epoch": 11.220833333333333, + "loss": 0.06344582140445709, + "loss_ce": 0.0009000458521768451, + "loss_iou": 0.20703125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 245797184, + "step": 2693 + }, + { + "epoch": 11.225, + "grad_norm": 6.253250655357221, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 245888588, + "step": 2694 + }, + { + "epoch": 11.225, + "loss": 0.07886232435703278, + "loss_ce": 0.00037111277924850583, + "loss_iou": 0.38671875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 245888588, + "step": 2694 + }, + { + "epoch": 11.229166666666666, + "grad_norm": 2.1730678405329913, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 245979896, + "step": 2695 + }, + { + "epoch": 11.229166666666666, + "loss": 0.0903480052947998, + "loss_ce": 7.157451591410791e-07, + "loss_iou": 0.240234375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 245979896, + "step": 2695 + }, + { + "epoch": 11.233333333333333, + "grad_norm": 4.14898510788073, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 246071696, + "step": 2696 + }, + { + "epoch": 11.233333333333333, + "loss": 0.05042353272438049, + "loss_ce": 4.282536974642426e-05, + "loss_iou": 0.123046875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 246071696, + "step": 2696 + }, + { + "epoch": 11.2375, + "grad_norm": 3.46826227696615, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 246163656, + "step": 2697 + }, + { + "epoch": 11.2375, + "loss": 0.028534265235066414, + "loss_ce": 0.00029024691320955753, + "loss_iou": 0.158203125, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 246163656, + "step": 2697 + }, + { + "epoch": 11.241666666666667, + "grad_norm": 1.7889033940175771, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 246254440, + "step": 2698 + }, + { + "epoch": 11.241666666666667, + "loss": 0.06817486882209778, + "loss_ce": 0.0010819713352248073, + "loss_iou": 0.0, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 246254440, + "step": 2698 + }, + { + "epoch": 11.245833333333334, + "grad_norm": 1.8621407970482446, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 246346288, + "step": 2699 + }, + { + "epoch": 11.245833333333334, + "loss": 0.11108222603797913, + "loss_ce": 1.3498207408702001e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 246346288, + "step": 2699 + }, + { + "epoch": 11.25, + "grad_norm": 5.07881587238784, + "learning_rate": 5e-05, + "loss": 0.1127, + "num_input_tokens_seen": 246437464, + "step": 2700 + }, + { + "epoch": 11.25, + "loss": 0.16123591363430023, + "loss_ce": 7.258645200636238e-05, + "loss_iou": 0.150390625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 246437464, + "step": 2700 + }, + { + "epoch": 11.254166666666666, + "grad_norm": 3.8278222792890824, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 246528996, + "step": 2701 + }, + { + "epoch": 11.254166666666666, + "loss": 0.06611582636833191, + "loss_ce": 1.4749471120012458e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 246528996, + "step": 2701 + }, + { + "epoch": 11.258333333333333, + "grad_norm": 2.5501182541847394, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 246620564, + "step": 2702 + }, + { + "epoch": 11.258333333333333, + "loss": 0.03480696678161621, + "loss_ce": 1.6694054920662893e-06, + "loss_iou": 0.267578125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 246620564, + "step": 2702 + }, + { + "epoch": 11.2625, + "grad_norm": 2.466748870865673, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 246711844, + "step": 2703 + }, + { + "epoch": 11.2625, + "loss": 0.03872073069214821, + "loss_ce": 9.183676411339547e-06, + "loss_iou": 0.287109375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 246711844, + "step": 2703 + }, + { + "epoch": 11.266666666666667, + "grad_norm": 3.856505806043897, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 246803444, + "step": 2704 + }, + { + "epoch": 11.266666666666667, + "loss": 0.08916931599378586, + "loss_ce": 8.850642188917845e-05, + "loss_iou": 0.185546875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 246803444, + "step": 2704 + }, + { + "epoch": 11.270833333333334, + "grad_norm": 3.611251296625748, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 246894804, + "step": 2705 + }, + { + "epoch": 11.270833333333334, + "loss": 0.0859089121222496, + "loss_ce": 0.0005207245703786612, + "loss_iou": 0.216796875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 246894804, + "step": 2705 + }, + { + "epoch": 11.275, + "grad_norm": 2.758755755769881, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 246986168, + "step": 2706 + }, + { + "epoch": 11.275, + "loss": 0.08567283302545547, + "loss_ce": 9.99398162093712e-06, + "loss_iou": 0.337890625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 246986168, + "step": 2706 + }, + { + "epoch": 11.279166666666667, + "grad_norm": 28.694230527247903, + "learning_rate": 5e-05, + "loss": 0.1072, + "num_input_tokens_seen": 247077700, + "step": 2707 + }, + { + "epoch": 11.279166666666667, + "loss": 0.10691899806261063, + "loss_ce": 6.615737220272422e-07, + "loss_iou": 0.2177734375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 247077700, + "step": 2707 + }, + { + "epoch": 11.283333333333333, + "grad_norm": 1.5594716242777975, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 247169228, + "step": 2708 + }, + { + "epoch": 11.283333333333333, + "loss": 0.056843891739845276, + "loss_ce": 4.9039017540053464e-06, + "loss_iou": 0.1357421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 247169228, + "step": 2708 + }, + { + "epoch": 11.2875, + "grad_norm": 4.545860092599523, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 247260608, + "step": 2709 + }, + { + "epoch": 11.2875, + "loss": 0.06523621082305908, + "loss_ce": 3.5400349588599056e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 247260608, + "step": 2709 + }, + { + "epoch": 11.291666666666666, + "grad_norm": 1.9214195941139585, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 247351872, + "step": 2710 + }, + { + "epoch": 11.291666666666666, + "loss": 0.07823637127876282, + "loss_ce": 8.086175512289628e-05, + "loss_iou": 0.30859375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 247351872, + "step": 2710 + }, + { + "epoch": 11.295833333333333, + "grad_norm": 4.218596955138657, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 247442660, + "step": 2711 + }, + { + "epoch": 11.295833333333333, + "loss": 0.04063517600297928, + "loss_ce": 1.0193945172431995e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 247442660, + "step": 2711 + }, + { + "epoch": 11.3, + "grad_norm": 3.676734520825449, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 247533776, + "step": 2712 + }, + { + "epoch": 11.3, + "loss": 0.06818170100450516, + "loss_ce": 9.698516078060493e-05, + "loss_iou": 0.291015625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 247533776, + "step": 2712 + }, + { + "epoch": 11.304166666666667, + "grad_norm": 3.470390920762771, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 247624792, + "step": 2713 + }, + { + "epoch": 11.304166666666667, + "loss": 0.0611143596470356, + "loss_ce": 2.911293449869845e-06, + "loss_iou": 0.396484375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 247624792, + "step": 2713 + }, + { + "epoch": 11.308333333333334, + "grad_norm": 2.9181044624824435, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 247716444, + "step": 2714 + }, + { + "epoch": 11.308333333333334, + "loss": 0.06357040256261826, + "loss_ce": 0.0007041930221021175, + "loss_iou": 0.2890625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 247716444, + "step": 2714 + }, + { + "epoch": 11.3125, + "grad_norm": 3.0094011954333326, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 247807880, + "step": 2715 + }, + { + "epoch": 11.3125, + "loss": 0.047051601111888885, + "loss_ce": 2.401344318059273e-05, + "loss_iou": 0.232421875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 247807880, + "step": 2715 + }, + { + "epoch": 11.316666666666666, + "grad_norm": 4.432788501298371, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 247899236, + "step": 2716 + }, + { + "epoch": 11.316666666666666, + "loss": 0.0781235545873642, + "loss_ce": 5.9586993302218616e-05, + "loss_iou": 0.427734375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 247899236, + "step": 2716 + }, + { + "epoch": 11.320833333333333, + "grad_norm": 2.784805843158077, + "learning_rate": 5e-05, + "loss": 0.1038, + "num_input_tokens_seen": 247990644, + "step": 2717 + }, + { + "epoch": 11.320833333333333, + "loss": 0.051045119762420654, + "loss_ce": 4.472130513022421e-06, + "loss_iou": 0.21484375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 247990644, + "step": 2717 + }, + { + "epoch": 11.325, + "grad_norm": 6.738147447697919, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 248081828, + "step": 2718 + }, + { + "epoch": 11.325, + "loss": 0.05768699571490288, + "loss_ce": 0.0005123146111145616, + "loss_iou": 0.26953125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 248081828, + "step": 2718 + }, + { + "epoch": 11.329166666666667, + "grad_norm": 4.4089728566049615, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 248173364, + "step": 2719 + }, + { + "epoch": 11.329166666666667, + "loss": 0.1089344322681427, + "loss_ce": 7.822787301847711e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 248173364, + "step": 2719 + }, + { + "epoch": 11.333333333333334, + "grad_norm": 1.9846478948713728, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 248265452, + "step": 2720 + }, + { + "epoch": 11.333333333333334, + "loss": 0.04808041825890541, + "loss_ce": 0.003967257682234049, + "loss_iou": 0.33984375, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 248265452, + "step": 2720 + }, + { + "epoch": 11.3375, + "grad_norm": 1.9982745678749323, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 248357340, + "step": 2721 + }, + { + "epoch": 11.3375, + "loss": 0.04231679067015648, + "loss_ce": 0.00013196113286539912, + "loss_iou": 0.125, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 248357340, + "step": 2721 + }, + { + "epoch": 11.341666666666667, + "grad_norm": 2.984575890838546, + "learning_rate": 5e-05, + "loss": 0.0404, + "num_input_tokens_seen": 248448232, + "step": 2722 + }, + { + "epoch": 11.341666666666667, + "loss": 0.04854791611433029, + "loss_ce": 0.0001775545097189024, + "loss_iou": 0.1796875, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 248448232, + "step": 2722 + }, + { + "epoch": 11.345833333333333, + "grad_norm": 2.700649135100673, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 248539736, + "step": 2723 + }, + { + "epoch": 11.345833333333333, + "loss": 0.04843775928020477, + "loss_ce": 3.688073411467485e-05, + "loss_iou": 0.23046875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 248539736, + "step": 2723 + }, + { + "epoch": 11.35, + "grad_norm": 2.142703007126909, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 248630668, + "step": 2724 + }, + { + "epoch": 11.35, + "loss": 0.05542871356010437, + "loss_ce": 6.982320337556303e-05, + "loss_iou": 0.34375, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 248630668, + "step": 2724 + }, + { + "epoch": 11.354166666666666, + "grad_norm": 3.4553891155628005, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 248722216, + "step": 2725 + }, + { + "epoch": 11.354166666666666, + "loss": 0.10151135921478271, + "loss_ce": 9.894505637930706e-06, + "loss_iou": 0.357421875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 248722216, + "step": 2725 + }, + { + "epoch": 11.358333333333333, + "grad_norm": 4.9126542442498105, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 248813456, + "step": 2726 + }, + { + "epoch": 11.358333333333333, + "loss": 0.033823929727077484, + "loss_ce": 2.8231502255948726e-06, + "loss_iou": 0.330078125, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 248813456, + "step": 2726 + }, + { + "epoch": 11.3625, + "grad_norm": 3.2736521351266825, + "learning_rate": 5e-05, + "loss": 0.0798, + "num_input_tokens_seen": 248905088, + "step": 2727 + }, + { + "epoch": 11.3625, + "loss": 0.06761668622493744, + "loss_ce": 0.0003406820760574192, + "loss_iou": 0.341796875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 248905088, + "step": 2727 + }, + { + "epoch": 11.366666666666667, + "grad_norm": 1.8359191390557765, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 248997364, + "step": 2728 + }, + { + "epoch": 11.366666666666667, + "loss": 0.08889603614807129, + "loss_ce": 0.00046372690121643245, + "loss_iou": 0.263671875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 248997364, + "step": 2728 + }, + { + "epoch": 11.370833333333334, + "grad_norm": 4.613524160928555, + "learning_rate": 5e-05, + "loss": 0.096, + "num_input_tokens_seen": 249087984, + "step": 2729 + }, + { + "epoch": 11.370833333333334, + "loss": 0.11847412586212158, + "loss_ce": 4.888382591161644e-06, + "loss_iou": 0.1708984375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 249087984, + "step": 2729 + }, + { + "epoch": 11.375, + "grad_norm": 6.472476130650295, + "learning_rate": 5e-05, + "loss": 0.0841, + "num_input_tokens_seen": 249179364, + "step": 2730 + }, + { + "epoch": 11.375, + "loss": 0.07751025259494781, + "loss_ce": 0.0004075966135133058, + "loss_iou": 0.240234375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 249179364, + "step": 2730 + }, + { + "epoch": 11.379166666666666, + "grad_norm": 2.0262396552769864, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 249270408, + "step": 2731 + }, + { + "epoch": 11.379166666666666, + "loss": 0.10283870995044708, + "loss_ce": 8.602441812399775e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 249270408, + "step": 2731 + }, + { + "epoch": 11.383333333333333, + "grad_norm": 1.8024602462731834, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 249361720, + "step": 2732 + }, + { + "epoch": 11.383333333333333, + "loss": 0.09584569931030273, + "loss_ce": 6.6278429585509e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 249361720, + "step": 2732 + }, + { + "epoch": 11.3875, + "grad_norm": 2.940021352095748, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 249452956, + "step": 2733 + }, + { + "epoch": 11.3875, + "loss": 0.03529635816812515, + "loss_ce": 0.00144473509863019, + "loss_iou": 0.2138671875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 249452956, + "step": 2733 + }, + { + "epoch": 11.391666666666667, + "grad_norm": 2.600382492122339, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 249544408, + "step": 2734 + }, + { + "epoch": 11.391666666666667, + "loss": 0.08233761042356491, + "loss_ce": 0.00016902832430787385, + "loss_iou": 0.263671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 249544408, + "step": 2734 + }, + { + "epoch": 11.395833333333334, + "grad_norm": 3.4005438341922667, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 249636064, + "step": 2735 + }, + { + "epoch": 11.395833333333334, + "loss": 0.08299972116947174, + "loss_ce": 7.166857812990202e-06, + "loss_iou": 0.291015625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 249636064, + "step": 2735 + }, + { + "epoch": 11.4, + "grad_norm": 13.1219533614889, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 249727908, + "step": 2736 + }, + { + "epoch": 11.4, + "loss": 0.047019585967063904, + "loss_ce": 2.2515387172461487e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 249727908, + "step": 2736 + }, + { + "epoch": 11.404166666666667, + "grad_norm": 2.7704786263125496, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 249819412, + "step": 2737 + }, + { + "epoch": 11.404166666666667, + "loss": 0.04526345431804657, + "loss_ce": 5.886092367290985e-06, + "loss_iou": 0.328125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 249819412, + "step": 2737 + }, + { + "epoch": 11.408333333333333, + "grad_norm": 4.015097845565541, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 249911020, + "step": 2738 + }, + { + "epoch": 11.408333333333333, + "loss": 0.04373963177204132, + "loss_ce": 0.001976326573640108, + "loss_iou": 0.2333984375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 249911020, + "step": 2738 + }, + { + "epoch": 11.4125, + "grad_norm": 5.749573921039443, + "learning_rate": 5e-05, + "loss": 0.1062, + "num_input_tokens_seen": 250002288, + "step": 2739 + }, + { + "epoch": 11.4125, + "loss": 0.10650154948234558, + "loss_ce": 0.000971770437899977, + "loss_iou": 0.1953125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 250002288, + "step": 2739 + }, + { + "epoch": 11.416666666666666, + "grad_norm": 1.7547716698846578, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 250093536, + "step": 2740 + }, + { + "epoch": 11.416666666666666, + "loss": 0.04704135283827782, + "loss_ce": 0.000944552302826196, + "loss_iou": 0.07275390625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 250093536, + "step": 2740 + }, + { + "epoch": 11.420833333333333, + "grad_norm": 3.7529070742068873, + "learning_rate": 5e-05, + "loss": 0.1585, + "num_input_tokens_seen": 250184244, + "step": 2741 + }, + { + "epoch": 11.420833333333333, + "loss": 0.1299409568309784, + "loss_ce": 4.7433445615752134e-06, + "loss_iou": 0.2197265625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 250184244, + "step": 2741 + }, + { + "epoch": 11.425, + "grad_norm": 1.8625930195822442, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 250274520, + "step": 2742 + }, + { + "epoch": 11.425, + "loss": 0.05740495026111603, + "loss_ce": 0.00010056864994112402, + "loss_iou": 0.08544921875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 250274520, + "step": 2742 + }, + { + "epoch": 11.429166666666667, + "grad_norm": 2.4351725931958885, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 250365952, + "step": 2743 + }, + { + "epoch": 11.429166666666667, + "loss": 0.0731714516878128, + "loss_ce": 0.0004175424110144377, + "loss_iou": 0.36328125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 250365952, + "step": 2743 + }, + { + "epoch": 11.433333333333334, + "grad_norm": 2.413316258176711, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 250457084, + "step": 2744 + }, + { + "epoch": 11.433333333333334, + "loss": 0.07355280220508575, + "loss_ce": 3.5956800275016576e-05, + "loss_iou": 0.291015625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 250457084, + "step": 2744 + }, + { + "epoch": 11.4375, + "grad_norm": 5.637883827271504, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 250548420, + "step": 2745 + }, + { + "epoch": 11.4375, + "loss": 0.026842396706342697, + "loss_ce": 1.7445854609832168e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 250548420, + "step": 2745 + }, + { + "epoch": 11.441666666666666, + "grad_norm": 2.3023188917249247, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 250639996, + "step": 2746 + }, + { + "epoch": 11.441666666666666, + "loss": 0.07687580585479736, + "loss_ce": 0.00012409850023686886, + "loss_iou": 0.27734375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 250639996, + "step": 2746 + }, + { + "epoch": 11.445833333333333, + "grad_norm": 4.086735347534926, + "learning_rate": 5e-05, + "loss": 0.082, + "num_input_tokens_seen": 250731156, + "step": 2747 + }, + { + "epoch": 11.445833333333333, + "loss": 0.03926856815814972, + "loss_ce": 3.822279904852621e-05, + "loss_iou": 0.21484375, + "loss_num": 0.0078125, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 250731156, + "step": 2747 + }, + { + "epoch": 11.45, + "grad_norm": 3.7251995120016272, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 250822536, + "step": 2748 + }, + { + "epoch": 11.45, + "loss": 0.08701753616333008, + "loss_ce": 0.0001950228470377624, + "loss_iou": 0.294921875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 250822536, + "step": 2748 + }, + { + "epoch": 11.454166666666667, + "grad_norm": 2.4634828035844323, + "learning_rate": 5e-05, + "loss": 0.044, + "num_input_tokens_seen": 250913728, + "step": 2749 + }, + { + "epoch": 11.454166666666667, + "loss": 0.03424752503633499, + "loss_ce": 5.2576619054889306e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 250913728, + "step": 2749 + }, + { + "epoch": 11.458333333333334, + "grad_norm": 3.431238291107999, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 251004924, + "step": 2750 + }, + { + "epoch": 11.458333333333334, + "eval_seeclick_CIoU": 0.29619458317756653, + "eval_seeclick_GIoU": 0.2929905205965042, + "eval_seeclick_IoU": 0.38835375010967255, + "eval_seeclick_MAE_all": 0.09226639196276665, + "eval_seeclick_MAE_h": 0.08143088221549988, + "eval_seeclick_MAE_w": 0.19786543399095535, + "eval_seeclick_MAE_x_boxes": 0.19208704680204391, + "eval_seeclick_MAE_y_boxes": 0.08390780910849571, + "eval_seeclick_NUM_probability": 0.9999978244304657, + "eval_seeclick_inside_bbox": 0.6150568127632141, + "eval_seeclick_loss": 0.5580732822418213, + "eval_seeclick_loss_ce": 0.13431841880083084, + "eval_seeclick_loss_iou": 0.42510986328125, + "eval_seeclick_loss_num": 0.0828704833984375, + "eval_seeclick_loss_xval": 0.41436767578125, + "eval_seeclick_runtime": 79.9593, + "eval_seeclick_samples_per_second": 0.538, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 251004924, + "step": 2750 + }, + { + "epoch": 11.458333333333334, + "eval_icons_CIoU": 0.29433034360408783, + "eval_icons_GIoU": 0.3072480261325836, + "eval_icons_IoU": 0.38313308358192444, + "eval_icons_MAE_all": 0.07298702001571655, + "eval_icons_MAE_h": 0.17794279009103775, + "eval_icons_MAE_w": 0.08765166997909546, + "eval_icons_MAE_x_boxes": 0.08846120536327362, + "eval_icons_MAE_y_boxes": 0.1775236800312996, + "eval_icons_NUM_probability": 0.9999988079071045, + "eval_icons_inside_bbox": 0.5069444477558136, + "eval_icons_loss": 0.35328853130340576, + "eval_icons_loss_ce": 0.0008648704388178885, + "eval_icons_loss_iou": 0.275634765625, + "eval_icons_loss_num": 0.073883056640625, + "eval_icons_loss_xval": 0.36932373046875, + "eval_icons_runtime": 98.2904, + "eval_icons_samples_per_second": 0.509, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 251004924, + "step": 2750 + }, + { + "epoch": 11.458333333333334, + "eval_screenspot_CIoU": 0.3644411067167918, + "eval_screenspot_GIoU": 0.35429301857948303, + "eval_screenspot_IoU": 0.4447065393129985, + "eval_screenspot_MAE_all": 0.10152472058931987, + "eval_screenspot_MAE_h": 0.09767068674166997, + "eval_screenspot_MAE_w": 0.203730175892512, + "eval_screenspot_MAE_x_boxes": 0.2037352075179418, + "eval_screenspot_MAE_y_boxes": 0.08715710788965225, + "eval_screenspot_NUM_probability": 0.9992983738581339, + "eval_screenspot_inside_bbox": 0.7012499968210856, + "eval_screenspot_loss": 0.5108309388160706, + "eval_screenspot_loss_ce": 0.0012114184373785974, + "eval_screenspot_loss_iou": 0.3665364583333333, + "eval_screenspot_loss_num": 0.102203369140625, + "eval_screenspot_loss_xval": 0.5108235677083334, + "eval_screenspot_runtime": 153.1364, + "eval_screenspot_samples_per_second": 0.581, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 251004924, + "step": 2750 + }, + { + "epoch": 11.458333333333334, + "eval_compot_CIoU": 0.5426531434059143, + "eval_compot_GIoU": 0.5361522138118744, + "eval_compot_IoU": 0.5986569225788116, + "eval_compot_MAE_all": 0.048880767077207565, + "eval_compot_MAE_h": 0.06029001250863075, + "eval_compot_MAE_w": 0.11704185605049133, + "eval_compot_MAE_x_boxes": 0.11615481600165367, + "eval_compot_MAE_y_boxes": 0.05879940651357174, + "eval_compot_NUM_probability": 0.9999973475933075, + "eval_compot_inside_bbox": 0.8107638955116272, + "eval_compot_loss": 0.2820165157318115, + "eval_compot_loss_ce": 0.04021947830915451, + "eval_compot_loss_iou": 0.2904052734375, + "eval_compot_loss_num": 0.042430877685546875, + "eval_compot_loss_xval": 0.2122650146484375, + "eval_compot_runtime": 97.599, + "eval_compot_samples_per_second": 0.512, + "eval_compot_steps_per_second": 0.02, + "num_input_tokens_seen": 251004924, + "step": 2750 + }, + { + "epoch": 11.458333333333334, + "loss": 0.23834848403930664, + "loss_ce": 0.03562020882964134, + "loss_iou": 0.2734375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 251004924, + "step": 2750 + }, + { + "epoch": 11.4625, + "grad_norm": 9.404948579296532, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 251096120, + "step": 2751 + }, + { + "epoch": 11.4625, + "loss": 0.06319372355937958, + "loss_ce": 0.00017492602637503296, + "loss_iou": 0.37109375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 251096120, + "step": 2751 + }, + { + "epoch": 11.466666666666667, + "grad_norm": 3.0181016834500314, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 251187304, + "step": 2752 + }, + { + "epoch": 11.466666666666667, + "loss": 0.07687968760728836, + "loss_ce": 0.00342387892305851, + "loss_iou": 0.2578125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 251187304, + "step": 2752 + }, + { + "epoch": 11.470833333333333, + "grad_norm": 6.415262920707865, + "learning_rate": 5e-05, + "loss": 0.1131, + "num_input_tokens_seen": 251278548, + "step": 2753 + }, + { + "epoch": 11.470833333333333, + "loss": 0.09908459335565567, + "loss_ce": 0.00010846274381037802, + "loss_iou": 0.267578125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 251278548, + "step": 2753 + }, + { + "epoch": 11.475, + "grad_norm": 5.777631940298475, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 251369688, + "step": 2754 + }, + { + "epoch": 11.475, + "loss": 0.05238595977425575, + "loss_ce": 0.00036874954821541905, + "loss_iou": 0.267578125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 251369688, + "step": 2754 + }, + { + "epoch": 11.479166666666666, + "grad_norm": 6.699880381934479, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 251461016, + "step": 2755 + }, + { + "epoch": 11.479166666666666, + "loss": 0.05163790285587311, + "loss_ce": 2.1608900624414673e-06, + "loss_iou": 0.345703125, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 251461016, + "step": 2755 + }, + { + "epoch": 11.483333333333333, + "grad_norm": 5.016164344500406, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 251552476, + "step": 2756 + }, + { + "epoch": 11.483333333333333, + "loss": 0.048208437860012054, + "loss_ce": 2.1183048374950886e-05, + "loss_iou": 0.4140625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 251552476, + "step": 2756 + }, + { + "epoch": 11.4875, + "grad_norm": 2.0772984630901044, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 251644024, + "step": 2757 + }, + { + "epoch": 11.4875, + "loss": 0.07686302065849304, + "loss_ce": 6.55341282254085e-05, + "loss_iou": 0.345703125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 251644024, + "step": 2757 + }, + { + "epoch": 11.491666666666667, + "grad_norm": 2.9398763812012025, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 251734604, + "step": 2758 + }, + { + "epoch": 11.491666666666667, + "loss": 0.06483888626098633, + "loss_ce": 1.955397783603985e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 251734604, + "step": 2758 + }, + { + "epoch": 11.495833333333334, + "grad_norm": 4.8316124027285285, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 251826580, + "step": 2759 + }, + { + "epoch": 11.495833333333334, + "loss": 0.05621996894478798, + "loss_ce": 0.00018206711683887988, + "loss_iou": 0.1435546875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 251826580, + "step": 2759 + }, + { + "epoch": 11.5, + "grad_norm": 2.3090663766733255, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 251918256, + "step": 2760 + }, + { + "epoch": 11.5, + "loss": 0.07426677644252777, + "loss_ce": 0.0002616445126477629, + "loss_iou": 0.28515625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 251918256, + "step": 2760 + }, + { + "epoch": 11.504166666666666, + "grad_norm": 3.6694203312693396, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 252010080, + "step": 2761 + }, + { + "epoch": 11.504166666666666, + "loss": 0.07445695996284485, + "loss_ce": 0.0017946104053407907, + "loss_iou": 0.32421875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 252010080, + "step": 2761 + }, + { + "epoch": 11.508333333333333, + "grad_norm": 3.1695559146201897, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 252100984, + "step": 2762 + }, + { + "epoch": 11.508333333333333, + "loss": 0.05864902213215828, + "loss_ce": 9.49107197811827e-06, + "loss_iou": 0.30859375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 252100984, + "step": 2762 + }, + { + "epoch": 11.5125, + "grad_norm": 2.6170723158105194, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 252191872, + "step": 2763 + }, + { + "epoch": 11.5125, + "loss": 0.0470140241086483, + "loss_ce": 1.6982100987661397e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 252191872, + "step": 2763 + }, + { + "epoch": 11.516666666666667, + "grad_norm": 2.2240632395882294, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 252282840, + "step": 2764 + }, + { + "epoch": 11.516666666666667, + "loss": 0.07522941380739212, + "loss_ce": 3.586279717637808e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 252282840, + "step": 2764 + }, + { + "epoch": 11.520833333333334, + "grad_norm": 1.936684607025272, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 252374224, + "step": 2765 + }, + { + "epoch": 11.520833333333334, + "loss": 0.06849393248558044, + "loss_ce": 0.0002108556218445301, + "loss_iou": 0.322265625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 252374224, + "step": 2765 + }, + { + "epoch": 11.525, + "grad_norm": 2.8227182617362883, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 252465888, + "step": 2766 + }, + { + "epoch": 11.525, + "loss": 0.1218826025724411, + "loss_ce": 0.000239536224398762, + "loss_iou": 0.1943359375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 252465888, + "step": 2766 + }, + { + "epoch": 11.529166666666667, + "grad_norm": 2.131730407889572, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 252557832, + "step": 2767 + }, + { + "epoch": 11.529166666666667, + "loss": 0.08378443866968155, + "loss_ce": 0.0012191261630505323, + "loss_iou": 0.25390625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 252557832, + "step": 2767 + }, + { + "epoch": 11.533333333333333, + "grad_norm": 2.3278290162303645, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 252647696, + "step": 2768 + }, + { + "epoch": 11.533333333333333, + "loss": 0.07774877548217773, + "loss_ce": 0.001607423764653504, + "loss_iou": 0.14453125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 252647696, + "step": 2768 + }, + { + "epoch": 11.5375, + "grad_norm": 1.8376633462341854, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 252738876, + "step": 2769 + }, + { + "epoch": 11.5375, + "loss": 0.042166076600551605, + "loss_ce": 6.0432503232732415e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 252738876, + "step": 2769 + }, + { + "epoch": 11.541666666666666, + "grad_norm": 3.362258235142838, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 252829700, + "step": 2770 + }, + { + "epoch": 11.541666666666666, + "loss": 0.03307725116610527, + "loss_ce": 1.908603917399887e-05, + "loss_iou": 0.31640625, + "loss_num": 0.006622314453125, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 252829700, + "step": 2770 + }, + { + "epoch": 11.545833333333333, + "grad_norm": 3.098440746580563, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 252920408, + "step": 2771 + }, + { + "epoch": 11.545833333333333, + "loss": 0.10354944318532944, + "loss_ce": 3.3009541766659822e-06, + "loss_iou": 0.162109375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 252920408, + "step": 2771 + }, + { + "epoch": 11.55, + "grad_norm": 2.2481952482408585, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 253011180, + "step": 2772 + }, + { + "epoch": 11.55, + "loss": 0.07058833539485931, + "loss_ce": 1.1733255860235658e-06, + "loss_iou": 0.23828125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 253011180, + "step": 2772 + }, + { + "epoch": 11.554166666666667, + "grad_norm": 2.378779411614109, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 253102820, + "step": 2773 + }, + { + "epoch": 11.554166666666667, + "loss": 0.09102697670459747, + "loss_ce": 8.459096716251224e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 253102820, + "step": 2773 + }, + { + "epoch": 11.558333333333334, + "grad_norm": 3.626487783840794, + "learning_rate": 5e-05, + "loss": 0.1212, + "num_input_tokens_seen": 253193792, + "step": 2774 + }, + { + "epoch": 11.558333333333334, + "loss": 0.07658842206001282, + "loss_ce": 0.00014189038483891636, + "loss_iou": 0.326171875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 253193792, + "step": 2774 + }, + { + "epoch": 11.5625, + "grad_norm": 5.01509653664042, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 253285112, + "step": 2775 + }, + { + "epoch": 11.5625, + "loss": 0.045781366527080536, + "loss_ce": 8.128983608912677e-05, + "loss_iou": 0.234375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 253285112, + "step": 2775 + }, + { + "epoch": 11.566666666666666, + "grad_norm": 4.398346693808524, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 253377040, + "step": 2776 + }, + { + "epoch": 11.566666666666666, + "loss": 0.0686916932463646, + "loss_ce": 0.0004391258116811514, + "loss_iou": 0.306640625, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 253377040, + "step": 2776 + }, + { + "epoch": 11.570833333333333, + "grad_norm": 3.5203956737821467, + "learning_rate": 5e-05, + "loss": 0.0903, + "num_input_tokens_seen": 253468468, + "step": 2777 + }, + { + "epoch": 11.570833333333333, + "loss": 0.1309206485748291, + "loss_ce": 7.865828592912294e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 253468468, + "step": 2777 + }, + { + "epoch": 11.575, + "grad_norm": 3.509405073027444, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 253559120, + "step": 2778 + }, + { + "epoch": 11.575, + "loss": 0.06349446624517441, + "loss_ce": 6.367945752572268e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 253559120, + "step": 2778 + }, + { + "epoch": 11.579166666666667, + "grad_norm": 4.479791757883328, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 253650464, + "step": 2779 + }, + { + "epoch": 11.579166666666667, + "loss": 0.07993124425411224, + "loss_ce": 5.7041561376536265e-06, + "loss_iou": 0.197265625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 253650464, + "step": 2779 + }, + { + "epoch": 11.583333333333334, + "grad_norm": 2.6582836288121126, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 253742120, + "step": 2780 + }, + { + "epoch": 11.583333333333334, + "loss": 0.033640384674072266, + "loss_ce": 2.5271636332035996e-05, + "loss_iou": 0.1328125, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 253742120, + "step": 2780 + }, + { + "epoch": 11.5875, + "grad_norm": 4.125095437639992, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 253833700, + "step": 2781 + }, + { + "epoch": 11.5875, + "loss": 0.10041318088769913, + "loss_ce": 0.0033215084113180637, + "loss_iou": 0.3359375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 253833700, + "step": 2781 + }, + { + "epoch": 11.591666666666667, + "grad_norm": 3.9652818320745413, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 253925124, + "step": 2782 + }, + { + "epoch": 11.591666666666667, + "loss": 0.15131857991218567, + "loss_ce": 1.243268525286112e-05, + "loss_iou": 0.34375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 253925124, + "step": 2782 + }, + { + "epoch": 11.595833333333333, + "grad_norm": 2.9177510310641956, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 254016228, + "step": 2783 + }, + { + "epoch": 11.595833333333333, + "loss": 0.10961540043354034, + "loss_ce": 1.1515976439113729e-05, + "loss_iou": 0.220703125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 254016228, + "step": 2783 + }, + { + "epoch": 11.6, + "grad_norm": 2.7266875155345622, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 254107736, + "step": 2784 + }, + { + "epoch": 11.6, + "loss": 0.06983380019664764, + "loss_ce": 1.95505663214135e-06, + "loss_iou": 0.310546875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 254107736, + "step": 2784 + }, + { + "epoch": 11.604166666666666, + "grad_norm": 14.061937738446051, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 254199140, + "step": 2785 + }, + { + "epoch": 11.604166666666666, + "loss": 0.08557083457708359, + "loss_ce": 0.0003504964697640389, + "loss_iou": 0.28515625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 254199140, + "step": 2785 + }, + { + "epoch": 11.608333333333333, + "grad_norm": 3.4596467699047397, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 254290332, + "step": 2786 + }, + { + "epoch": 11.608333333333333, + "loss": 0.09167817234992981, + "loss_ce": 3.3687520044622943e-06, + "loss_iou": 0.31640625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 254290332, + "step": 2786 + }, + { + "epoch": 11.6125, + "grad_norm": 3.109949228920906, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 254381496, + "step": 2787 + }, + { + "epoch": 11.6125, + "loss": 0.046016875654459, + "loss_ce": 0.00027102508465759456, + "loss_iou": 0.25390625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 254381496, + "step": 2787 + }, + { + "epoch": 11.616666666666667, + "grad_norm": 2.7695627569049885, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 254473576, + "step": 2788 + }, + { + "epoch": 11.616666666666667, + "loss": 0.07978077232837677, + "loss_ce": 0.00017566655878908932, + "loss_iou": 0.1875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 254473576, + "step": 2788 + }, + { + "epoch": 11.620833333333334, + "grad_norm": 1.2862383178885615, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 254565020, + "step": 2789 + }, + { + "epoch": 11.620833333333334, + "loss": 0.03599901497364044, + "loss_ce": 3.5316618323122384e-06, + "loss_iou": 0.12451171875, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 254565020, + "step": 2789 + }, + { + "epoch": 11.625, + "grad_norm": 2.2476605782102315, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 254656308, + "step": 2790 + }, + { + "epoch": 11.625, + "loss": 0.060362979769706726, + "loss_ce": 0.0013343519531190395, + "loss_iou": 0.2890625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 254656308, + "step": 2790 + }, + { + "epoch": 11.629166666666666, + "grad_norm": 4.784563316647035, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 254748008, + "step": 2791 + }, + { + "epoch": 11.629166666666666, + "loss": 0.1162891536951065, + "loss_ce": 0.0018787547014653683, + "loss_iou": 0.37109375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 254748008, + "step": 2791 + }, + { + "epoch": 11.633333333333333, + "grad_norm": 3.3228327784466356, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 254839640, + "step": 2792 + }, + { + "epoch": 11.633333333333333, + "loss": 0.0805789902806282, + "loss_ce": 0.0011112188221886754, + "loss_iou": 0.21875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 254839640, + "step": 2792 + }, + { + "epoch": 11.6375, + "grad_norm": 48.217722451229285, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 254931388, + "step": 2793 + }, + { + "epoch": 11.6375, + "loss": 0.061529166996479034, + "loss_ce": 0.00239373417571187, + "loss_iou": 0.181640625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 254931388, + "step": 2793 + }, + { + "epoch": 11.641666666666667, + "grad_norm": 2.4388938083203824, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 255022072, + "step": 2794 + }, + { + "epoch": 11.641666666666667, + "loss": 0.09992580860853195, + "loss_ce": 1.1263322448940016e-05, + "loss_iou": 0.359375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 255022072, + "step": 2794 + }, + { + "epoch": 11.645833333333334, + "grad_norm": 62.60861556378519, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 255113548, + "step": 2795 + }, + { + "epoch": 11.645833333333334, + "loss": 0.08678022027015686, + "loss_ce": 1.874838380899746e-05, + "loss_iou": 0.263671875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 255113548, + "step": 2795 + }, + { + "epoch": 11.65, + "grad_norm": 4.333663691442244, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 255204604, + "step": 2796 + }, + { + "epoch": 11.65, + "loss": 0.054408200085163116, + "loss_ce": 0.00011742699280148372, + "loss_iou": 0.310546875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 255204604, + "step": 2796 + }, + { + "epoch": 11.654166666666667, + "grad_norm": 7.411968075893123, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 255295664, + "step": 2797 + }, + { + "epoch": 11.654166666666667, + "loss": 0.04946771264076233, + "loss_ce": 0.000639589736238122, + "loss_iou": 0.2353515625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 255295664, + "step": 2797 + }, + { + "epoch": 11.658333333333333, + "grad_norm": 2.701084957754334, + "learning_rate": 5e-05, + "loss": 0.1053, + "num_input_tokens_seen": 255386836, + "step": 2798 + }, + { + "epoch": 11.658333333333333, + "loss": 0.11523690819740295, + "loss_ce": 9.409207268618047e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 255386836, + "step": 2798 + }, + { + "epoch": 11.6625, + "grad_norm": 3.273955524452734, + "learning_rate": 5e-05, + "loss": 0.0875, + "num_input_tokens_seen": 255478100, + "step": 2799 + }, + { + "epoch": 11.6625, + "loss": 0.1136731430888176, + "loss_ce": 0.002314502838999033, + "loss_iou": 0.32421875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 255478100, + "step": 2799 + }, + { + "epoch": 11.666666666666666, + "grad_norm": 3.242321040345061, + "learning_rate": 5e-05, + "loss": 0.1012, + "num_input_tokens_seen": 255569656, + "step": 2800 + }, + { + "epoch": 11.666666666666666, + "loss": 0.08774766325950623, + "loss_ce": 0.0009861922590062022, + "loss_iou": 0.32421875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 255569656, + "step": 2800 + }, + { + "epoch": 11.670833333333333, + "grad_norm": 3.059187190578943, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 255660884, + "step": 2801 + }, + { + "epoch": 11.670833333333333, + "loss": 0.0821562260389328, + "loss_ce": 6.393673538696021e-05, + "loss_iou": 0.220703125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 255660884, + "step": 2801 + }, + { + "epoch": 11.675, + "grad_norm": 2.4947240380219733, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 255751908, + "step": 2802 + }, + { + "epoch": 11.675, + "loss": 0.05989711731672287, + "loss_ce": 6.366583420458483e-06, + "loss_iou": 0.19921875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 255751908, + "step": 2802 + }, + { + "epoch": 11.679166666666667, + "grad_norm": 2.5267985250994074, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 255843508, + "step": 2803 + }, + { + "epoch": 11.679166666666667, + "loss": 0.043543294072151184, + "loss_ce": 0.00014730200928170234, + "loss_iou": 0.189453125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 255843508, + "step": 2803 + }, + { + "epoch": 11.683333333333334, + "grad_norm": 4.36168402933246, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 255934776, + "step": 2804 + }, + { + "epoch": 11.683333333333334, + "loss": 0.06685806065797806, + "loss_ce": 0.0003297396469861269, + "loss_iou": 0.35546875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 255934776, + "step": 2804 + }, + { + "epoch": 11.6875, + "grad_norm": 6.375631937181193, + "learning_rate": 5e-05, + "loss": 0.1221, + "num_input_tokens_seen": 256025172, + "step": 2805 + }, + { + "epoch": 11.6875, + "loss": 0.1451430320739746, + "loss_ce": 1.4369206837727688e-06, + "loss_iou": 0.23046875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 256025172, + "step": 2805 + }, + { + "epoch": 11.691666666666666, + "grad_norm": 2.585798896351954, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 256117384, + "step": 2806 + }, + { + "epoch": 11.691666666666666, + "loss": 0.04989667236804962, + "loss_ce": 0.0005802658852189779, + "loss_iou": 0.26171875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 256117384, + "step": 2806 + }, + { + "epoch": 11.695833333333333, + "grad_norm": 1.7965183540535414, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 256208420, + "step": 2807 + }, + { + "epoch": 11.695833333333333, + "loss": 0.07193634659051895, + "loss_ce": 6.416817996068858e-06, + "loss_iou": 0.36328125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 256208420, + "step": 2807 + }, + { + "epoch": 11.7, + "grad_norm": 2.2213844866692396, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 256300508, + "step": 2808 + }, + { + "epoch": 11.7, + "loss": 0.0829494446516037, + "loss_ce": 0.00046043359907343984, + "loss_iou": 0.1708984375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 256300508, + "step": 2808 + }, + { + "epoch": 11.704166666666667, + "grad_norm": 2.575061272063674, + "learning_rate": 5e-05, + "loss": 0.0813, + "num_input_tokens_seen": 256391228, + "step": 2809 + }, + { + "epoch": 11.704166666666667, + "loss": 0.12455101311206818, + "loss_ce": 6.981042679399252e-05, + "loss_iou": 0.0908203125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 256391228, + "step": 2809 + }, + { + "epoch": 11.708333333333334, + "grad_norm": 3.217577953810864, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 256482424, + "step": 2810 + }, + { + "epoch": 11.708333333333334, + "loss": 0.09218916296958923, + "loss_ce": 4.1327919461764395e-05, + "loss_iou": 0.28125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 256482424, + "step": 2810 + }, + { + "epoch": 11.7125, + "grad_norm": 3.50783614109373, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 256573408, + "step": 2811 + }, + { + "epoch": 11.7125, + "loss": 0.11624173820018768, + "loss_ce": 0.0002749458944890648, + "loss_iou": 0.302734375, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 256573408, + "step": 2811 + }, + { + "epoch": 11.716666666666667, + "grad_norm": 4.623612033239213, + "learning_rate": 5e-05, + "loss": 0.123, + "num_input_tokens_seen": 256663956, + "step": 2812 + }, + { + "epoch": 11.716666666666667, + "loss": 0.13129764795303345, + "loss_ce": 0.0002856797364074737, + "loss_iou": 0.337890625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 256663956, + "step": 2812 + }, + { + "epoch": 11.720833333333333, + "grad_norm": 2.3002718900676884, + "learning_rate": 5e-05, + "loss": 0.0413, + "num_input_tokens_seen": 256754852, + "step": 2813 + }, + { + "epoch": 11.720833333333333, + "loss": 0.04171242564916611, + "loss_ce": 0.0001780021848389879, + "loss_iou": 0.1875, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 256754852, + "step": 2813 + }, + { + "epoch": 11.725, + "grad_norm": 3.5142776554663557, + "learning_rate": 5e-05, + "loss": 0.0897, + "num_input_tokens_seen": 256846128, + "step": 2814 + }, + { + "epoch": 11.725, + "loss": 0.10495474189519882, + "loss_ce": 4.796497250936227e-06, + "loss_iou": 0.119140625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 256846128, + "step": 2814 + }, + { + "epoch": 11.729166666666666, + "grad_norm": 4.37602634922094, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 256937748, + "step": 2815 + }, + { + "epoch": 11.729166666666666, + "loss": 0.05913905054330826, + "loss_ce": 0.00014857419591862708, + "loss_iou": 0.3828125, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 256937748, + "step": 2815 + }, + { + "epoch": 11.733333333333333, + "grad_norm": 2.1503700616017167, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 257028948, + "step": 2816 + }, + { + "epoch": 11.733333333333333, + "loss": 0.034769974648952484, + "loss_ce": 1.0452968126628548e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 257028948, + "step": 2816 + }, + { + "epoch": 11.7375, + "grad_norm": 5.0109340144284795, + "learning_rate": 5e-05, + "loss": 0.1121, + "num_input_tokens_seen": 257120320, + "step": 2817 + }, + { + "epoch": 11.7375, + "loss": 0.1027803048491478, + "loss_ce": 0.00039382706745527685, + "loss_iou": 0.203125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 257120320, + "step": 2817 + }, + { + "epoch": 11.741666666666667, + "grad_norm": 2.093254385288736, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 257210756, + "step": 2818 + }, + { + "epoch": 11.741666666666667, + "loss": 0.06602243334054947, + "loss_ce": 2.8168045901111327e-05, + "loss_iou": 0.322265625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 257210756, + "step": 2818 + }, + { + "epoch": 11.745833333333334, + "grad_norm": 2.9800228128313035, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 257301984, + "step": 2819 + }, + { + "epoch": 11.745833333333334, + "loss": 0.09179629385471344, + "loss_ce": 2.9940814783913083e-05, + "loss_iou": 0.2265625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 257301984, + "step": 2819 + }, + { + "epoch": 11.75, + "grad_norm": 5.110429224134522, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 257393624, + "step": 2820 + }, + { + "epoch": 11.75, + "loss": 0.05140618979930878, + "loss_ce": 0.001487057888880372, + "loss_iou": 0.330078125, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 257393624, + "step": 2820 + }, + { + "epoch": 11.754166666666666, + "grad_norm": 2.954461092840001, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 257484916, + "step": 2821 + }, + { + "epoch": 11.754166666666666, + "loss": 0.06782172620296478, + "loss_ce": 0.0014002150855958462, + "loss_iou": 0.25390625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 257484916, + "step": 2821 + }, + { + "epoch": 11.758333333333333, + "grad_norm": 2.070142501990094, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 257576552, + "step": 2822 + }, + { + "epoch": 11.758333333333333, + "loss": 0.05732317268848419, + "loss_ce": 2.6417277695145458e-05, + "loss_iou": 0.134765625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 257576552, + "step": 2822 + }, + { + "epoch": 11.7625, + "grad_norm": 2.4238920761588987, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 257667628, + "step": 2823 + }, + { + "epoch": 11.7625, + "loss": 0.0896691381931305, + "loss_ce": 2.3749711544951424e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 257667628, + "step": 2823 + }, + { + "epoch": 11.766666666666667, + "grad_norm": 4.637905965425538, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 257758564, + "step": 2824 + }, + { + "epoch": 11.766666666666667, + "loss": 0.07250712811946869, + "loss_ce": 0.0027592037804424763, + "loss_iou": 0.09619140625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 257758564, + "step": 2824 + }, + { + "epoch": 11.770833333333334, + "grad_norm": 5.46493963664531, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 257849596, + "step": 2825 + }, + { + "epoch": 11.770833333333334, + "loss": 0.06953981518745422, + "loss_ce": 0.001790795475244522, + "loss_iou": 0.25390625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 257849596, + "step": 2825 + }, + { + "epoch": 11.775, + "grad_norm": 6.457935238980587, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 257940964, + "step": 2826 + }, + { + "epoch": 11.775, + "loss": 0.03701567277312279, + "loss_ce": 0.00013517978368327022, + "loss_iou": 0.26953125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 257940964, + "step": 2826 + }, + { + "epoch": 11.779166666666667, + "grad_norm": 2.952868993992443, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 258032144, + "step": 2827 + }, + { + "epoch": 11.779166666666667, + "loss": 0.07449017465114594, + "loss_ce": 1.2029305253236089e-05, + "loss_iou": 0.359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 258032144, + "step": 2827 + }, + { + "epoch": 11.783333333333333, + "grad_norm": 3.49494376828575, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 258123184, + "step": 2828 + }, + { + "epoch": 11.783333333333333, + "loss": 0.06829185783863068, + "loss_ce": 0.00039788178401067853, + "loss_iou": 0.2060546875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 258123184, + "step": 2828 + }, + { + "epoch": 11.7875, + "grad_norm": 3.9275142395882807, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 258214768, + "step": 2829 + }, + { + "epoch": 11.7875, + "loss": 0.11695680767297745, + "loss_ce": 0.0044690147042274475, + "loss_iou": 0.279296875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 258214768, + "step": 2829 + }, + { + "epoch": 11.791666666666666, + "grad_norm": 2.8659715350381125, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 258304748, + "step": 2830 + }, + { + "epoch": 11.791666666666666, + "loss": 0.06732072681188583, + "loss_ce": 6.581793059012853e-06, + "loss_iou": 0.2734375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 258304748, + "step": 2830 + }, + { + "epoch": 11.795833333333333, + "grad_norm": 2.522448317248731, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 258396604, + "step": 2831 + }, + { + "epoch": 11.795833333333333, + "loss": 0.02837366610765457, + "loss_ce": 0.0002822370151989162, + "loss_iou": 0.2431640625, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 258396604, + "step": 2831 + }, + { + "epoch": 11.8, + "grad_norm": 14.163127732807999, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 258488080, + "step": 2832 + }, + { + "epoch": 11.8, + "loss": 0.06442129611968994, + "loss_ce": 0.0002199427835876122, + "loss_iou": 0.291015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 258488080, + "step": 2832 + }, + { + "epoch": 11.804166666666667, + "grad_norm": 3.2389575147775056, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 258579568, + "step": 2833 + }, + { + "epoch": 11.804166666666667, + "loss": 0.06743942946195602, + "loss_ce": 8.713373972568661e-05, + "loss_iou": 0.251953125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 258579568, + "step": 2833 + }, + { + "epoch": 11.808333333333334, + "grad_norm": 3.278984446766221, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 258671068, + "step": 2834 + }, + { + "epoch": 11.808333333333334, + "loss": 0.04743684455752373, + "loss_ce": 0.00019563363457564265, + "loss_iou": 0.2412109375, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 258671068, + "step": 2834 + }, + { + "epoch": 11.8125, + "grad_norm": 2.7045741910549856, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 258762528, + "step": 2835 + }, + { + "epoch": 11.8125, + "loss": 0.0750080943107605, + "loss_ce": 4.166929647908546e-05, + "loss_iou": 0.21484375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 258762528, + "step": 2835 + }, + { + "epoch": 11.816666666666666, + "grad_norm": 8.82185220811526, + "learning_rate": 5e-05, + "loss": 0.1116, + "num_input_tokens_seen": 258853572, + "step": 2836 + }, + { + "epoch": 11.816666666666666, + "loss": 0.14761298894882202, + "loss_ce": 0.0003351602063048631, + "loss_iou": 0.36328125, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 258853572, + "step": 2836 + }, + { + "epoch": 11.820833333333333, + "grad_norm": 5.2310030751405, + "learning_rate": 5e-05, + "loss": 0.1105, + "num_input_tokens_seen": 258944892, + "step": 2837 + }, + { + "epoch": 11.820833333333333, + "loss": 0.08534011244773865, + "loss_ce": 1.2963697372470051e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 258944892, + "step": 2837 + }, + { + "epoch": 11.825, + "grad_norm": 1.705351501456277, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 259036288, + "step": 2838 + }, + { + "epoch": 11.825, + "loss": 0.05713297426700592, + "loss_ce": 1.9327546397107653e-05, + "loss_iou": 0.27734375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 259036288, + "step": 2838 + }, + { + "epoch": 11.829166666666667, + "grad_norm": 1.1420082612365101, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 259128004, + "step": 2839 + }, + { + "epoch": 11.829166666666667, + "loss": 0.06977026909589767, + "loss_ce": 0.0031351372599601746, + "loss_iou": 0.15625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 259128004, + "step": 2839 + }, + { + "epoch": 11.833333333333334, + "grad_norm": 2.2371733830453264, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 259219580, + "step": 2840 + }, + { + "epoch": 11.833333333333334, + "loss": 0.07266523689031601, + "loss_ce": 9.443731687497348e-05, + "loss_iou": 0.244140625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 259219580, + "step": 2840 + }, + { + "epoch": 11.8375, + "grad_norm": 2.4504193950415103, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 259310764, + "step": 2841 + }, + { + "epoch": 11.8375, + "loss": 0.049181997776031494, + "loss_ce": 2.9190573513915297e-06, + "loss_iou": 0.37109375, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 259310764, + "step": 2841 + }, + { + "epoch": 11.841666666666667, + "grad_norm": 6.426105045159625, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 259402568, + "step": 2842 + }, + { + "epoch": 11.841666666666667, + "loss": 0.08205066621303558, + "loss_ce": 0.004261358641088009, + "loss_iou": 0.279296875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 259402568, + "step": 2842 + }, + { + "epoch": 11.845833333333333, + "grad_norm": 2.2911199553655894, + "learning_rate": 5e-05, + "loss": 0.1029, + "num_input_tokens_seen": 259493920, + "step": 2843 + }, + { + "epoch": 11.845833333333333, + "loss": 0.1470203995704651, + "loss_ce": 0.0003681685193441808, + "loss_iou": 0.232421875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 259493920, + "step": 2843 + }, + { + "epoch": 11.85, + "grad_norm": 1.7712901267125332, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 259585264, + "step": 2844 + }, + { + "epoch": 11.85, + "loss": 0.05896005034446716, + "loss_ce": 7.720286703261081e-06, + "loss_iou": 0.208984375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 259585264, + "step": 2844 + }, + { + "epoch": 11.854166666666666, + "grad_norm": 2.1354891999723775, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 259676860, + "step": 2845 + }, + { + "epoch": 11.854166666666666, + "loss": 0.03327609598636627, + "loss_ce": 0.0007367273210547864, + "loss_iou": 0.2080078125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 259676860, + "step": 2845 + }, + { + "epoch": 11.858333333333333, + "grad_norm": 1.6946017824633848, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 259767912, + "step": 2846 + }, + { + "epoch": 11.858333333333333, + "loss": 0.07876819372177124, + "loss_ce": 1.7584598026587628e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 259767912, + "step": 2846 + }, + { + "epoch": 11.8625, + "grad_norm": 2.7137875697412874, + "learning_rate": 5e-05, + "loss": 0.1371, + "num_input_tokens_seen": 259858860, + "step": 2847 + }, + { + "epoch": 11.8625, + "loss": 0.15086443722248077, + "loss_ce": 1.6057805623859167e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0302734375, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 259858860, + "step": 2847 + }, + { + "epoch": 11.866666666666667, + "grad_norm": 1.1937516546302895, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 259951008, + "step": 2848 + }, + { + "epoch": 11.866666666666667, + "loss": 0.03670906648039818, + "loss_ce": 9.560144826536998e-05, + "loss_iou": 0.19921875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 259951008, + "step": 2848 + }, + { + "epoch": 11.870833333333334, + "grad_norm": 1.6499540676990112, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 260042204, + "step": 2849 + }, + { + "epoch": 11.870833333333334, + "loss": 0.03667999804019928, + "loss_ce": 8.94237236934714e-05, + "loss_iou": 0.33984375, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 260042204, + "step": 2849 + }, + { + "epoch": 11.875, + "grad_norm": 1.440890559983585, + "learning_rate": 5e-05, + "loss": 0.089, + "num_input_tokens_seen": 260132976, + "step": 2850 + }, + { + "epoch": 11.875, + "loss": 0.07991119474172592, + "loss_ce": 9.17267129807442e-07, + "loss_iou": 0.228515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 260132976, + "step": 2850 + }, + { + "epoch": 11.879166666666666, + "grad_norm": 3.725208714864202, + "learning_rate": 5e-05, + "loss": 0.0994, + "num_input_tokens_seen": 260224912, + "step": 2851 + }, + { + "epoch": 11.879166666666666, + "loss": 0.1331530511379242, + "loss_ce": 0.00018796537187881768, + "loss_iou": 0.29296875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 260224912, + "step": 2851 + }, + { + "epoch": 11.883333333333333, + "grad_norm": 2.7663335524624015, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 260316564, + "step": 2852 + }, + { + "epoch": 11.883333333333333, + "loss": 0.05577700585126877, + "loss_ce": 0.0012192062567919493, + "loss_iou": 0.251953125, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 260316564, + "step": 2852 + }, + { + "epoch": 11.8875, + "grad_norm": 5.732165254014268, + "learning_rate": 5e-05, + "loss": 0.0757, + "num_input_tokens_seen": 260407848, + "step": 2853 + }, + { + "epoch": 11.8875, + "loss": 0.09239616245031357, + "loss_ce": 0.0001796693541109562, + "loss_iou": 0.328125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 260407848, + "step": 2853 + }, + { + "epoch": 11.891666666666667, + "grad_norm": 3.70790354718247, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 260499980, + "step": 2854 + }, + { + "epoch": 11.891666666666667, + "loss": 0.10627731680870056, + "loss_ce": 0.001647809287533164, + "loss_iou": 0.279296875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 260499980, + "step": 2854 + }, + { + "epoch": 11.895833333333334, + "grad_norm": 2.326751790263759, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 260591020, + "step": 2855 + }, + { + "epoch": 11.895833333333334, + "loss": 0.0929078683257103, + "loss_ce": 1.2361353583401069e-05, + "loss_iou": 0.21484375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 260591020, + "step": 2855 + }, + { + "epoch": 11.9, + "grad_norm": 5.539103654373248, + "learning_rate": 5e-05, + "loss": 0.1042, + "num_input_tokens_seen": 260683344, + "step": 2856 + }, + { + "epoch": 11.9, + "loss": 0.10079024732112885, + "loss_ce": 0.007009732071310282, + "loss_iou": 0.25390625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 260683344, + "step": 2856 + }, + { + "epoch": 11.904166666666667, + "grad_norm": 5.560794328132688, + "learning_rate": 5e-05, + "loss": 0.0863, + "num_input_tokens_seen": 260774700, + "step": 2857 + }, + { + "epoch": 11.904166666666667, + "loss": 0.09718882292509079, + "loss_ce": 2.0858064090134576e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 260774700, + "step": 2857 + }, + { + "epoch": 11.908333333333333, + "grad_norm": 2.759221182078596, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 260866232, + "step": 2858 + }, + { + "epoch": 11.908333333333333, + "loss": 0.06998462229967117, + "loss_ce": 6.122285412857309e-05, + "loss_iou": 0.26171875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 260866232, + "step": 2858 + }, + { + "epoch": 11.9125, + "grad_norm": 2.564275124770271, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 260957752, + "step": 2859 + }, + { + "epoch": 11.9125, + "loss": 0.053347524255514145, + "loss_ce": 3.3316449844278395e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 260957752, + "step": 2859 + }, + { + "epoch": 11.916666666666666, + "grad_norm": 3.333069768156953, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 261049304, + "step": 2860 + }, + { + "epoch": 11.916666666666666, + "loss": 0.06215094029903412, + "loss_ce": 1.7154019587906078e-05, + "loss_iou": 0.365234375, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 261049304, + "step": 2860 + }, + { + "epoch": 11.920833333333333, + "grad_norm": 3.0899776462038795, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 261140836, + "step": 2861 + }, + { + "epoch": 11.920833333333333, + "loss": 0.07568001747131348, + "loss_ce": 0.0009615468443371356, + "loss_iou": 0.240234375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 261140836, + "step": 2861 + }, + { + "epoch": 11.925, + "grad_norm": 2.9081179239353236, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 261232416, + "step": 2862 + }, + { + "epoch": 11.925, + "loss": 0.061053868383169174, + "loss_ce": 0.0004535876796580851, + "loss_iou": 0.208984375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 261232416, + "step": 2862 + }, + { + "epoch": 11.929166666666667, + "grad_norm": 3.6913574493836427, + "learning_rate": 5e-05, + "loss": 0.0845, + "num_input_tokens_seen": 261323292, + "step": 2863 + }, + { + "epoch": 11.929166666666667, + "loss": 0.07173296064138412, + "loss_ce": 1.3924227459938265e-06, + "loss_iou": 0.306640625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 261323292, + "step": 2863 + }, + { + "epoch": 11.933333333333334, + "grad_norm": 7.352666525893405, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 261413988, + "step": 2864 + }, + { + "epoch": 11.933333333333334, + "loss": 0.07763297855854034, + "loss_ce": 0.0035057791974395514, + "loss_iou": 0.396484375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 261413988, + "step": 2864 + }, + { + "epoch": 11.9375, + "grad_norm": 3.8865203166046918, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 261504740, + "step": 2865 + }, + { + "epoch": 11.9375, + "loss": 0.06813535839319229, + "loss_ce": 4.8647452786099166e-06, + "loss_iou": 0.34765625, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 261504740, + "step": 2865 + }, + { + "epoch": 11.941666666666666, + "grad_norm": 36.39431622438075, + "learning_rate": 5e-05, + "loss": 0.1249, + "num_input_tokens_seen": 261595296, + "step": 2866 + }, + { + "epoch": 11.941666666666666, + "loss": 0.1817329227924347, + "loss_ce": 7.592077508888906e-07, + "loss_iou": 0.2373046875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 261595296, + "step": 2866 + }, + { + "epoch": 11.945833333333333, + "grad_norm": 3.2830388964775645, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 261686756, + "step": 2867 + }, + { + "epoch": 11.945833333333333, + "loss": 0.03829586133360863, + "loss_ce": 0.002208825433626771, + "loss_iou": 0.185546875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 261686756, + "step": 2867 + }, + { + "epoch": 11.95, + "grad_norm": 1.8327089283590499, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 261777792, + "step": 2868 + }, + { + "epoch": 11.95, + "loss": 0.0942518413066864, + "loss_ce": 4.407918095239438e-05, + "loss_iou": 0.259765625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 261777792, + "step": 2868 + }, + { + "epoch": 11.954166666666667, + "grad_norm": 1.5163096220501042, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 261869352, + "step": 2869 + }, + { + "epoch": 11.954166666666667, + "loss": 0.05169472098350525, + "loss_ce": 0.00010475327144376934, + "loss_iou": 0.26171875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 261869352, + "step": 2869 + }, + { + "epoch": 11.958333333333334, + "grad_norm": 8.579480300836751, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 261960436, + "step": 2870 + }, + { + "epoch": 11.958333333333334, + "loss": 0.053049832582473755, + "loss_ce": 0.0016429764218628407, + "loss_iou": 0.1884765625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 261960436, + "step": 2870 + }, + { + "epoch": 11.9625, + "grad_norm": 1.856468335604494, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 262051256, + "step": 2871 + }, + { + "epoch": 11.9625, + "loss": 0.023780783638358116, + "loss_ce": 0.00015254892059601843, + "loss_iou": 0.2373046875, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 262051256, + "step": 2871 + }, + { + "epoch": 11.966666666666667, + "grad_norm": 3.277751162934432, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 262142500, + "step": 2872 + }, + { + "epoch": 11.966666666666667, + "loss": 0.04581213742494583, + "loss_ce": 0.00021887400362174958, + "loss_iou": 0.3515625, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 262142500, + "step": 2872 + }, + { + "epoch": 11.970833333333333, + "grad_norm": 34.78272795634807, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 262233244, + "step": 2873 + }, + { + "epoch": 11.970833333333333, + "loss": 0.06828820705413818, + "loss_ce": 0.0003255601041018963, + "loss_iou": 0.24609375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 262233244, + "step": 2873 + }, + { + "epoch": 11.975, + "grad_norm": 12.613292522757435, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 262325024, + "step": 2874 + }, + { + "epoch": 11.975, + "loss": 0.05874905735254288, + "loss_ce": 0.00015530729433521628, + "loss_iou": 0.17578125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 262325024, + "step": 2874 + }, + { + "epoch": 11.979166666666666, + "grad_norm": 2.789133765269862, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 262416224, + "step": 2875 + }, + { + "epoch": 11.979166666666666, + "loss": 0.06322193145751953, + "loss_ce": 0.0025148349814116955, + "loss_iou": 0.310546875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 262416224, + "step": 2875 + }, + { + "epoch": 11.983333333333333, + "grad_norm": 3.9294923350103423, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 262508332, + "step": 2876 + }, + { + "epoch": 11.983333333333333, + "loss": 0.07195824384689331, + "loss_ce": 0.0007607348961755633, + "loss_iou": 0.275390625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 262508332, + "step": 2876 + }, + { + "epoch": 11.9875, + "grad_norm": 4.479058343755453, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 262600376, + "step": 2877 + }, + { + "epoch": 11.9875, + "loss": 0.0894709974527359, + "loss_ce": 8.714882824278902e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 262600376, + "step": 2877 + }, + { + "epoch": 11.991666666666667, + "grad_norm": 5.508719950001302, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 262690988, + "step": 2878 + }, + { + "epoch": 11.991666666666667, + "loss": 0.09753264486789703, + "loss_ce": 0.00010526960977585986, + "loss_iou": 0.2734375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 262690988, + "step": 2878 + }, + { + "epoch": 11.995833333333334, + "grad_norm": 2.7247469459373894, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 262782476, + "step": 2879 + }, + { + "epoch": 11.995833333333334, + "loss": 0.03912658989429474, + "loss_ce": 0.00030823066481389105, + "loss_iou": 0.2578125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 262782476, + "step": 2879 + }, + { + "epoch": 12.0, + "grad_norm": 3.3107134432842362, + "learning_rate": 5e-05, + "loss": 0.0822, + "num_input_tokens_seen": 262874424, + "step": 2880 + }, + { + "epoch": 12.0, + "loss": 0.09382101148366928, + "loss_ce": 0.0004982562968507409, + "loss_iou": 0.25390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 262874424, + "step": 2880 + }, + { + "epoch": 12.004166666666666, + "grad_norm": 2.058713282308722, + "learning_rate": 5e-05, + "loss": 0.0681, + "num_input_tokens_seen": 262965544, + "step": 2881 + }, + { + "epoch": 12.004166666666666, + "loss": 0.06187222898006439, + "loss_ce": 8.939013787312433e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 262965544, + "step": 2881 + }, + { + "epoch": 12.008333333333333, + "grad_norm": 3.115740501206554, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 263056624, + "step": 2882 + }, + { + "epoch": 12.008333333333333, + "loss": 0.08259187638759613, + "loss_ce": 0.0008047710871323943, + "loss_iou": 0.2578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 263056624, + "step": 2882 + }, + { + "epoch": 12.0125, + "grad_norm": 4.965254203037331, + "learning_rate": 5e-05, + "loss": 0.1174, + "num_input_tokens_seen": 263148000, + "step": 2883 + }, + { + "epoch": 12.0125, + "loss": 0.1574704349040985, + "loss_ce": 0.00012181226338725537, + "loss_iou": 0.2333984375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 263148000, + "step": 2883 + }, + { + "epoch": 12.016666666666667, + "grad_norm": 3.6225222493450304, + "learning_rate": 5e-05, + "loss": 0.1778, + "num_input_tokens_seen": 263239460, + "step": 2884 + }, + { + "epoch": 12.016666666666667, + "loss": 0.195316344499588, + "loss_ce": 0.003757503116503358, + "loss_iou": 0.1640625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 263239460, + "step": 2884 + }, + { + "epoch": 12.020833333333334, + "grad_norm": 4.810819290420674, + "learning_rate": 5e-05, + "loss": 0.0793, + "num_input_tokens_seen": 263330848, + "step": 2885 + }, + { + "epoch": 12.020833333333334, + "loss": 0.04318102449178696, + "loss_ce": 0.000799737055785954, + "loss_iou": 0.357421875, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 263330848, + "step": 2885 + }, + { + "epoch": 12.025, + "grad_norm": 5.366359423275253, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 263422472, + "step": 2886 + }, + { + "epoch": 12.025, + "loss": 0.055778469890356064, + "loss_ce": 0.0001449243864044547, + "loss_iou": 0.375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 263422472, + "step": 2886 + }, + { + "epoch": 12.029166666666667, + "grad_norm": 3.0301501491243337, + "learning_rate": 5e-05, + "loss": 0.04, + "num_input_tokens_seen": 263514088, + "step": 2887 + }, + { + "epoch": 12.029166666666667, + "loss": 0.05425199121236801, + "loss_ce": 0.00031217176001518965, + "loss_iou": 0.263671875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 263514088, + "step": 2887 + }, + { + "epoch": 12.033333333333333, + "grad_norm": 3.9172536956407757, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 263605540, + "step": 2888 + }, + { + "epoch": 12.033333333333333, + "loss": 0.092352494597435, + "loss_ce": 0.0003648839774541557, + "loss_iou": 0.275390625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 263605540, + "step": 2888 + }, + { + "epoch": 12.0375, + "grad_norm": 2.2012629252219744, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 263696612, + "step": 2889 + }, + { + "epoch": 12.0375, + "loss": 0.07746168971061707, + "loss_ce": 0.001137231825850904, + "loss_iou": 0.201171875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 263696612, + "step": 2889 + }, + { + "epoch": 12.041666666666666, + "grad_norm": 2.8205767907604056, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 263788176, + "step": 2890 + }, + { + "epoch": 12.041666666666666, + "loss": 0.0641142725944519, + "loss_ce": 0.001415909151546657, + "loss_iou": 0.35546875, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 263788176, + "step": 2890 + }, + { + "epoch": 12.045833333333333, + "grad_norm": 1.141144522550956, + "learning_rate": 5e-05, + "loss": 0.105, + "num_input_tokens_seen": 263879328, + "step": 2891 + }, + { + "epoch": 12.045833333333333, + "loss": 0.11292260885238647, + "loss_ce": 0.0005874069756828249, + "loss_iou": 0.240234375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 263879328, + "step": 2891 + }, + { + "epoch": 12.05, + "grad_norm": 2.183273900885889, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 263971180, + "step": 2892 + }, + { + "epoch": 12.05, + "loss": 0.04804209619760513, + "loss_ce": 0.0003126035735476762, + "loss_iou": 0.259765625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 263971180, + "step": 2892 + }, + { + "epoch": 12.054166666666667, + "grad_norm": 27.968695688765518, + "learning_rate": 5e-05, + "loss": 0.1055, + "num_input_tokens_seen": 264062552, + "step": 2893 + }, + { + "epoch": 12.054166666666667, + "loss": 0.09331085532903671, + "loss_ce": 3.358149797350052e-06, + "loss_iou": 0.302734375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 264062552, + "step": 2893 + }, + { + "epoch": 12.058333333333334, + "grad_norm": 1.479698420035943, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 264153656, + "step": 2894 + }, + { + "epoch": 12.058333333333334, + "loss": 0.04069763422012329, + "loss_ce": 0.00010925379319814965, + "loss_iou": 0.291015625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 264153656, + "step": 2894 + }, + { + "epoch": 12.0625, + "grad_norm": 2.6670024246000383, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 264245196, + "step": 2895 + }, + { + "epoch": 12.0625, + "loss": 0.06512384116649628, + "loss_ce": 0.0004570932942442596, + "loss_iou": 0.30859375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 264245196, + "step": 2895 + }, + { + "epoch": 12.066666666666666, + "grad_norm": 5.723331032317641, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 264336084, + "step": 2896 + }, + { + "epoch": 12.066666666666666, + "loss": 0.15588708221912384, + "loss_ce": 3.283528712927364e-06, + "loss_iou": 0.0859375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 264336084, + "step": 2896 + }, + { + "epoch": 12.070833333333333, + "grad_norm": 6.767367961633356, + "learning_rate": 5e-05, + "loss": 0.1252, + "num_input_tokens_seen": 264428068, + "step": 2897 + }, + { + "epoch": 12.070833333333333, + "loss": 0.16708049178123474, + "loss_ce": 0.0008207280770875514, + "loss_iou": 0.0, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 264428068, + "step": 2897 + }, + { + "epoch": 12.075, + "grad_norm": 9.613717735454351, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 264519880, + "step": 2898 + }, + { + "epoch": 12.075, + "loss": 0.029908880591392517, + "loss_ce": 1.6913822037167847e-05, + "loss_iou": 0.171875, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 264519880, + "step": 2898 + }, + { + "epoch": 12.079166666666667, + "grad_norm": 3.628596364029426, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 264611556, + "step": 2899 + }, + { + "epoch": 12.079166666666667, + "loss": 0.11229430139064789, + "loss_ce": 0.002400508848950267, + "loss_iou": 0.353515625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 264611556, + "step": 2899 + }, + { + "epoch": 12.083333333333334, + "grad_norm": 5.529244181375904, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 264702740, + "step": 2900 + }, + { + "epoch": 12.083333333333334, + "loss": 0.04226759821176529, + "loss_ce": 7.488557685064734e-07, + "loss_iou": 0.3203125, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 264702740, + "step": 2900 + }, + { + "epoch": 12.0875, + "grad_norm": 2.703727858030741, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 264794332, + "step": 2901 + }, + { + "epoch": 12.0875, + "loss": 0.04961564019322395, + "loss_ce": 2.4576796931796707e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 264794332, + "step": 2901 + }, + { + "epoch": 12.091666666666667, + "grad_norm": 2.4838722579389616, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 264885436, + "step": 2902 + }, + { + "epoch": 12.091666666666667, + "loss": 0.056573957204818726, + "loss_ce": 0.00037583772791549563, + "loss_iou": 0.263671875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 264885436, + "step": 2902 + }, + { + "epoch": 12.095833333333333, + "grad_norm": 4.40229256652342, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 264976496, + "step": 2903 + }, + { + "epoch": 12.095833333333333, + "loss": 0.09596607089042664, + "loss_ce": 3.5482567000144627e-06, + "loss_iou": 0.3203125, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 264976496, + "step": 2903 + }, + { + "epoch": 12.1, + "grad_norm": 3.0575238205030666, + "learning_rate": 5e-05, + "loss": 0.1059, + "num_input_tokens_seen": 265066968, + "step": 2904 + }, + { + "epoch": 12.1, + "loss": 0.16267964243888855, + "loss_ce": 0.00014301914779935032, + "loss_iou": 0.2373046875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 265066968, + "step": 2904 + }, + { + "epoch": 12.104166666666666, + "grad_norm": 2.931793056011788, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 265158324, + "step": 2905 + }, + { + "epoch": 12.104166666666666, + "loss": 0.07005259394645691, + "loss_ce": 0.00012156509910710156, + "loss_iou": 0.19140625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 265158324, + "step": 2905 + }, + { + "epoch": 12.108333333333333, + "grad_norm": 1.6134630286599965, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 265250108, + "step": 2906 + }, + { + "epoch": 12.108333333333333, + "loss": 0.05043035373091698, + "loss_ce": 6.10871720709838e-05, + "loss_iou": 0.09765625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 265250108, + "step": 2906 + }, + { + "epoch": 12.1125, + "grad_norm": 2.656497257584411, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 265342096, + "step": 2907 + }, + { + "epoch": 12.1125, + "loss": 0.09211976826190948, + "loss_ce": 2.462998509145109e-06, + "loss_iou": 0.263671875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 265342096, + "step": 2907 + }, + { + "epoch": 12.116666666666667, + "grad_norm": 2.44578022152003, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 265433364, + "step": 2908 + }, + { + "epoch": 12.116666666666667, + "loss": 0.06962820887565613, + "loss_ce": 3.287343861302361e-05, + "loss_iou": 0.1875, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 265433364, + "step": 2908 + }, + { + "epoch": 12.120833333333334, + "grad_norm": 2.5929382173081827, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 265524704, + "step": 2909 + }, + { + "epoch": 12.120833333333334, + "loss": 0.0612100325524807, + "loss_ce": 3.754855060833506e-05, + "loss_iou": 0.29296875, + "loss_num": 0.01220703125, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 265524704, + "step": 2909 + }, + { + "epoch": 12.125, + "grad_norm": 2.952912122345602, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 265616064, + "step": 2910 + }, + { + "epoch": 12.125, + "loss": 0.0755220502614975, + "loss_ce": 0.0010286483447998762, + "loss_iou": 0.275390625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 265616064, + "step": 2910 + }, + { + "epoch": 12.129166666666666, + "grad_norm": 2.8047268982104887, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 265707516, + "step": 2911 + }, + { + "epoch": 12.129166666666666, + "loss": 0.03295915573835373, + "loss_ce": 0.0007707421318627894, + "loss_iou": 0.337890625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 265707516, + "step": 2911 + }, + { + "epoch": 12.133333333333333, + "grad_norm": 2.716707347455883, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 265798680, + "step": 2912 + }, + { + "epoch": 12.133333333333333, + "loss": 0.05870115011930466, + "loss_ce": 5.865215939593327e-07, + "loss_iou": 0.3046875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 265798680, + "step": 2912 + }, + { + "epoch": 12.1375, + "grad_norm": 3.7884463613882646, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 265889936, + "step": 2913 + }, + { + "epoch": 12.1375, + "loss": 0.06101018935441971, + "loss_ce": 3.60674130206462e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 265889936, + "step": 2913 + }, + { + "epoch": 12.141666666666667, + "grad_norm": 4.705935262745655, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 265981024, + "step": 2914 + }, + { + "epoch": 12.141666666666667, + "loss": 0.06493838876485825, + "loss_ce": 0.00024112407118082047, + "loss_iou": 0.15234375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 265981024, + "step": 2914 + }, + { + "epoch": 12.145833333333334, + "grad_norm": 4.364909969389509, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 266072952, + "step": 2915 + }, + { + "epoch": 12.145833333333334, + "loss": 0.07338149845600128, + "loss_ce": 0.00030715003958903253, + "loss_iou": 0.34765625, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 266072952, + "step": 2915 + }, + { + "epoch": 12.15, + "grad_norm": 2.72071504424907, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 266164872, + "step": 2916 + }, + { + "epoch": 12.15, + "loss": 0.07031304389238358, + "loss_ce": 5.453845233205357e-07, + "loss_iou": 0.2314453125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 266164872, + "step": 2916 + }, + { + "epoch": 12.154166666666667, + "grad_norm": 3.4031048303871128, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 266255884, + "step": 2917 + }, + { + "epoch": 12.154166666666667, + "loss": 0.04454587399959564, + "loss_ce": 0.0002953895309474319, + "loss_iou": 0.265625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 266255884, + "step": 2917 + }, + { + "epoch": 12.158333333333333, + "grad_norm": 3.6525245485243634, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 266347008, + "step": 2918 + }, + { + "epoch": 12.158333333333333, + "loss": 0.05760706961154938, + "loss_ce": 5.091585626360029e-05, + "loss_iou": 0.33203125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 266347008, + "step": 2918 + }, + { + "epoch": 12.1625, + "grad_norm": 3.8083362004566594, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 266438312, + "step": 2919 + }, + { + "epoch": 12.1625, + "loss": 0.05300240218639374, + "loss_ce": 0.0001917332410812378, + "loss_iou": 0.390625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 266438312, + "step": 2919 + }, + { + "epoch": 12.166666666666666, + "grad_norm": 3.220254727036035, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 266529904, + "step": 2920 + }, + { + "epoch": 12.166666666666666, + "loss": 0.04786030575633049, + "loss_ce": 0.00014607336197514087, + "loss_iou": 0.3359375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 266529904, + "step": 2920 + }, + { + "epoch": 12.170833333333333, + "grad_norm": 6.3841299060006715, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 266621108, + "step": 2921 + }, + { + "epoch": 12.170833333333333, + "loss": 0.09495310485363007, + "loss_ce": 4.343495675129816e-05, + "loss_iou": 0.28515625, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 266621108, + "step": 2921 + }, + { + "epoch": 12.175, + "grad_norm": 5.043845795133797, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 266712428, + "step": 2922 + }, + { + "epoch": 12.175, + "loss": 0.07533925771713257, + "loss_ce": 3.7128469557501376e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 266712428, + "step": 2922 + }, + { + "epoch": 12.179166666666667, + "grad_norm": 2.027261528087587, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 266803764, + "step": 2923 + }, + { + "epoch": 12.179166666666667, + "loss": 0.03585366904735565, + "loss_ce": 3.147234565403778e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 266803764, + "step": 2923 + }, + { + "epoch": 12.183333333333334, + "grad_norm": 2.861881243241126, + "learning_rate": 5e-05, + "loss": 0.0876, + "num_input_tokens_seen": 266895580, + "step": 2924 + }, + { + "epoch": 12.183333333333334, + "loss": 0.03889714181423187, + "loss_ce": 0.0020624231547117233, + "loss_iou": 0.271484375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 266895580, + "step": 2924 + }, + { + "epoch": 12.1875, + "grad_norm": 3.7892169185950517, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 266986668, + "step": 2925 + }, + { + "epoch": 12.1875, + "loss": 0.056877922266721725, + "loss_ce": 8.416508535447065e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 266986668, + "step": 2925 + }, + { + "epoch": 12.191666666666666, + "grad_norm": 4.483164328656569, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 267078348, + "step": 2926 + }, + { + "epoch": 12.191666666666666, + "loss": 0.061795562505722046, + "loss_ce": 0.000928252877201885, + "loss_iou": 0.271484375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 267078348, + "step": 2926 + }, + { + "epoch": 12.195833333333333, + "grad_norm": 2.6821650432495447, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 267169512, + "step": 2927 + }, + { + "epoch": 12.195833333333333, + "loss": 0.029609516263008118, + "loss_ce": 2.2723163056070916e-05, + "loss_iou": 0.32421875, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 267169512, + "step": 2927 + }, + { + "epoch": 12.2, + "grad_norm": 8.672822575584679, + "learning_rate": 5e-05, + "loss": 0.1132, + "num_input_tokens_seen": 267260836, + "step": 2928 + }, + { + "epoch": 12.2, + "loss": 0.11694012582302094, + "loss_ce": 0.002804376883432269, + "loss_iou": 0.279296875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 267260836, + "step": 2928 + }, + { + "epoch": 12.204166666666667, + "grad_norm": 9.806434406786224, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 267352716, + "step": 2929 + }, + { + "epoch": 12.204166666666667, + "loss": 0.04113160818815231, + "loss_ce": 2.443217636027839e-05, + "loss_iou": 0.30859375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 267352716, + "step": 2929 + }, + { + "epoch": 12.208333333333334, + "grad_norm": 2.175031456204424, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 267443920, + "step": 2930 + }, + { + "epoch": 12.208333333333334, + "loss": 0.05550452694296837, + "loss_ce": 8.460329991066828e-05, + "loss_iou": 0.15625, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 267443920, + "step": 2930 + }, + { + "epoch": 12.2125, + "grad_norm": 1.605174398404864, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 267535708, + "step": 2931 + }, + { + "epoch": 12.2125, + "loss": 0.05761126056313515, + "loss_ce": 0.00031451130053028464, + "loss_iou": 0.1552734375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 267535708, + "step": 2931 + }, + { + "epoch": 12.216666666666667, + "grad_norm": 1.9986457114321676, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 267626804, + "step": 2932 + }, + { + "epoch": 12.216666666666667, + "loss": 0.0630793496966362, + "loss_ce": 1.4777106116525829e-05, + "loss_iou": 0.25390625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 267626804, + "step": 2932 + }, + { + "epoch": 12.220833333333333, + "grad_norm": 6.053276257068978, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 267718264, + "step": 2933 + }, + { + "epoch": 12.220833333333333, + "loss": 0.06700801849365234, + "loss_ce": 3.7198144127614796e-05, + "loss_iou": 0.279296875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 267718264, + "step": 2933 + }, + { + "epoch": 12.225, + "grad_norm": 2.3243663096267064, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 267809600, + "step": 2934 + }, + { + "epoch": 12.225, + "loss": 0.03875809535384178, + "loss_ce": 7.706361793680117e-05, + "loss_iou": 0.248046875, + "loss_num": 0.00775146484375, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 267809600, + "step": 2934 + }, + { + "epoch": 12.229166666666666, + "grad_norm": 11.416599220781654, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 267900996, + "step": 2935 + }, + { + "epoch": 12.229166666666666, + "loss": 0.08636993169784546, + "loss_ce": 0.0001272500230697915, + "loss_iou": 0.30078125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 267900996, + "step": 2935 + }, + { + "epoch": 12.233333333333333, + "grad_norm": 10.747556296285435, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 267992640, + "step": 2936 + }, + { + "epoch": 12.233333333333333, + "loss": 0.04982030764222145, + "loss_ce": 4.613840428646654e-05, + "loss_iou": 0.2265625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 267992640, + "step": 2936 + }, + { + "epoch": 12.2375, + "grad_norm": 5.00122027159461, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 268084216, + "step": 2937 + }, + { + "epoch": 12.2375, + "loss": 0.07573728263378143, + "loss_ce": 0.0004656722885556519, + "loss_iou": 0.353515625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 268084216, + "step": 2937 + }, + { + "epoch": 12.241666666666667, + "grad_norm": 1.8936543226280202, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 268175636, + "step": 2938 + }, + { + "epoch": 12.241666666666667, + "loss": 0.039882972836494446, + "loss_ce": 5.75307167309802e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 268175636, + "step": 2938 + }, + { + "epoch": 12.245833333333334, + "grad_norm": 3.126485576801853, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 268267340, + "step": 2939 + }, + { + "epoch": 12.245833333333334, + "loss": 0.05813007429242134, + "loss_ce": 0.00016193474584724754, + "loss_iou": 0.33984375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 268267340, + "step": 2939 + }, + { + "epoch": 12.25, + "grad_norm": 2.753687602321184, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 268358356, + "step": 2940 + }, + { + "epoch": 12.25, + "loss": 0.067852683365345, + "loss_ce": 1.211215385410469e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 268358356, + "step": 2940 + }, + { + "epoch": 12.254166666666666, + "grad_norm": 2.306219959360317, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 268449056, + "step": 2941 + }, + { + "epoch": 12.254166666666666, + "loss": 0.08572492748498917, + "loss_ce": 1.0461585588927846e-06, + "loss_iou": 0.1708984375, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 268449056, + "step": 2941 + }, + { + "epoch": 12.258333333333333, + "grad_norm": 2.663256282359737, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 268541016, + "step": 2942 + }, + { + "epoch": 12.258333333333333, + "loss": 0.0969671756029129, + "loss_ce": 0.00018067903874907643, + "loss_iou": 0.34375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 268541016, + "step": 2942 + }, + { + "epoch": 12.2625, + "grad_norm": 2.1110070353785213, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 268632556, + "step": 2943 + }, + { + "epoch": 12.2625, + "loss": 0.028460349887609482, + "loss_ce": 0.00046810219646431506, + "loss_iou": 0.2021484375, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 268632556, + "step": 2943 + }, + { + "epoch": 12.266666666666667, + "grad_norm": 3.2150465390901717, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 268724012, + "step": 2944 + }, + { + "epoch": 12.266666666666667, + "loss": 0.10927990078926086, + "loss_ce": 4.2232659325236455e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 268724012, + "step": 2944 + }, + { + "epoch": 12.270833333333334, + "grad_norm": 3.0431108054614215, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 268815492, + "step": 2945 + }, + { + "epoch": 12.270833333333334, + "loss": 0.07915940880775452, + "loss_ce": 3.495713826850988e-05, + "loss_iou": 0.373046875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 268815492, + "step": 2945 + }, + { + "epoch": 12.275, + "grad_norm": 2.4540010699603267, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 268906800, + "step": 2946 + }, + { + "epoch": 12.275, + "loss": 0.08143861591815948, + "loss_ce": 4.823937706532888e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 268906800, + "step": 2946 + }, + { + "epoch": 12.279166666666667, + "grad_norm": 4.1755469226422655, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 268997880, + "step": 2947 + }, + { + "epoch": 12.279166666666667, + "loss": 0.06574500352144241, + "loss_ce": 1.014260487863794e-05, + "loss_iou": 0.234375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 268997880, + "step": 2947 + }, + { + "epoch": 12.283333333333333, + "grad_norm": 0.9371808156525325, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 269088332, + "step": 2948 + }, + { + "epoch": 12.283333333333333, + "loss": 0.11407633125782013, + "loss_ce": 4.739892392535694e-05, + "loss_iou": 0.23828125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 269088332, + "step": 2948 + }, + { + "epoch": 12.2875, + "grad_norm": 2.74225444069886, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 269179540, + "step": 2949 + }, + { + "epoch": 12.2875, + "loss": 0.04844193905591965, + "loss_ce": 8.683644409757107e-05, + "loss_iou": 0.2734375, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 269179540, + "step": 2949 + }, + { + "epoch": 12.291666666666666, + "grad_norm": 46.38768999122701, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 269270724, + "step": 2950 + }, + { + "epoch": 12.291666666666666, + "loss": 0.06026214361190796, + "loss_ce": 0.00011962662392761558, + "loss_iou": 0.22265625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 269270724, + "step": 2950 + }, + { + "epoch": 12.295833333333333, + "grad_norm": 2.267656912745027, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 269361640, + "step": 2951 + }, + { + "epoch": 12.295833333333333, + "loss": 0.11035619676113129, + "loss_ce": 4.627444923244184e-06, + "loss_iou": 0.0859375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 269361640, + "step": 2951 + }, + { + "epoch": 12.3, + "grad_norm": 60.26301427305691, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 269452804, + "step": 2952 + }, + { + "epoch": 12.3, + "loss": 0.06073518097400665, + "loss_ce": 2.045981091214344e-05, + "loss_iou": 0.322265625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 269452804, + "step": 2952 + }, + { + "epoch": 12.304166666666667, + "grad_norm": 3.3411094226742084, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 269543492, + "step": 2953 + }, + { + "epoch": 12.304166666666667, + "loss": 0.06969062983989716, + "loss_ce": 0.00038520374801009893, + "loss_iou": 0.28125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 269543492, + "step": 2953 + }, + { + "epoch": 12.308333333333334, + "grad_norm": 2.694947426545346, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 269635008, + "step": 2954 + }, + { + "epoch": 12.308333333333334, + "loss": 0.05683236941695213, + "loss_ce": 0.0005884742713533342, + "loss_iou": 0.384765625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 269635008, + "step": 2954 + }, + { + "epoch": 12.3125, + "grad_norm": 3.2220681695144227, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 269725540, + "step": 2955 + }, + { + "epoch": 12.3125, + "loss": 0.05476832389831543, + "loss_ce": 0.00044703579624183476, + "loss_iou": 0.22265625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 269725540, + "step": 2955 + }, + { + "epoch": 12.316666666666666, + "grad_norm": 3.4982940830366407, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 269817180, + "step": 2956 + }, + { + "epoch": 12.316666666666666, + "loss": 0.07881681621074677, + "loss_ce": 5.0943544920301065e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 269817180, + "step": 2956 + }, + { + "epoch": 12.320833333333333, + "grad_norm": 3.6375666812709584, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 269908468, + "step": 2957 + }, + { + "epoch": 12.320833333333333, + "loss": 0.040140677243471146, + "loss_ce": 0.0010667321039363742, + "loss_iou": 0.271484375, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 269908468, + "step": 2957 + }, + { + "epoch": 12.325, + "grad_norm": 4.173489294941111, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 269999436, + "step": 2958 + }, + { + "epoch": 12.325, + "loss": 0.05630388855934143, + "loss_ce": 0.005782034248113632, + "loss_iou": 0.2255859375, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 269999436, + "step": 2958 + }, + { + "epoch": 12.329166666666667, + "grad_norm": 1.9740436997100599, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 270090756, + "step": 2959 + }, + { + "epoch": 12.329166666666667, + "loss": 0.09736660867929459, + "loss_ce": 1.5532590623479337e-05, + "loss_iou": 0.19140625, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 270090756, + "step": 2959 + }, + { + "epoch": 12.333333333333334, + "grad_norm": 2.525271542103449, + "learning_rate": 5e-05, + "loss": 0.1172, + "num_input_tokens_seen": 270182140, + "step": 2960 + }, + { + "epoch": 12.333333333333334, + "loss": 0.11587947607040405, + "loss_ce": 6.527241930598393e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 270182140, + "step": 2960 + }, + { + "epoch": 12.3375, + "grad_norm": 3.217067686924724, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 270273508, + "step": 2961 + }, + { + "epoch": 12.3375, + "loss": 0.052564837038517, + "loss_ce": 0.00234816106967628, + "loss_iou": 0.2255859375, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 270273508, + "step": 2961 + }, + { + "epoch": 12.341666666666667, + "grad_norm": 3.7700213068144244, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 270365420, + "step": 2962 + }, + { + "epoch": 12.341666666666667, + "loss": 0.06520096957683563, + "loss_ce": 0.0013276764657348394, + "loss_iou": 0.298828125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 270365420, + "step": 2962 + }, + { + "epoch": 12.345833333333333, + "grad_norm": 3.888306227512155, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 270456560, + "step": 2963 + }, + { + "epoch": 12.345833333333333, + "loss": 0.042089179158210754, + "loss_ce": 5.439537744678091e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 270456560, + "step": 2963 + }, + { + "epoch": 12.35, + "grad_norm": 3.352267654498321, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 270547912, + "step": 2964 + }, + { + "epoch": 12.35, + "loss": 0.061682380735874176, + "loss_ce": 3.687536809593439e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 270547912, + "step": 2964 + }, + { + "epoch": 12.354166666666666, + "grad_norm": 3.0135508618533806, + "learning_rate": 5e-05, + "loss": 0.0795, + "num_input_tokens_seen": 270637804, + "step": 2965 + }, + { + "epoch": 12.354166666666666, + "loss": 0.10426987707614899, + "loss_ce": 3.7082412745803595e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 270637804, + "step": 2965 + }, + { + "epoch": 12.358333333333333, + "grad_norm": 1.7291855053180956, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 270728876, + "step": 2966 + }, + { + "epoch": 12.358333333333333, + "loss": 0.08189569413661957, + "loss_ce": 1.7031868992489763e-05, + "loss_iou": 0.11083984375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 270728876, + "step": 2966 + }, + { + "epoch": 12.3625, + "grad_norm": 2.4908079853099356, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 270820496, + "step": 2967 + }, + { + "epoch": 12.3625, + "loss": 0.03716364502906799, + "loss_ce": 0.00020685509662143886, + "loss_iou": 0.26171875, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 270820496, + "step": 2967 + }, + { + "epoch": 12.366666666666667, + "grad_norm": 2.97895839160627, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 270912076, + "step": 2968 + }, + { + "epoch": 12.366666666666667, + "loss": 0.050762590020895004, + "loss_ce": 7.28939994587563e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 270912076, + "step": 2968 + }, + { + "epoch": 12.370833333333334, + "grad_norm": 5.030995700810052, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 271003444, + "step": 2969 + }, + { + "epoch": 12.370833333333334, + "loss": 0.08265762776136398, + "loss_ce": 7.6745891419705e-07, + "loss_iou": 0.24609375, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 271003444, + "step": 2969 + }, + { + "epoch": 12.375, + "grad_norm": 3.6267303733690452, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 271094672, + "step": 2970 + }, + { + "epoch": 12.375, + "loss": 0.053386226296424866, + "loss_ce": 2.6241139494231902e-05, + "loss_iou": 0.255859375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 271094672, + "step": 2970 + }, + { + "epoch": 12.379166666666666, + "grad_norm": 3.6698821259215753, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 271186328, + "step": 2971 + }, + { + "epoch": 12.379166666666666, + "loss": 0.06262575834989548, + "loss_ce": 7.998640649020672e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 271186328, + "step": 2971 + }, + { + "epoch": 12.383333333333333, + "grad_norm": 3.066041322585827, + "learning_rate": 5e-05, + "loss": 0.0785, + "num_input_tokens_seen": 271277608, + "step": 2972 + }, + { + "epoch": 12.383333333333333, + "loss": 0.10933400690555573, + "loss_ce": 0.0004778074217028916, + "loss_iou": 0.3828125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 271277608, + "step": 2972 + }, + { + "epoch": 12.3875, + "grad_norm": 2.789272102325914, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 271369024, + "step": 2973 + }, + { + "epoch": 12.3875, + "loss": 0.08655081689357758, + "loss_ce": 0.0005217670113779604, + "loss_iou": 0.302734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 271369024, + "step": 2973 + }, + { + "epoch": 12.391666666666667, + "grad_norm": 3.0763527820391436, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 271459964, + "step": 2974 + }, + { + "epoch": 12.391666666666667, + "loss": 0.0638897716999054, + "loss_ce": 0.0026562470011413097, + "loss_iou": 0.224609375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 271459964, + "step": 2974 + }, + { + "epoch": 12.395833333333334, + "grad_norm": 2.5119394436453195, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 271551052, + "step": 2975 + }, + { + "epoch": 12.395833333333334, + "loss": 0.03439211845397949, + "loss_ce": 6.439298886107281e-06, + "loss_iou": 0.251953125, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 271551052, + "step": 2975 + }, + { + "epoch": 12.4, + "grad_norm": 6.359073936988227, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 271642408, + "step": 2976 + }, + { + "epoch": 12.4, + "loss": 0.06211081147193909, + "loss_ce": 1.5169309335760772e-05, + "loss_iou": 0.23828125, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 271642408, + "step": 2976 + }, + { + "epoch": 12.404166666666667, + "grad_norm": 5.83255234834698, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 271734232, + "step": 2977 + }, + { + "epoch": 12.404166666666667, + "loss": 0.08257852494716644, + "loss_ce": 1.3214259524829686e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 271734232, + "step": 2977 + }, + { + "epoch": 12.408333333333333, + "grad_norm": 3.356095413500328, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 271825252, + "step": 2978 + }, + { + "epoch": 12.408333333333333, + "loss": 0.05137810856103897, + "loss_ce": 1.7683807982393773e-06, + "loss_iou": 0.330078125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 271825252, + "step": 2978 + }, + { + "epoch": 12.4125, + "grad_norm": 5.3591045499391345, + "learning_rate": 5e-05, + "loss": 0.0945, + "num_input_tokens_seen": 271916788, + "step": 2979 + }, + { + "epoch": 12.4125, + "loss": 0.10306023806333542, + "loss_ce": 2.3709637844149256e-06, + "loss_iou": 0.390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 271916788, + "step": 2979 + }, + { + "epoch": 12.416666666666666, + "grad_norm": 2.65928675424727, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 272008156, + "step": 2980 + }, + { + "epoch": 12.416666666666666, + "loss": 0.076792873442173, + "loss_ce": 8.6943109636195e-05, + "loss_iou": 0.27734375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 272008156, + "step": 2980 + }, + { + "epoch": 12.420833333333333, + "grad_norm": 2.647439664415239, + "learning_rate": 5e-05, + "loss": 0.0976, + "num_input_tokens_seen": 272099316, + "step": 2981 + }, + { + "epoch": 12.420833333333333, + "loss": 0.08195499330759048, + "loss_ce": 6.107529043219984e-05, + "loss_iou": 0.4375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 272099316, + "step": 2981 + }, + { + "epoch": 12.425, + "grad_norm": 3.166738565215811, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 272189424, + "step": 2982 + }, + { + "epoch": 12.425, + "loss": 0.08983665704727173, + "loss_ce": 0.00017601408762857318, + "loss_iou": 0.33203125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 272189424, + "step": 2982 + }, + { + "epoch": 12.429166666666667, + "grad_norm": 1.7291364446089161, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 272280260, + "step": 2983 + }, + { + "epoch": 12.429166666666667, + "loss": 0.07141336053609848, + "loss_ce": 1.74842425622046e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 272280260, + "step": 2983 + }, + { + "epoch": 12.433333333333334, + "grad_norm": 3.7551882332604087, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 272371540, + "step": 2984 + }, + { + "epoch": 12.433333333333334, + "loss": 0.06047248840332031, + "loss_ce": 3.2421918149339035e-05, + "loss_iou": 0.244140625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 272371540, + "step": 2984 + }, + { + "epoch": 12.4375, + "grad_norm": 2.7412211250098784, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 272463376, + "step": 2985 + }, + { + "epoch": 12.4375, + "loss": 0.10339340567588806, + "loss_ce": 0.000648344517685473, + "loss_iou": 0.2490234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 272463376, + "step": 2985 + }, + { + "epoch": 12.441666666666666, + "grad_norm": 2.6385098374805307, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 272554700, + "step": 2986 + }, + { + "epoch": 12.441666666666666, + "loss": 0.05318248271942139, + "loss_ce": 0.0003260402590967715, + "loss_iou": 0.2392578125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 272554700, + "step": 2986 + }, + { + "epoch": 12.445833333333333, + "grad_norm": 4.896829389393238, + "learning_rate": 5e-05, + "loss": 0.0922, + "num_input_tokens_seen": 272646284, + "step": 2987 + }, + { + "epoch": 12.445833333333333, + "loss": 0.09454986453056335, + "loss_ce": 0.0002200333256041631, + "loss_iou": 0.265625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 272646284, + "step": 2987 + }, + { + "epoch": 12.45, + "grad_norm": 3.040650468532159, + "learning_rate": 5e-05, + "loss": 0.0916, + "num_input_tokens_seen": 272737320, + "step": 2988 + }, + { + "epoch": 12.45, + "loss": 0.10238365083932877, + "loss_ce": 4.2953739466611296e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 272737320, + "step": 2988 + }, + { + "epoch": 12.454166666666667, + "grad_norm": 4.243087910124068, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 272828568, + "step": 2989 + }, + { + "epoch": 12.454166666666667, + "loss": 0.06616440415382385, + "loss_ce": 3.281386671005748e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 272828568, + "step": 2989 + }, + { + "epoch": 12.458333333333334, + "grad_norm": 1.8136182206329305, + "learning_rate": 5e-05, + "loss": 0.0482, + "num_input_tokens_seen": 272920056, + "step": 2990 + }, + { + "epoch": 12.458333333333334, + "loss": 0.047113243490457535, + "loss_ce": 1.7326314036836266e-06, + "loss_iou": 0.248046875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 272920056, + "step": 2990 + }, + { + "epoch": 12.4625, + "grad_norm": 2.6392028828013903, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 273012108, + "step": 2991 + }, + { + "epoch": 12.4625, + "loss": 0.03175271302461624, + "loss_ce": 0.00047219384578056633, + "loss_iou": 0.2109375, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 273012108, + "step": 2991 + }, + { + "epoch": 12.466666666666667, + "grad_norm": 6.542435490375677, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 273103444, + "step": 2992 + }, + { + "epoch": 12.466666666666667, + "loss": 0.06299017369747162, + "loss_ce": 1.8880823517974932e-06, + "loss_iou": 0.19921875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 273103444, + "step": 2992 + }, + { + "epoch": 12.470833333333333, + "grad_norm": 2.489483280029813, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 273195164, + "step": 2993 + }, + { + "epoch": 12.470833333333333, + "loss": 0.038447022438049316, + "loss_ce": 0.0008798825438134372, + "loss_iou": 0.26953125, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 273195164, + "step": 2993 + }, + { + "epoch": 12.475, + "grad_norm": 2.8719785161307123, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 273286944, + "step": 2994 + }, + { + "epoch": 12.475, + "loss": 0.058075353503227234, + "loss_ce": 6.143513019196689e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 273286944, + "step": 2994 + }, + { + "epoch": 12.479166666666666, + "grad_norm": 2.5710577096424925, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 273377916, + "step": 2995 + }, + { + "epoch": 12.479166666666666, + "loss": 0.040670327842235565, + "loss_ce": 5.657493147737114e-06, + "loss_iou": 0.2275390625, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 273377916, + "step": 2995 + }, + { + "epoch": 12.483333333333333, + "grad_norm": 2.2883425127558956, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 273468608, + "step": 2996 + }, + { + "epoch": 12.483333333333333, + "loss": 0.03394392877817154, + "loss_ce": 8.38409505377058e-06, + "loss_iou": 0.25390625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 273468608, + "step": 2996 + }, + { + "epoch": 12.4875, + "grad_norm": 7.473944248059015, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 273560360, + "step": 2997 + }, + { + "epoch": 12.4875, + "loss": 0.07293405383825302, + "loss_ce": 0.00027170099201612175, + "loss_iou": 0.265625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 273560360, + "step": 2997 + }, + { + "epoch": 12.491666666666667, + "grad_norm": 5.223530887356119, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 273651220, + "step": 2998 + }, + { + "epoch": 12.491666666666667, + "loss": 0.06444063037633896, + "loss_ce": 2.763768634395092e-06, + "loss_iou": 0.400390625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 273651220, + "step": 2998 + }, + { + "epoch": 12.495833333333334, + "grad_norm": 3.77445855797572, + "learning_rate": 5e-05, + "loss": 0.092, + "num_input_tokens_seen": 273742916, + "step": 2999 + }, + { + "epoch": 12.495833333333334, + "loss": 0.1081899106502533, + "loss_ce": 5.0875409215223044e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 273742916, + "step": 2999 + }, + { + "epoch": 12.5, + "grad_norm": 3.894907640330977, + "learning_rate": 5e-05, + "loss": 0.0959, + "num_input_tokens_seen": 273833260, + "step": 3000 + }, + { + "epoch": 12.5, + "eval_seeclick_CIoU": 0.2620522305369377, + "eval_seeclick_GIoU": 0.261411115527153, + "eval_seeclick_IoU": 0.3575377017259598, + "eval_seeclick_MAE_all": 0.09368777647614479, + "eval_seeclick_MAE_h": 0.08874733746051788, + "eval_seeclick_MAE_w": 0.18410181254148483, + "eval_seeclick_MAE_x_boxes": 0.1935308650135994, + "eval_seeclick_MAE_y_boxes": 0.09129971638321877, + "eval_seeclick_NUM_probability": 0.9999963939189911, + "eval_seeclick_inside_bbox": 0.4786931872367859, + "eval_seeclick_loss": 0.5895414352416992, + "eval_seeclick_loss_ce": 0.1322433277964592, + "eval_seeclick_loss_iou": 0.4326171875, + "eval_seeclick_loss_num": 0.0874786376953125, + "eval_seeclick_loss_xval": 0.4368896484375, + "eval_seeclick_runtime": 76.9126, + "eval_seeclick_samples_per_second": 0.559, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 273833260, + "step": 3000 + }, + { + "epoch": 12.5, + "eval_icons_CIoU": 0.423148512840271, + "eval_icons_GIoU": 0.43206779658794403, + "eval_icons_IoU": 0.48607292771339417, + "eval_icons_MAE_all": 0.05760672502219677, + "eval_icons_MAE_h": 0.11967556178569794, + "eval_icons_MAE_w": 0.084600280970335, + "eval_icons_MAE_x_boxes": 0.08436497300863266, + "eval_icons_MAE_y_boxes": 0.11780550330877304, + "eval_icons_NUM_probability": 0.9999983906745911, + "eval_icons_inside_bbox": 0.6493055522441864, + "eval_icons_loss": 0.2809631824493408, + "eval_icons_loss_ce": 4.4523385440697894e-05, + "eval_icons_loss_iou": 0.2989501953125, + "eval_icons_loss_num": 0.0606536865234375, + "eval_icons_loss_xval": 0.303131103515625, + "eval_icons_runtime": 88.2186, + "eval_icons_samples_per_second": 0.567, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 273833260, + "step": 3000 + }, + { + "epoch": 12.5, + "eval_screenspot_CIoU": 0.35235823194185895, + "eval_screenspot_GIoU": 0.33617523312568665, + "eval_screenspot_IoU": 0.44529513518015545, + "eval_screenspot_MAE_all": 0.10489916304747264, + "eval_screenspot_MAE_h": 0.09332895030577977, + "eval_screenspot_MAE_w": 0.2365158200263977, + "eval_screenspot_MAE_x_boxes": 0.22446357210477194, + "eval_screenspot_MAE_y_boxes": 0.08861926694711049, + "eval_screenspot_NUM_probability": 0.9974575042724609, + "eval_screenspot_inside_bbox": 0.6670833428700765, + "eval_screenspot_loss": 0.5274566411972046, + "eval_screenspot_loss_ce": 0.0008574875680362007, + "eval_screenspot_loss_iou": 0.3682861328125, + "eval_screenspot_loss_num": 0.10426839192708333, + "eval_screenspot_loss_xval": 0.52142333984375, + "eval_screenspot_runtime": 163.935, + "eval_screenspot_samples_per_second": 0.543, + "eval_screenspot_steps_per_second": 0.018, + "num_input_tokens_seen": 273833260, + "step": 3000 + }, + { + "epoch": 12.5, + "eval_compot_CIoU": 0.5151858925819397, + "eval_compot_GIoU": 0.5106890201568604, + "eval_compot_IoU": 0.5789482891559601, + "eval_compot_MAE_all": 0.0476725697517395, + "eval_compot_MAE_h": 0.06707289069890976, + "eval_compot_MAE_w": 0.11045684665441513, + "eval_compot_MAE_x_boxes": 0.10997127369046211, + "eval_compot_MAE_y_boxes": 0.06569756753742695, + "eval_compot_NUM_probability": 0.9999961256980896, + "eval_compot_inside_bbox": 0.7951388955116272, + "eval_compot_loss": 0.26056361198425293, + "eval_compot_loss_ce": 0.031769391149282455, + "eval_compot_loss_iou": 0.33770751953125, + "eval_compot_loss_num": 0.04244804382324219, + "eval_compot_loss_xval": 0.21216583251953125, + "eval_compot_runtime": 90.3101, + "eval_compot_samples_per_second": 0.554, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 273833260, + "step": 3000 + }, + { + "epoch": 12.5, + "loss": 0.23503346741199493, + "loss_ce": 0.034380391240119934, + "loss_iou": 0.33984375, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 273833260, + "step": 3000 + }, + { + "epoch": 12.504166666666666, + "grad_norm": 1.6827014254027144, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 273924180, + "step": 3001 + }, + { + "epoch": 12.504166666666666, + "loss": 0.03862186148762703, + "loss_ce": 1.8641272845343337e-06, + "loss_iou": 0.220703125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 273924180, + "step": 3001 + }, + { + "epoch": 12.508333333333333, + "grad_norm": 3.767378502480506, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 274015228, + "step": 3002 + }, + { + "epoch": 12.508333333333333, + "loss": 0.08175476640462875, + "loss_ce": 0.00024231600400526077, + "loss_iou": 0.20703125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 274015228, + "step": 3002 + }, + { + "epoch": 12.5125, + "grad_norm": 3.628278411234878, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 274106380, + "step": 3003 + }, + { + "epoch": 12.5125, + "loss": 0.06517404317855835, + "loss_ce": 3.7579250147246057e-06, + "loss_iou": 0.431640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 274106380, + "step": 3003 + }, + { + "epoch": 12.516666666666667, + "grad_norm": 6.21563970342266, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 274197700, + "step": 3004 + }, + { + "epoch": 12.516666666666667, + "loss": 0.07570692151784897, + "loss_ce": 2.332625081180595e-05, + "loss_iou": 0.40625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 274197700, + "step": 3004 + }, + { + "epoch": 12.520833333333334, + "grad_norm": 1.6570068700315066, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 274289044, + "step": 3005 + }, + { + "epoch": 12.520833333333334, + "loss": 0.09042497724294662, + "loss_ce": 0.0005049319006502628, + "loss_iou": 0.34375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 274289044, + "step": 3005 + }, + { + "epoch": 12.525, + "grad_norm": 1.339285718282047, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 274380896, + "step": 3006 + }, + { + "epoch": 12.525, + "loss": 0.08311203867197037, + "loss_ce": 1.2673946912400424e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 274380896, + "step": 3006 + }, + { + "epoch": 12.529166666666667, + "grad_norm": 3.3647496664677607, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 274472392, + "step": 3007 + }, + { + "epoch": 12.529166666666667, + "loss": 0.0321391299366951, + "loss_ce": 1.9378574506845325e-05, + "loss_iou": 0.28125, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 274472392, + "step": 3007 + }, + { + "epoch": 12.533333333333333, + "grad_norm": 3.150886227407765, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 274563964, + "step": 3008 + }, + { + "epoch": 12.533333333333333, + "loss": 0.07331331819295883, + "loss_ce": 0.0019632219336926937, + "loss_iou": 0.341796875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 274563964, + "step": 3008 + }, + { + "epoch": 12.5375, + "grad_norm": 3.4248419194749027, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 274655244, + "step": 3009 + }, + { + "epoch": 12.5375, + "loss": 0.054197344928979874, + "loss_ce": 1.3388858860707842e-05, + "loss_iou": 0.220703125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 274655244, + "step": 3009 + }, + { + "epoch": 12.541666666666666, + "grad_norm": 3.6137713418335062, + "learning_rate": 5e-05, + "loss": 0.124, + "num_input_tokens_seen": 274746376, + "step": 3010 + }, + { + "epoch": 12.541666666666666, + "loss": 0.07499793916940689, + "loss_ce": 9.865603942671441e-07, + "loss_iou": 0.220703125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 274746376, + "step": 3010 + }, + { + "epoch": 12.545833333333333, + "grad_norm": 2.725742441669566, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 274837960, + "step": 3011 + }, + { + "epoch": 12.545833333333333, + "loss": 0.051561854779720306, + "loss_ce": 0.0008187488419935107, + "loss_iou": 0.1904296875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 274837960, + "step": 3011 + }, + { + "epoch": 12.55, + "grad_norm": 10.900517824421192, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 274929360, + "step": 3012 + }, + { + "epoch": 12.55, + "loss": 0.06274904310703278, + "loss_ce": 0.0002795600739773363, + "loss_iou": 0.234375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 274929360, + "step": 3012 + }, + { + "epoch": 12.554166666666667, + "grad_norm": 3.121712391262659, + "learning_rate": 5e-05, + "loss": 0.0482, + "num_input_tokens_seen": 275020996, + "step": 3013 + }, + { + "epoch": 12.554166666666667, + "loss": 0.0564560629427433, + "loss_ce": 0.003065558848902583, + "loss_iou": 0.228515625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 275020996, + "step": 3013 + }, + { + "epoch": 12.558333333333334, + "grad_norm": 2.184252079837103, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 275112132, + "step": 3014 + }, + { + "epoch": 12.558333333333334, + "loss": 0.04341685771942139, + "loss_ce": 5.605666046903934e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 275112132, + "step": 3014 + }, + { + "epoch": 12.5625, + "grad_norm": 5.173157766739975, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 275203920, + "step": 3015 + }, + { + "epoch": 12.5625, + "loss": 0.0419183224439621, + "loss_ce": 0.001680898480117321, + "loss_iou": 0.3046875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 275203920, + "step": 3015 + }, + { + "epoch": 12.566666666666666, + "grad_norm": 11.37834540555279, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 275295580, + "step": 3016 + }, + { + "epoch": 12.566666666666666, + "loss": 0.04153522476553917, + "loss_ce": 8.47255505505018e-05, + "loss_iou": 0.220703125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 275295580, + "step": 3016 + }, + { + "epoch": 12.570833333333333, + "grad_norm": 2.494361842243073, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 275387108, + "step": 3017 + }, + { + "epoch": 12.570833333333333, + "loss": 0.04674965888261795, + "loss_ce": 0.00011879783414769918, + "loss_iou": 0.302734375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 275387108, + "step": 3017 + }, + { + "epoch": 12.575, + "grad_norm": 2.410403468350254, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 275477904, + "step": 3018 + }, + { + "epoch": 12.575, + "loss": 0.03227172791957855, + "loss_ce": 2.2275686205830425e-05, + "loss_iou": 0.291015625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 275477904, + "step": 3018 + }, + { + "epoch": 12.579166666666667, + "grad_norm": 2.3486592784934888, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 275569364, + "step": 3019 + }, + { + "epoch": 12.579166666666667, + "loss": 0.06415988504886627, + "loss_ce": 0.001583587029017508, + "loss_iou": 0.1591796875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 275569364, + "step": 3019 + }, + { + "epoch": 12.583333333333334, + "grad_norm": 9.91921123923421, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 275661176, + "step": 3020 + }, + { + "epoch": 12.583333333333334, + "loss": 0.09567893296480179, + "loss_ce": 0.001165991765446961, + "loss_iou": 0.2890625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 275661176, + "step": 3020 + }, + { + "epoch": 12.5875, + "grad_norm": 5.046254567693273, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 275752156, + "step": 3021 + }, + { + "epoch": 12.5875, + "loss": 0.07690000534057617, + "loss_ce": 4.148645530221984e-05, + "loss_iou": 0.2578125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 275752156, + "step": 3021 + }, + { + "epoch": 12.591666666666667, + "grad_norm": 1.924919864484072, + "learning_rate": 5e-05, + "loss": 0.0322, + "num_input_tokens_seen": 275842928, + "step": 3022 + }, + { + "epoch": 12.591666666666667, + "loss": 0.033310018479824066, + "loss_ce": 2.296766069775913e-05, + "loss_iou": 0.236328125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 275842928, + "step": 3022 + }, + { + "epoch": 12.595833333333333, + "grad_norm": 5.374815815843724, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 275933884, + "step": 3023 + }, + { + "epoch": 12.595833333333333, + "loss": 0.07975783199071884, + "loss_ce": 3.065694181714207e-05, + "loss_iou": 0.20703125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 275933884, + "step": 3023 + }, + { + "epoch": 12.6, + "grad_norm": 4.761810635006499, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 276025600, + "step": 3024 + }, + { + "epoch": 12.6, + "loss": 0.10507048666477203, + "loss_ce": 0.000395202892832458, + "loss_iou": 0.2197265625, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 276025600, + "step": 3024 + }, + { + "epoch": 12.604166666666666, + "grad_norm": 2.680798297145748, + "learning_rate": 5e-05, + "loss": 0.0719, + "num_input_tokens_seen": 276116744, + "step": 3025 + }, + { + "epoch": 12.604166666666666, + "loss": 0.0392971932888031, + "loss_ce": 5.810734819533536e-06, + "loss_iou": 0.322265625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 276116744, + "step": 3025 + }, + { + "epoch": 12.608333333333333, + "grad_norm": 22.630748766733472, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 276207340, + "step": 3026 + }, + { + "epoch": 12.608333333333333, + "loss": 0.10536612570285797, + "loss_ce": 4.1947278077714145e-06, + "loss_iou": 0.326171875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 276207340, + "step": 3026 + }, + { + "epoch": 12.6125, + "grad_norm": 2.357555610446469, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 276298156, + "step": 3027 + }, + { + "epoch": 12.6125, + "loss": 0.07741272449493408, + "loss_ce": 2.014567144215107e-05, + "loss_iou": 0.38671875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 276298156, + "step": 3027 + }, + { + "epoch": 12.616666666666667, + "grad_norm": 2.3887754443380174, + "learning_rate": 5e-05, + "loss": 0.0844, + "num_input_tokens_seen": 276389868, + "step": 3028 + }, + { + "epoch": 12.616666666666667, + "loss": 0.08939746767282486, + "loss_ce": 5.725852679461241e-05, + "loss_iou": 0.32421875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 276389868, + "step": 3028 + }, + { + "epoch": 12.620833333333334, + "grad_norm": 3.238182219509643, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 276481088, + "step": 3029 + }, + { + "epoch": 12.620833333333334, + "loss": 0.04466459900140762, + "loss_ce": 1.7379155906382948e-05, + "loss_iou": 0.201171875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 276481088, + "step": 3029 + }, + { + "epoch": 12.625, + "grad_norm": 1.8248130085422234, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 276572552, + "step": 3030 + }, + { + "epoch": 12.625, + "loss": 0.04894650727510452, + "loss_ce": 0.00019467764650471509, + "loss_iou": 0.171875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 276572552, + "step": 3030 + }, + { + "epoch": 12.629166666666666, + "grad_norm": 14.546046331105883, + "learning_rate": 5e-05, + "loss": 0.1234, + "num_input_tokens_seen": 276663760, + "step": 3031 + }, + { + "epoch": 12.629166666666666, + "loss": 0.0462212935090065, + "loss_ce": 2.4235514501924627e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00927734375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 276663760, + "step": 3031 + }, + { + "epoch": 12.633333333333333, + "grad_norm": 5.467846134806575, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 276755064, + "step": 3032 + }, + { + "epoch": 12.633333333333333, + "loss": 0.0916154608130455, + "loss_ce": 0.00029160885605961084, + "loss_iou": 0.314453125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 276755064, + "step": 3032 + }, + { + "epoch": 12.6375, + "grad_norm": 3.176330171782519, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 276846344, + "step": 3033 + }, + { + "epoch": 12.6375, + "loss": 0.05962742865085602, + "loss_ce": 2.659808160387911e-05, + "loss_iou": 0.1796875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 276846344, + "step": 3033 + }, + { + "epoch": 12.641666666666667, + "grad_norm": 2.796019991026777, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 276937896, + "step": 3034 + }, + { + "epoch": 12.641666666666667, + "loss": 0.0450693741440773, + "loss_ce": 0.0007502207299694419, + "loss_iou": 0.271484375, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 276937896, + "step": 3034 + }, + { + "epoch": 12.645833333333334, + "grad_norm": 5.4254994878266, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 277029492, + "step": 3035 + }, + { + "epoch": 12.645833333333334, + "loss": 0.12581279873847961, + "loss_ce": 0.00014141679275780916, + "loss_iou": 0.32421875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 277029492, + "step": 3035 + }, + { + "epoch": 12.65, + "grad_norm": 8.83160209648461, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 277121068, + "step": 3036 + }, + { + "epoch": 12.65, + "loss": 0.08744452893733978, + "loss_ce": 1.929440622916445e-05, + "loss_iou": 0.34375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 277121068, + "step": 3036 + }, + { + "epoch": 12.654166666666667, + "grad_norm": 2.9922367136257133, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 277211956, + "step": 3037 + }, + { + "epoch": 12.654166666666667, + "loss": 0.0431191548705101, + "loss_ce": 1.3074773960397579e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 277211956, + "step": 3037 + }, + { + "epoch": 12.658333333333333, + "grad_norm": 7.312914555316495, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 277303404, + "step": 3038 + }, + { + "epoch": 12.658333333333333, + "loss": 0.061124954372644424, + "loss_ce": 0.00028816104168072343, + "loss_iou": 0.298828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 277303404, + "step": 3038 + }, + { + "epoch": 12.6625, + "grad_norm": 2.5224699213733874, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 277394460, + "step": 3039 + }, + { + "epoch": 12.6625, + "loss": 0.1085333526134491, + "loss_ce": 3.5741508327191696e-05, + "loss_iou": 0.3125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 277394460, + "step": 3039 + }, + { + "epoch": 12.666666666666666, + "grad_norm": 3.141807694935655, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 277485988, + "step": 3040 + }, + { + "epoch": 12.666666666666666, + "loss": 0.057176895439624786, + "loss_ce": 0.000284504727460444, + "loss_iou": 0.1982421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 277485988, + "step": 3040 + }, + { + "epoch": 12.670833333333333, + "grad_norm": 3.1119452188475387, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 277577536, + "step": 3041 + }, + { + "epoch": 12.670833333333333, + "loss": 0.04843373969197273, + "loss_ce": 6.337818194879219e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 277577536, + "step": 3041 + }, + { + "epoch": 12.675, + "grad_norm": 3.381427897262613, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 277668420, + "step": 3042 + }, + { + "epoch": 12.675, + "loss": 0.04654950648546219, + "loss_ce": 0.00013227068120613694, + "loss_iou": 0.375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 277668420, + "step": 3042 + }, + { + "epoch": 12.679166666666667, + "grad_norm": 3.076538192616019, + "learning_rate": 5e-05, + "loss": 0.1251, + "num_input_tokens_seen": 277759760, + "step": 3043 + }, + { + "epoch": 12.679166666666667, + "loss": 0.07401155680418015, + "loss_ce": 0.00011323827493470162, + "loss_iou": 0.2294921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 277759760, + "step": 3043 + }, + { + "epoch": 12.683333333333334, + "grad_norm": 17.963846970582654, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 277851880, + "step": 3044 + }, + { + "epoch": 12.683333333333334, + "loss": 0.08022314310073853, + "loss_ce": 0.00014501073746941984, + "loss_iou": 0.20703125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 277851880, + "step": 3044 + }, + { + "epoch": 12.6875, + "grad_norm": 2.030278631882756, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 277943352, + "step": 3045 + }, + { + "epoch": 12.6875, + "loss": 0.04376886039972305, + "loss_ce": 0.0005483399145305157, + "loss_iou": 0.1982421875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 277943352, + "step": 3045 + }, + { + "epoch": 12.691666666666666, + "grad_norm": 1.051735154214462, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 278034200, + "step": 3046 + }, + { + "epoch": 12.691666666666666, + "loss": 0.06943688541650772, + "loss_ce": 2.465544457663782e-05, + "loss_iou": 0.251953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 278034200, + "step": 3046 + }, + { + "epoch": 12.695833333333333, + "grad_norm": 14.104348655354823, + "learning_rate": 5e-05, + "loss": 0.0888, + "num_input_tokens_seen": 278125860, + "step": 3047 + }, + { + "epoch": 12.695833333333333, + "loss": 0.09014366567134857, + "loss_ce": 0.002939691534265876, + "loss_iou": 0.251953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 278125860, + "step": 3047 + }, + { + "epoch": 12.7, + "grad_norm": 2.1425563049295406, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 278216780, + "step": 3048 + }, + { + "epoch": 12.7, + "loss": 0.08864589035511017, + "loss_ce": 5.336176400305703e-05, + "loss_iou": 0.248046875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 278216780, + "step": 3048 + }, + { + "epoch": 12.704166666666667, + "grad_norm": 1.705437231966854, + "learning_rate": 5e-05, + "loss": 0.0515, + "num_input_tokens_seen": 278307744, + "step": 3049 + }, + { + "epoch": 12.704166666666667, + "loss": 0.0454210564494133, + "loss_ce": 1.0902575013460591e-05, + "loss_iou": 0.228515625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 278307744, + "step": 3049 + }, + { + "epoch": 12.708333333333334, + "grad_norm": 7.052905088085709, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 278398612, + "step": 3050 + }, + { + "epoch": 12.708333333333334, + "loss": 0.058208584785461426, + "loss_ce": 1.1565132808755152e-05, + "loss_iou": 0.10400390625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 278398612, + "step": 3050 + }, + { + "epoch": 12.7125, + "grad_norm": 1.429215882691101, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 278489904, + "step": 3051 + }, + { + "epoch": 12.7125, + "loss": 0.08760759234428406, + "loss_ce": 0.00045702431816607714, + "loss_iou": 0.197265625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 278489904, + "step": 3051 + }, + { + "epoch": 12.716666666666667, + "grad_norm": 2.157319883356644, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 278581152, + "step": 3052 + }, + { + "epoch": 12.716666666666667, + "loss": 0.046220067888498306, + "loss_ce": 4.6972148993518203e-05, + "loss_iou": 0.345703125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 278581152, + "step": 3052 + }, + { + "epoch": 12.720833333333333, + "grad_norm": 2.705812983876491, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 278672888, + "step": 3053 + }, + { + "epoch": 12.720833333333333, + "loss": 0.031630102545022964, + "loss_ce": 0.0005021725664846599, + "loss_iou": 0.251953125, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 278672888, + "step": 3053 + }, + { + "epoch": 12.725, + "grad_norm": 2.93868503933879, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 278764400, + "step": 3054 + }, + { + "epoch": 12.725, + "loss": 0.10779900848865509, + "loss_ce": 2.6188343326793984e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 278764400, + "step": 3054 + }, + { + "epoch": 12.729166666666666, + "grad_norm": 3.8858062498122563, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 278854972, + "step": 3055 + }, + { + "epoch": 12.729166666666666, + "loss": 0.06658395379781723, + "loss_ce": 9.854529707808979e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 278854972, + "step": 3055 + }, + { + "epoch": 12.733333333333333, + "grad_norm": 5.271084710186165, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 278946476, + "step": 3056 + }, + { + "epoch": 12.733333333333333, + "loss": 0.10122112929821014, + "loss_ce": 9.584147846908309e-06, + "loss_iou": 0.390625, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 278946476, + "step": 3056 + }, + { + "epoch": 12.7375, + "grad_norm": 2.6558501306865336, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 279038040, + "step": 3057 + }, + { + "epoch": 12.7375, + "loss": 0.057488322257995605, + "loss_ce": 8.464216080028564e-06, + "loss_iou": 0.291015625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 279038040, + "step": 3057 + }, + { + "epoch": 12.741666666666667, + "grad_norm": 2.1151067539954984, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 279128956, + "step": 3058 + }, + { + "epoch": 12.741666666666667, + "loss": 0.07311089336872101, + "loss_ce": 2.1294054022291675e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 279128956, + "step": 3058 + }, + { + "epoch": 12.745833333333334, + "grad_norm": 1.830733179434178, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 279220376, + "step": 3059 + }, + { + "epoch": 12.745833333333334, + "loss": 0.0318562351167202, + "loss_ce": 4.1659819544292986e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 279220376, + "step": 3059 + }, + { + "epoch": 12.75, + "grad_norm": 3.7824054547708674, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 279311848, + "step": 3060 + }, + { + "epoch": 12.75, + "loss": 0.05131109058856964, + "loss_ce": 3.414619413888431e-06, + "loss_iou": 0.23046875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 279311848, + "step": 3060 + }, + { + "epoch": 12.754166666666666, + "grad_norm": 8.165410655420041, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 279403576, + "step": 3061 + }, + { + "epoch": 12.754166666666666, + "loss": 0.06295231729745865, + "loss_ce": 0.00011662582255667076, + "loss_iou": 0.328125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 279403576, + "step": 3061 + }, + { + "epoch": 12.758333333333333, + "grad_norm": 2.181439238726654, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 279494252, + "step": 3062 + }, + { + "epoch": 12.758333333333333, + "loss": 0.06396616250276566, + "loss_ce": 8.523969881935045e-05, + "loss_iou": 0.166015625, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 279494252, + "step": 3062 + }, + { + "epoch": 12.7625, + "grad_norm": 2.3387174785163856, + "learning_rate": 5e-05, + "loss": 0.0456, + "num_input_tokens_seen": 279585728, + "step": 3063 + }, + { + "epoch": 12.7625, + "loss": 0.035773493349552155, + "loss_ce": 6.8942003963456955e-06, + "loss_iou": 0.2275390625, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 279585728, + "step": 3063 + }, + { + "epoch": 12.766666666666667, + "grad_norm": 4.427669922592235, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 279676584, + "step": 3064 + }, + { + "epoch": 12.766666666666667, + "loss": 0.09679196774959564, + "loss_ce": 2.07240318559343e-05, + "loss_iou": 0.28125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 279676584, + "step": 3064 + }, + { + "epoch": 12.770833333333334, + "grad_norm": 5.098015977237453, + "learning_rate": 5e-05, + "loss": 0.1176, + "num_input_tokens_seen": 279768452, + "step": 3065 + }, + { + "epoch": 12.770833333333334, + "loss": 0.1344681829214096, + "loss_ce": 9.928654617397115e-05, + "loss_iou": 0.19921875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 279768452, + "step": 3065 + }, + { + "epoch": 12.775, + "grad_norm": 2.7691277990758274, + "learning_rate": 5e-05, + "loss": 0.0855, + "num_input_tokens_seen": 279859876, + "step": 3066 + }, + { + "epoch": 12.775, + "loss": 0.12224830687046051, + "loss_ce": 0.0006967922090552747, + "loss_iou": 0.361328125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 279859876, + "step": 3066 + }, + { + "epoch": 12.779166666666667, + "grad_norm": 7.2095868531702605, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 279951356, + "step": 3067 + }, + { + "epoch": 12.779166666666667, + "loss": 0.07838231325149536, + "loss_ce": 0.0006692995084449649, + "loss_iou": 0.3046875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 279951356, + "step": 3067 + }, + { + "epoch": 12.783333333333333, + "grad_norm": 1.5714320567596505, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 280042444, + "step": 3068 + }, + { + "epoch": 12.783333333333333, + "loss": 0.06469504535198212, + "loss_ce": 5.881530887563713e-05, + "loss_iou": 0.232421875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 280042444, + "step": 3068 + }, + { + "epoch": 12.7875, + "grad_norm": 2.73049184603913, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 280132980, + "step": 3069 + }, + { + "epoch": 12.7875, + "loss": 0.0646815299987793, + "loss_ce": 0.00012159519974375144, + "loss_iou": 0.32421875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 280132980, + "step": 3069 + }, + { + "epoch": 12.791666666666666, + "grad_norm": 2.6206030451709124, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 280224380, + "step": 3070 + }, + { + "epoch": 12.791666666666666, + "loss": 0.048339590430259705, + "loss_ce": 0.00016759365098550916, + "loss_iou": 0.2373046875, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 280224380, + "step": 3070 + }, + { + "epoch": 12.795833333333333, + "grad_norm": 1.9195245267601702, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 280315536, + "step": 3071 + }, + { + "epoch": 12.795833333333333, + "loss": 0.08559077978134155, + "loss_ce": 0.00011104091390734538, + "loss_iou": 0.1875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 280315536, + "step": 3071 + }, + { + "epoch": 12.8, + "grad_norm": 6.07437903070192, + "learning_rate": 5e-05, + "loss": 0.1087, + "num_input_tokens_seen": 280406956, + "step": 3072 + }, + { + "epoch": 12.8, + "loss": 0.09862232953310013, + "loss_ce": 4.2919116822304204e-05, + "loss_iou": 0.146484375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 280406956, + "step": 3072 + }, + { + "epoch": 12.804166666666667, + "grad_norm": 2.9138495381433853, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 280498940, + "step": 3073 + }, + { + "epoch": 12.804166666666667, + "loss": 0.10507670789957047, + "loss_ce": 0.00023356490419246256, + "loss_iou": 0.25390625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 280498940, + "step": 3073 + }, + { + "epoch": 12.808333333333334, + "grad_norm": 1.9506126651259772, + "learning_rate": 5e-05, + "loss": 0.1089, + "num_input_tokens_seen": 280590392, + "step": 3074 + }, + { + "epoch": 12.808333333333334, + "loss": 0.07741060853004456, + "loss_ce": 0.0009640734060667455, + "loss_iou": 0.0888671875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 280590392, + "step": 3074 + }, + { + "epoch": 12.8125, + "grad_norm": 4.158899079033557, + "learning_rate": 5e-05, + "loss": 0.0758, + "num_input_tokens_seen": 280681300, + "step": 3075 + }, + { + "epoch": 12.8125, + "loss": 0.03765298053622246, + "loss_ce": 1.9179929040546995e-06, + "loss_iou": 0.1865234375, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 280681300, + "step": 3075 + }, + { + "epoch": 12.816666666666666, + "grad_norm": 3.1401496759220255, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 280772832, + "step": 3076 + }, + { + "epoch": 12.816666666666666, + "loss": 0.0684804618358612, + "loss_ce": 0.00010582937102299184, + "loss_iou": 0.1640625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 280772832, + "step": 3076 + }, + { + "epoch": 12.820833333333333, + "grad_norm": 1.9620936144898184, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 280863912, + "step": 3077 + }, + { + "epoch": 12.820833333333333, + "loss": 0.028973519802093506, + "loss_ce": 9.626205428503454e-05, + "loss_iou": 0.1875, + "loss_num": 0.005767822265625, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 280863912, + "step": 3077 + }, + { + "epoch": 12.825, + "grad_norm": 6.244746819631481, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 280955080, + "step": 3078 + }, + { + "epoch": 12.825, + "loss": 0.15819403529167175, + "loss_ce": 0.010996315628290176, + "loss_iou": 0.29296875, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 280955080, + "step": 3078 + }, + { + "epoch": 12.829166666666667, + "grad_norm": 5.111417722850773, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 281046240, + "step": 3079 + }, + { + "epoch": 12.829166666666667, + "loss": 0.0795697346329689, + "loss_ce": 0.0017842412926256657, + "loss_iou": 0.173828125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 281046240, + "step": 3079 + }, + { + "epoch": 12.833333333333334, + "grad_norm": 6.658668734378153, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 281137868, + "step": 3080 + }, + { + "epoch": 12.833333333333334, + "loss": 0.05171462893486023, + "loss_ce": 1.0222116543445736e-05, + "loss_iou": 0.21875, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 281137868, + "step": 3080 + }, + { + "epoch": 12.8375, + "grad_norm": 3.073369365091355, + "learning_rate": 5e-05, + "loss": 0.0455, + "num_input_tokens_seen": 281229028, + "step": 3081 + }, + { + "epoch": 12.8375, + "loss": 0.03657699003815651, + "loss_ce": 9.30035184865119e-06, + "loss_iou": 0.2255859375, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 281229028, + "step": 3081 + }, + { + "epoch": 12.841666666666667, + "grad_norm": 4.071459919065855, + "learning_rate": 5e-05, + "loss": 0.1149, + "num_input_tokens_seen": 281320012, + "step": 3082 + }, + { + "epoch": 12.841666666666667, + "loss": 0.10545842349529266, + "loss_ce": 5.0711940275505185e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 281320012, + "step": 3082 + }, + { + "epoch": 12.845833333333333, + "grad_norm": 8.508769096963789, + "learning_rate": 5e-05, + "loss": 0.1094, + "num_input_tokens_seen": 281411004, + "step": 3083 + }, + { + "epoch": 12.845833333333333, + "loss": 0.17188432812690735, + "loss_ce": 0.00025346592883579433, + "loss_iou": 0.27734375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 281411004, + "step": 3083 + }, + { + "epoch": 12.85, + "grad_norm": 2.3736834116012693, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 281501632, + "step": 3084 + }, + { + "epoch": 12.85, + "loss": 0.08856417238712311, + "loss_ce": 0.0005514741060324013, + "loss_iou": 0.2412109375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 281501632, + "step": 3084 + }, + { + "epoch": 12.854166666666666, + "grad_norm": 10.554135196933103, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 281592620, + "step": 3085 + }, + { + "epoch": 12.854166666666666, + "loss": 0.055758289992809296, + "loss_ce": 1.7931039110408165e-05, + "loss_iou": 0.169921875, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 281592620, + "step": 3085 + }, + { + "epoch": 12.858333333333333, + "grad_norm": 6.416996665406724, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 281684444, + "step": 3086 + }, + { + "epoch": 12.858333333333333, + "loss": 0.053812433034181595, + "loss_ce": 0.008203910663723946, + "loss_iou": 0.390625, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 281684444, + "step": 3086 + }, + { + "epoch": 12.8625, + "grad_norm": 2.7378792324830474, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 281776004, + "step": 3087 + }, + { + "epoch": 12.8625, + "loss": 0.1071212887763977, + "loss_ce": 0.003010562853887677, + "loss_iou": 0.1396484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 281776004, + "step": 3087 + }, + { + "epoch": 12.866666666666667, + "grad_norm": 5.4016920389500935, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 281866916, + "step": 3088 + }, + { + "epoch": 12.866666666666667, + "loss": 0.10034558922052383, + "loss_ce": 3.7943809729767963e-06, + "loss_iou": 0.29296875, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 281866916, + "step": 3088 + }, + { + "epoch": 12.870833333333334, + "grad_norm": 4.971595148868411, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 281958500, + "step": 3089 + }, + { + "epoch": 12.870833333333334, + "loss": 0.049116350710392, + "loss_ce": 0.00262281927280128, + "loss_iou": 0.369140625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 281958500, + "step": 3089 + }, + { + "epoch": 12.875, + "grad_norm": 3.903255524586416, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 282049928, + "step": 3090 + }, + { + "epoch": 12.875, + "loss": 0.11985423415899277, + "loss_ce": 0.00030162016628310084, + "loss_iou": 0.3203125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 282049928, + "step": 3090 + }, + { + "epoch": 12.879166666666666, + "grad_norm": 4.243647579617985, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 282140984, + "step": 3091 + }, + { + "epoch": 12.879166666666666, + "loss": 0.05852103605866432, + "loss_ce": 0.00048423168482258916, + "loss_iou": 0.291015625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 282140984, + "step": 3091 + }, + { + "epoch": 12.883333333333333, + "grad_norm": 2.6784587835319487, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 282232476, + "step": 3092 + }, + { + "epoch": 12.883333333333333, + "loss": 0.05952540040016174, + "loss_ce": 1.6124329704325646e-05, + "loss_iou": 0.330078125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 282232476, + "step": 3092 + }, + { + "epoch": 12.8875, + "grad_norm": 12.729861278675463, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 282323936, + "step": 3093 + }, + { + "epoch": 12.8875, + "loss": 0.10516448318958282, + "loss_ce": 5.430977398646064e-05, + "loss_iou": 0.314453125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 282323936, + "step": 3093 + }, + { + "epoch": 12.891666666666667, + "grad_norm": 2.1835375010426437, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 282415116, + "step": 3094 + }, + { + "epoch": 12.891666666666667, + "loss": 0.053579214960336685, + "loss_ce": 6.664089596597478e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 282415116, + "step": 3094 + }, + { + "epoch": 12.895833333333334, + "grad_norm": 2.974429504500354, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 282505324, + "step": 3095 + }, + { + "epoch": 12.895833333333334, + "loss": 0.06614936143159866, + "loss_ce": 0.0024133960250765085, + "loss_iou": 0.341796875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 282505324, + "step": 3095 + }, + { + "epoch": 12.9, + "grad_norm": 2.828988907059202, + "learning_rate": 5e-05, + "loss": 0.1236, + "num_input_tokens_seen": 282596564, + "step": 3096 + }, + { + "epoch": 12.9, + "loss": 0.1410875916481018, + "loss_ce": 0.0005541453720070422, + "loss_iou": 0.23046875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 282596564, + "step": 3096 + }, + { + "epoch": 12.904166666666667, + "grad_norm": 2.5107003046604746, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 282688016, + "step": 3097 + }, + { + "epoch": 12.904166666666667, + "loss": 0.049967456609010696, + "loss_ce": 0.00010936275066342205, + "loss_iou": 0.234375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 282688016, + "step": 3097 + }, + { + "epoch": 12.908333333333333, + "grad_norm": 6.166785831775601, + "learning_rate": 5e-05, + "loss": 0.1325, + "num_input_tokens_seen": 282778720, + "step": 3098 + }, + { + "epoch": 12.908333333333333, + "loss": 0.08252835273742676, + "loss_ce": 8.823913049127441e-06, + "loss_iou": 0.232421875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 282778720, + "step": 3098 + }, + { + "epoch": 12.9125, + "grad_norm": 3.4858220348262754, + "learning_rate": 5e-05, + "loss": 0.1047, + "num_input_tokens_seen": 282869576, + "step": 3099 + }, + { + "epoch": 12.9125, + "loss": 0.14050546288490295, + "loss_ce": 2.538679382269038e-06, + "loss_iou": 0.330078125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 282869576, + "step": 3099 + }, + { + "epoch": 12.916666666666666, + "grad_norm": 2.5003365309894066, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 282961324, + "step": 3100 + }, + { + "epoch": 12.916666666666666, + "loss": 0.057860180735588074, + "loss_ce": 0.0019672391936182976, + "loss_iou": 0.2080078125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 282961324, + "step": 3100 + }, + { + "epoch": 12.920833333333333, + "grad_norm": 10.557839645286178, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 283052668, + "step": 3101 + }, + { + "epoch": 12.920833333333333, + "loss": 0.09789260476827621, + "loss_ce": 4.561465175356716e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 283052668, + "step": 3101 + }, + { + "epoch": 12.925, + "grad_norm": 3.909885904693897, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 283143300, + "step": 3102 + }, + { + "epoch": 12.925, + "loss": 0.057817135006189346, + "loss_ce": 1.5816697214177111e-06, + "loss_iou": 0.294921875, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 283143300, + "step": 3102 + }, + { + "epoch": 12.929166666666667, + "grad_norm": 5.099507288933682, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 283234708, + "step": 3103 + }, + { + "epoch": 12.929166666666667, + "loss": 0.06371396780014038, + "loss_ce": 0.0029229512438178062, + "loss_iou": 0.2578125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 283234708, + "step": 3103 + }, + { + "epoch": 12.933333333333334, + "grad_norm": 2.574523047575215, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 283325848, + "step": 3104 + }, + { + "epoch": 12.933333333333334, + "loss": 0.056650642305612564, + "loss_ce": 6.342041160678491e-05, + "loss_iou": 0.228515625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 283325848, + "step": 3104 + }, + { + "epoch": 12.9375, + "grad_norm": 3.7387324962710315, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 283417176, + "step": 3105 + }, + { + "epoch": 12.9375, + "loss": 0.07551056891679764, + "loss_ce": 0.00034577763290144503, + "loss_iou": 0.326171875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 283417176, + "step": 3105 + }, + { + "epoch": 12.941666666666666, + "grad_norm": 2.4073879710544164, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 283507832, + "step": 3106 + }, + { + "epoch": 12.941666666666666, + "loss": 0.08266405016183853, + "loss_ce": 6.822718569310382e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 283507832, + "step": 3106 + }, + { + "epoch": 12.945833333333333, + "grad_norm": 2.7592590095427254, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 283599920, + "step": 3107 + }, + { + "epoch": 12.945833333333333, + "loss": 0.07048561424016953, + "loss_ce": 6.630267307627946e-05, + "loss_iou": 0.21484375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 283599920, + "step": 3107 + }, + { + "epoch": 12.95, + "grad_norm": 1.702638937834739, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 283691752, + "step": 3108 + }, + { + "epoch": 12.95, + "loss": 0.04186766594648361, + "loss_ce": 0.00014250561071094126, + "loss_iou": 0.201171875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 283691752, + "step": 3108 + }, + { + "epoch": 12.954166666666667, + "grad_norm": 2.709329891336406, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 283782824, + "step": 3109 + }, + { + "epoch": 12.954166666666667, + "loss": 0.09974834322929382, + "loss_ce": 1.637887521610537e-06, + "loss_iou": 0.224609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 283782824, + "step": 3109 + }, + { + "epoch": 12.958333333333334, + "grad_norm": 4.142908063588557, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 283873860, + "step": 3110 + }, + { + "epoch": 12.958333333333334, + "loss": 0.05954314023256302, + "loss_ce": 0.0007510251016356051, + "loss_iou": 0.1845703125, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 283873860, + "step": 3110 + }, + { + "epoch": 12.9625, + "grad_norm": 1.8766834715963754, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 283964804, + "step": 3111 + }, + { + "epoch": 12.9625, + "loss": 0.06928491592407227, + "loss_ce": 1.0013500286731869e-05, + "loss_iou": 0.0908203125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 283964804, + "step": 3111 + }, + { + "epoch": 12.966666666666667, + "grad_norm": 3.0770804610216795, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 284056028, + "step": 3112 + }, + { + "epoch": 12.966666666666667, + "loss": 0.05841745436191559, + "loss_ce": 6.809648766648024e-06, + "loss_iou": 0.3359375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 284056028, + "step": 3112 + }, + { + "epoch": 12.970833333333333, + "grad_norm": 3.370660539882287, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 284147612, + "step": 3113 + }, + { + "epoch": 12.970833333333333, + "loss": 0.05954941734671593, + "loss_ce": 0.00043686505523510277, + "loss_iou": 0.275390625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 284147612, + "step": 3113 + }, + { + "epoch": 12.975, + "grad_norm": 2.706977893222399, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 284239144, + "step": 3114 + }, + { + "epoch": 12.975, + "loss": 0.09260821342468262, + "loss_ce": 0.00010943684901576489, + "loss_iou": 0.32421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 284239144, + "step": 3114 + }, + { + "epoch": 12.979166666666666, + "grad_norm": 2.558267317672637, + "learning_rate": 5e-05, + "loss": 0.095, + "num_input_tokens_seen": 284330800, + "step": 3115 + }, + { + "epoch": 12.979166666666666, + "loss": 0.09775135666131973, + "loss_ce": 0.00044605947914533317, + "loss_iou": 0.283203125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 284330800, + "step": 3115 + }, + { + "epoch": 12.983333333333333, + "grad_norm": 1.704154512433284, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 284421776, + "step": 3116 + }, + { + "epoch": 12.983333333333333, + "loss": 0.030959153547883034, + "loss_ce": 6.699737241433468e-06, + "loss_iou": 0.17578125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 284421776, + "step": 3116 + }, + { + "epoch": 12.9875, + "grad_norm": 1.6481778234420617, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 284512804, + "step": 3117 + }, + { + "epoch": 12.9875, + "loss": 0.04519006237387657, + "loss_ce": 3.9303664379986e-05, + "loss_iou": 0.240234375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 284512804, + "step": 3117 + }, + { + "epoch": 12.991666666666667, + "grad_norm": 3.9810323306664572, + "learning_rate": 5e-05, + "loss": 0.0784, + "num_input_tokens_seen": 284604228, + "step": 3118 + }, + { + "epoch": 12.991666666666667, + "loss": 0.07797044515609741, + "loss_ce": 1.3288912668940611e-05, + "loss_iou": 0.296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 284604228, + "step": 3118 + }, + { + "epoch": 12.995833333333334, + "grad_norm": 9.399576967826073, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 284695532, + "step": 3119 + }, + { + "epoch": 12.995833333333334, + "loss": 0.0511610209941864, + "loss_ce": 4.407762753544375e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 284695532, + "step": 3119 + }, + { + "epoch": 13.0, + "grad_norm": 4.164992602967978, + "learning_rate": 5e-05, + "loss": 0.1244, + "num_input_tokens_seen": 284786576, + "step": 3120 + }, + { + "epoch": 13.0, + "loss": 0.18091799318790436, + "loss_ce": 1.7431500964448787e-05, + "loss_iou": 0.12890625, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 284786576, + "step": 3120 + }, + { + "epoch": 13.004166666666666, + "grad_norm": 2.9079685810778533, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 284877304, + "step": 3121 + }, + { + "epoch": 13.004166666666666, + "loss": 0.055656641721725464, + "loss_ce": 7.834884854673874e-06, + "loss_iou": 0.1826171875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 284877304, + "step": 3121 + }, + { + "epoch": 13.008333333333333, + "grad_norm": 2.787839217725438, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 284969176, + "step": 3122 + }, + { + "epoch": 13.008333333333333, + "loss": 0.04686301201581955, + "loss_ce": 4.904704837827012e-05, + "loss_iou": 0.302734375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 284969176, + "step": 3122 + }, + { + "epoch": 13.0125, + "grad_norm": 2.7453722467302253, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 285060356, + "step": 3123 + }, + { + "epoch": 13.0125, + "loss": 0.05110064521431923, + "loss_ce": 1.4217559510143474e-05, + "loss_iou": 0.3125, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 285060356, + "step": 3123 + }, + { + "epoch": 13.016666666666667, + "grad_norm": 3.3803002591461024, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 285151684, + "step": 3124 + }, + { + "epoch": 13.016666666666667, + "loss": 0.03558432683348656, + "loss_ce": 0.0019844716880470514, + "loss_iou": 0.306640625, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 285151684, + "step": 3124 + }, + { + "epoch": 13.020833333333334, + "grad_norm": 2.220903338732122, + "learning_rate": 5e-05, + "loss": 0.1168, + "num_input_tokens_seen": 285243324, + "step": 3125 + }, + { + "epoch": 13.020833333333334, + "loss": 0.12469391524791718, + "loss_ce": 2.9611199352075346e-05, + "loss_iou": 0.023193359375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 285243324, + "step": 3125 + }, + { + "epoch": 13.025, + "grad_norm": 2.51031468120179, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 285335080, + "step": 3126 + }, + { + "epoch": 13.025, + "loss": 0.07845841348171234, + "loss_ce": 0.00031052250415086746, + "loss_iou": 0.19140625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 285335080, + "step": 3126 + }, + { + "epoch": 13.029166666666667, + "grad_norm": 5.397176496704968, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 285426028, + "step": 3127 + }, + { + "epoch": 13.029166666666667, + "loss": 0.036863990128040314, + "loss_ce": 2.9271199309732765e-05, + "loss_iou": 0.26953125, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 285426028, + "step": 3127 + }, + { + "epoch": 13.033333333333333, + "grad_norm": 4.800806798698757, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 285517452, + "step": 3128 + }, + { + "epoch": 13.033333333333333, + "loss": 0.044313944876194, + "loss_ce": 2.530910205678083e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 285517452, + "step": 3128 + }, + { + "epoch": 13.0375, + "grad_norm": 4.8157125922109945, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 285609188, + "step": 3129 + }, + { + "epoch": 13.0375, + "loss": 0.09021648019552231, + "loss_ce": 6.522158855659654e-06, + "loss_iou": 0.232421875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 285609188, + "step": 3129 + }, + { + "epoch": 13.041666666666666, + "grad_norm": 2.759377528077706, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 285699832, + "step": 3130 + }, + { + "epoch": 13.041666666666666, + "loss": 0.08318091928958893, + "loss_ce": 5.263358161755605e-06, + "loss_iou": 0.28125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 285699832, + "step": 3130 + }, + { + "epoch": 13.045833333333333, + "grad_norm": 2.551485384721823, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 285790920, + "step": 3131 + }, + { + "epoch": 13.045833333333333, + "loss": 0.1195254698395729, + "loss_ce": 3.3810711101978086e-06, + "loss_iou": 0.3046875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 285790920, + "step": 3131 + }, + { + "epoch": 13.05, + "grad_norm": 3.0899997537226866, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 285882040, + "step": 3132 + }, + { + "epoch": 13.05, + "loss": 0.07134020328521729, + "loss_ce": 5.3618177844327874e-06, + "loss_iou": 0.1953125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 285882040, + "step": 3132 + }, + { + "epoch": 13.054166666666667, + "grad_norm": 8.38869458745163, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 285973412, + "step": 3133 + }, + { + "epoch": 13.054166666666667, + "loss": 0.05942576751112938, + "loss_ce": 8.045359209063463e-06, + "loss_iou": 0.2373046875, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 285973412, + "step": 3133 + }, + { + "epoch": 13.058333333333334, + "grad_norm": 3.321566870982853, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 286064536, + "step": 3134 + }, + { + "epoch": 13.058333333333334, + "loss": 0.10176359862089157, + "loss_ce": 0.00020109850447624922, + "loss_iou": 0.32421875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 286064536, + "step": 3134 + }, + { + "epoch": 13.0625, + "grad_norm": 1.2412773253705964, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 286155540, + "step": 3135 + }, + { + "epoch": 13.0625, + "loss": 0.03508618474006653, + "loss_ce": 2.1485979232238606e-05, + "loss_iou": 0.232421875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 286155540, + "step": 3135 + }, + { + "epoch": 13.066666666666666, + "grad_norm": 2.831490400001111, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 286246804, + "step": 3136 + }, + { + "epoch": 13.066666666666666, + "loss": 0.04610330983996391, + "loss_ce": 2.1765297788078897e-05, + "loss_iou": 0.154296875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 286246804, + "step": 3136 + }, + { + "epoch": 13.070833333333333, + "grad_norm": 2.5292858582678353, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 286338372, + "step": 3137 + }, + { + "epoch": 13.070833333333333, + "loss": 0.06301337480545044, + "loss_ce": 2.8901637051603757e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 286338372, + "step": 3137 + }, + { + "epoch": 13.075, + "grad_norm": 1.956081203255567, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 286429816, + "step": 3138 + }, + { + "epoch": 13.075, + "loss": 0.048599861562252045, + "loss_ce": 3.113657294306904e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 286429816, + "step": 3138 + }, + { + "epoch": 13.079166666666667, + "grad_norm": 1.758795319298648, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 286521076, + "step": 3139 + }, + { + "epoch": 13.079166666666667, + "loss": 0.04789041727781296, + "loss_ce": 8.336599421454594e-06, + "loss_iou": 0.296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 286521076, + "step": 3139 + }, + { + "epoch": 13.083333333333334, + "grad_norm": 4.482802864674869, + "learning_rate": 5e-05, + "loss": 0.0946, + "num_input_tokens_seen": 286611880, + "step": 3140 + }, + { + "epoch": 13.083333333333334, + "loss": 0.07295797020196915, + "loss_ce": 0.03647420182824135, + "loss_iou": 0.244140625, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 286611880, + "step": 3140 + }, + { + "epoch": 13.0875, + "grad_norm": 2.6958998324483803, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 286703356, + "step": 3141 + }, + { + "epoch": 13.0875, + "loss": 0.10715167224407196, + "loss_ce": 0.00023333393619395792, + "loss_iou": 0.1787109375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 286703356, + "step": 3141 + }, + { + "epoch": 13.091666666666667, + "grad_norm": 2.4808391143690827, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 286794744, + "step": 3142 + }, + { + "epoch": 13.091666666666667, + "loss": 0.04689784348011017, + "loss_ce": 0.0003585351223591715, + "loss_iou": 0.169921875, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 286794744, + "step": 3142 + }, + { + "epoch": 13.095833333333333, + "grad_norm": 3.273393608824858, + "learning_rate": 5e-05, + "loss": 0.126, + "num_input_tokens_seen": 286886376, + "step": 3143 + }, + { + "epoch": 13.095833333333333, + "loss": 0.07159381359815598, + "loss_ce": 4.535461630439386e-05, + "loss_iou": 0.34375, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 286886376, + "step": 3143 + }, + { + "epoch": 13.1, + "grad_norm": 2.014824322296352, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 286977692, + "step": 3144 + }, + { + "epoch": 13.1, + "loss": 0.05477256327867508, + "loss_ce": 0.000542825844604522, + "loss_iou": 0.2041015625, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 286977692, + "step": 3144 + }, + { + "epoch": 13.104166666666666, + "grad_norm": 1.9183730508376748, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 287068988, + "step": 3145 + }, + { + "epoch": 13.104166666666666, + "loss": 0.03723179176449776, + "loss_ce": 1.560458986205049e-05, + "loss_iou": 0.203125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 287068988, + "step": 3145 + }, + { + "epoch": 13.108333333333333, + "grad_norm": 12.41752476804688, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 287161128, + "step": 3146 + }, + { + "epoch": 13.108333333333333, + "loss": 0.07558012008666992, + "loss_ce": 0.0022005955688655376, + "loss_iou": 0.2119140625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 287161128, + "step": 3146 + }, + { + "epoch": 13.1125, + "grad_norm": 2.3556336220738054, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 287252576, + "step": 3147 + }, + { + "epoch": 13.1125, + "loss": 0.03619590029120445, + "loss_ce": 0.0005971458158455789, + "loss_iou": 0.28515625, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 287252576, + "step": 3147 + }, + { + "epoch": 13.116666666666667, + "grad_norm": 3.9532078740492214, + "learning_rate": 5e-05, + "loss": 0.1146, + "num_input_tokens_seen": 287343876, + "step": 3148 + }, + { + "epoch": 13.116666666666667, + "loss": 0.048301875591278076, + "loss_ce": 7.809003363945521e-06, + "loss_iou": 0.388671875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 287343876, + "step": 3148 + }, + { + "epoch": 13.120833333333334, + "grad_norm": 2.2679126909560647, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 287435524, + "step": 3149 + }, + { + "epoch": 13.120833333333334, + "loss": 0.05186320096254349, + "loss_ce": 2.1469897546921857e-05, + "loss_iou": 0.1796875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 287435524, + "step": 3149 + }, + { + "epoch": 13.125, + "grad_norm": 2.889271472199033, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 287526712, + "step": 3150 + }, + { + "epoch": 13.125, + "loss": 0.07298816740512848, + "loss_ce": 5.382002200349234e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 287526712, + "step": 3150 + }, + { + "epoch": 13.129166666666666, + "grad_norm": 3.3133748528039897, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 287617580, + "step": 3151 + }, + { + "epoch": 13.129166666666666, + "loss": 0.06128918379545212, + "loss_ce": 0.0002845459384843707, + "loss_iou": 0.322265625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 287617580, + "step": 3151 + }, + { + "epoch": 13.133333333333333, + "grad_norm": 1.5366254448525556, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 287709012, + "step": 3152 + }, + { + "epoch": 13.133333333333333, + "loss": 0.06348831951618195, + "loss_ce": 1.1753787475754507e-05, + "loss_iou": 0.025634765625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 287709012, + "step": 3152 + }, + { + "epoch": 13.1375, + "grad_norm": 5.812825209981419, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 287800116, + "step": 3153 + }, + { + "epoch": 13.1375, + "loss": 0.06690312922000885, + "loss_ce": 7.726660260232165e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 287800116, + "step": 3153 + }, + { + "epoch": 13.141666666666667, + "grad_norm": 3.549976425566346, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 287891084, + "step": 3154 + }, + { + "epoch": 13.141666666666667, + "loss": 0.04346313700079918, + "loss_ce": 6.105668035161216e-06, + "loss_iou": 0.205078125, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 287891084, + "step": 3154 + }, + { + "epoch": 13.145833333333334, + "grad_norm": 3.045076072600189, + "learning_rate": 5e-05, + "loss": 0.1268, + "num_input_tokens_seen": 287982756, + "step": 3155 + }, + { + "epoch": 13.145833333333334, + "loss": 0.19146740436553955, + "loss_ce": 7.640861440449953e-05, + "loss_iou": 0.33203125, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 287982756, + "step": 3155 + }, + { + "epoch": 13.15, + "grad_norm": 3.803486531579513, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 288073972, + "step": 3156 + }, + { + "epoch": 13.15, + "loss": 0.0343029648065567, + "loss_ce": 1.2068505839124555e-06, + "loss_iou": 0.3515625, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 288073972, + "step": 3156 + }, + { + "epoch": 13.154166666666667, + "grad_norm": 2.7662928320027382, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 288165052, + "step": 3157 + }, + { + "epoch": 13.154166666666667, + "loss": 0.0952390804886818, + "loss_ce": 8.984496162156574e-06, + "loss_iou": 0.1875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 288165052, + "step": 3157 + }, + { + "epoch": 13.158333333333333, + "grad_norm": 4.308966729779597, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 288256128, + "step": 3158 + }, + { + "epoch": 13.158333333333333, + "loss": 0.06495509296655655, + "loss_ce": 0.0002730850537773222, + "loss_iou": 0.32421875, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 288256128, + "step": 3158 + }, + { + "epoch": 13.1625, + "grad_norm": 3.8397928699877433, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 288347464, + "step": 3159 + }, + { + "epoch": 13.1625, + "loss": 0.04620542749762535, + "loss_ce": 1.816176222746435e-06, + "loss_iou": 0.3671875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 288347464, + "step": 3159 + }, + { + "epoch": 13.166666666666666, + "grad_norm": 2.3495969026979284, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 288438828, + "step": 3160 + }, + { + "epoch": 13.166666666666666, + "loss": 0.04184609279036522, + "loss_ce": 2.1753499822807498e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 288438828, + "step": 3160 + }, + { + "epoch": 13.170833333333333, + "grad_norm": 2.1779618590482057, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 288529740, + "step": 3161 + }, + { + "epoch": 13.170833333333333, + "loss": 0.10389817506074905, + "loss_ce": 1.0718932799136383e-06, + "loss_iou": 0.1123046875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 288529740, + "step": 3161 + }, + { + "epoch": 13.175, + "grad_norm": 4.719728899260996, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 288620912, + "step": 3162 + }, + { + "epoch": 13.175, + "loss": 0.0952850729227066, + "loss_ce": 1.5630433836122393e-06, + "loss_iou": 0.36328125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 288620912, + "step": 3162 + }, + { + "epoch": 13.179166666666667, + "grad_norm": 3.034087198442263, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 288712524, + "step": 3163 + }, + { + "epoch": 13.179166666666667, + "loss": 0.051329098641872406, + "loss_ce": 0.0002426707505946979, + "loss_iou": 0.19140625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 288712524, + "step": 3163 + }, + { + "epoch": 13.183333333333334, + "grad_norm": 2.4655614107995922, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 288803184, + "step": 3164 + }, + { + "epoch": 13.183333333333334, + "loss": 0.055702660232782364, + "loss_ce": 8.08061577117769e-06, + "loss_iou": 0.248046875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 288803184, + "step": 3164 + }, + { + "epoch": 13.1875, + "grad_norm": 3.0002036304689423, + "learning_rate": 5e-05, + "loss": 0.0751, + "num_input_tokens_seen": 288893656, + "step": 3165 + }, + { + "epoch": 13.1875, + "loss": 0.07198071479797363, + "loss_ce": 5.004450486012502e-06, + "loss_iou": 0.3515625, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 288893656, + "step": 3165 + }, + { + "epoch": 13.191666666666666, + "grad_norm": 3.3823548533561465, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 288985708, + "step": 3166 + }, + { + "epoch": 13.191666666666666, + "loss": 0.061142697930336, + "loss_ce": 7.702488801442087e-05, + "loss_iou": 0.275390625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 288985708, + "step": 3166 + }, + { + "epoch": 13.195833333333333, + "grad_norm": 3.1353485248169104, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 289076480, + "step": 3167 + }, + { + "epoch": 13.195833333333333, + "loss": 0.06192321702837944, + "loss_ce": 3.05069329442631e-06, + "loss_iou": 0.298828125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 289076480, + "step": 3167 + }, + { + "epoch": 13.2, + "grad_norm": 2.0384665273080653, + "learning_rate": 5e-05, + "loss": 0.0371, + "num_input_tokens_seen": 289167716, + "step": 3168 + }, + { + "epoch": 13.2, + "loss": 0.05005306378006935, + "loss_ce": 4.234663720126264e-06, + "loss_iou": 0.08447265625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 289167716, + "step": 3168 + }, + { + "epoch": 13.204166666666667, + "grad_norm": 3.8817508516141057, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 289258356, + "step": 3169 + }, + { + "epoch": 13.204166666666667, + "loss": 0.05380536615848541, + "loss_ce": 2.8745339477609377e-06, + "loss_iou": 0.291015625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 289258356, + "step": 3169 + }, + { + "epoch": 13.208333333333334, + "grad_norm": 3.1083953606747827, + "learning_rate": 5e-05, + "loss": 0.0864, + "num_input_tokens_seen": 289349840, + "step": 3170 + }, + { + "epoch": 13.208333333333334, + "loss": 0.09058225899934769, + "loss_ce": 3.6607481888495386e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 289349840, + "step": 3170 + }, + { + "epoch": 13.2125, + "grad_norm": 2.1639030749255412, + "learning_rate": 5e-05, + "loss": 0.0332, + "num_input_tokens_seen": 289440916, + "step": 3171 + }, + { + "epoch": 13.2125, + "loss": 0.027187222614884377, + "loss_ce": 0.00014864768309053034, + "loss_iou": 0.1923828125, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 289440916, + "step": 3171 + }, + { + "epoch": 13.216666666666667, + "grad_norm": 2.6789458317545987, + "learning_rate": 5e-05, + "loss": 0.0413, + "num_input_tokens_seen": 289532536, + "step": 3172 + }, + { + "epoch": 13.216666666666667, + "loss": 0.037016674876213074, + "loss_ce": 0.00027350973687134683, + "loss_iou": 0.234375, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 289532536, + "step": 3172 + }, + { + "epoch": 13.220833333333333, + "grad_norm": 3.155049670169387, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 289624016, + "step": 3173 + }, + { + "epoch": 13.220833333333333, + "loss": 0.04516705870628357, + "loss_ce": 1.629926191526465e-05, + "loss_iou": 0.322265625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 289624016, + "step": 3173 + }, + { + "epoch": 13.225, + "grad_norm": 2.230629433782654, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 289715732, + "step": 3174 + }, + { + "epoch": 13.225, + "loss": 0.08780436962842941, + "loss_ce": 0.0012717776698991656, + "loss_iou": 0.265625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 289715732, + "step": 3174 + }, + { + "epoch": 13.229166666666666, + "grad_norm": 3.4239700489282483, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 289807220, + "step": 3175 + }, + { + "epoch": 13.229166666666666, + "loss": 0.08312824368476868, + "loss_ce": 4.4145624997327104e-05, + "loss_iou": 0.224609375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 289807220, + "step": 3175 + }, + { + "epoch": 13.233333333333333, + "grad_norm": 3.053632576851467, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 289898104, + "step": 3176 + }, + { + "epoch": 13.233333333333333, + "loss": 0.043274927884340286, + "loss_ce": 1.626130506338086e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 289898104, + "step": 3176 + }, + { + "epoch": 13.2375, + "grad_norm": 2.529505553985378, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 289989496, + "step": 3177 + }, + { + "epoch": 13.2375, + "loss": 0.05597273260354996, + "loss_ce": 0.0005680684698745608, + "loss_iou": 0.29296875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 289989496, + "step": 3177 + }, + { + "epoch": 13.241666666666667, + "grad_norm": 3.0168185065630957, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 290081328, + "step": 3178 + }, + { + "epoch": 13.241666666666667, + "loss": 0.04695458710193634, + "loss_ce": 3.290790800747345e-06, + "loss_iou": 0.353515625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 290081328, + "step": 3178 + }, + { + "epoch": 13.245833333333334, + "grad_norm": 10.427004976348053, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 290172672, + "step": 3179 + }, + { + "epoch": 13.245833333333334, + "loss": 0.04928769916296005, + "loss_ce": 7.810293027432635e-05, + "loss_iou": 0.3515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 290172672, + "step": 3179 + }, + { + "epoch": 13.25, + "grad_norm": 3.649719781616416, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 290264228, + "step": 3180 + }, + { + "epoch": 13.25, + "loss": 0.056522026658058167, + "loss_ce": 1.1101107702415902e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 290264228, + "step": 3180 + }, + { + "epoch": 13.254166666666666, + "grad_norm": 2.619291730296451, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 290355480, + "step": 3181 + }, + { + "epoch": 13.254166666666666, + "loss": 0.054982878267765045, + "loss_ce": 0.0004708552733063698, + "loss_iou": 0.28515625, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 290355480, + "step": 3181 + }, + { + "epoch": 13.258333333333333, + "grad_norm": 2.2875459548458377, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 290447160, + "step": 3182 + }, + { + "epoch": 13.258333333333333, + "loss": 0.05870981141924858, + "loss_ce": 7.028212712612003e-05, + "loss_iou": 0.41015625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 290447160, + "step": 3182 + }, + { + "epoch": 13.2625, + "grad_norm": 12.306772143262872, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 290538864, + "step": 3183 + }, + { + "epoch": 13.2625, + "loss": 0.02843681536614895, + "loss_ce": 2.494886939530261e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.00567626953125, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 290538864, + "step": 3183 + }, + { + "epoch": 13.266666666666667, + "grad_norm": 2.1338759100200586, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 290630028, + "step": 3184 + }, + { + "epoch": 13.266666666666667, + "loss": 0.059931568801403046, + "loss_ce": 2.67685436483589e-06, + "loss_iou": 0.291015625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 290630028, + "step": 3184 + }, + { + "epoch": 13.270833333333334, + "grad_norm": 1.8325673657695876, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 290719740, + "step": 3185 + }, + { + "epoch": 13.270833333333334, + "loss": 0.04650639742612839, + "loss_ce": 0.0071082036010921, + "loss_iou": 0.181640625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 290719740, + "step": 3185 + }, + { + "epoch": 13.275, + "grad_norm": 1.2132126586571146, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 290811232, + "step": 3186 + }, + { + "epoch": 13.275, + "loss": 0.03850376605987549, + "loss_ce": 5.842209247930441e-06, + "loss_iou": 0.19140625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 290811232, + "step": 3186 + }, + { + "epoch": 13.279166666666667, + "grad_norm": 2.764621298389163, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 290902284, + "step": 3187 + }, + { + "epoch": 13.279166666666667, + "loss": 0.04973310977220535, + "loss_ce": 4.718270247394685e-06, + "loss_iou": 0.251953125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 290902284, + "step": 3187 + }, + { + "epoch": 13.283333333333333, + "grad_norm": 2.4934463767616686, + "learning_rate": 5e-05, + "loss": 0.0329, + "num_input_tokens_seen": 290993196, + "step": 3188 + }, + { + "epoch": 13.283333333333333, + "loss": 0.04270722717046738, + "loss_ce": 1.3137170753907412e-05, + "loss_iou": 0.26171875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 290993196, + "step": 3188 + }, + { + "epoch": 13.2875, + "grad_norm": 3.3667243336877553, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 291084408, + "step": 3189 + }, + { + "epoch": 13.2875, + "loss": 0.06418552249670029, + "loss_ce": 0.0002435675705783069, + "loss_iou": 0.291015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 291084408, + "step": 3189 + }, + { + "epoch": 13.291666666666666, + "grad_norm": 2.904027696337672, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 291174800, + "step": 3190 + }, + { + "epoch": 13.291666666666666, + "loss": 0.05541330575942993, + "loss_ce": 2.3902084649307653e-05, + "loss_iou": 0.296875, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 291174800, + "step": 3190 + }, + { + "epoch": 13.295833333333333, + "grad_norm": 5.006072671935732, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 291266916, + "step": 3191 + }, + { + "epoch": 13.295833333333333, + "loss": 0.046564217656850815, + "loss_ce": 0.00014698227460030466, + "loss_iou": 0.302734375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 291266916, + "step": 3191 + }, + { + "epoch": 13.3, + "grad_norm": 2.719090840073287, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 291358592, + "step": 3192 + }, + { + "epoch": 13.3, + "loss": 0.050211891531944275, + "loss_ce": 0.001048073172569275, + "loss_iou": 0.296875, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 291358592, + "step": 3192 + }, + { + "epoch": 13.304166666666667, + "grad_norm": 2.189135791353442, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 291449948, + "step": 3193 + }, + { + "epoch": 13.304166666666667, + "loss": 0.10209139436483383, + "loss_ce": 0.00016268000763375312, + "loss_iou": 0.419921875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 291449948, + "step": 3193 + }, + { + "epoch": 13.308333333333334, + "grad_norm": 5.16939944493548, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 291540952, + "step": 3194 + }, + { + "epoch": 13.308333333333334, + "loss": 0.0835447683930397, + "loss_ce": 2.898941602325067e-06, + "loss_iou": 0.349609375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 291540952, + "step": 3194 + }, + { + "epoch": 13.3125, + "grad_norm": 27.4742236946222, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 291632664, + "step": 3195 + }, + { + "epoch": 13.3125, + "loss": 0.05430099368095398, + "loss_ce": 1.0216759619652294e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 291632664, + "step": 3195 + }, + { + "epoch": 13.316666666666666, + "grad_norm": 1.853929865425854, + "learning_rate": 5e-05, + "loss": 0.0951, + "num_input_tokens_seen": 291723896, + "step": 3196 + }, + { + "epoch": 13.316666666666666, + "loss": 0.058908045291900635, + "loss_ce": 1.4912147889845073e-06, + "loss_iou": 0.1943359375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 291723896, + "step": 3196 + }, + { + "epoch": 13.320833333333333, + "grad_norm": 1.4714996835976843, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 291814032, + "step": 3197 + }, + { + "epoch": 13.320833333333333, + "loss": 0.0393851101398468, + "loss_ce": 4.032349170302041e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 291814032, + "step": 3197 + }, + { + "epoch": 13.325, + "grad_norm": 2.0295531105932603, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 291905260, + "step": 3198 + }, + { + "epoch": 13.325, + "loss": 0.09402615576982498, + "loss_ce": 1.5004483202574193e-06, + "loss_iou": 0.2373046875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 291905260, + "step": 3198 + }, + { + "epoch": 13.329166666666667, + "grad_norm": 6.217075773976911, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 291996148, + "step": 3199 + }, + { + "epoch": 13.329166666666667, + "loss": 0.06208435446023941, + "loss_ce": 4.211819759802893e-05, + "loss_iou": 0.28515625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 291996148, + "step": 3199 + }, + { + "epoch": 13.333333333333334, + "grad_norm": 7.558257323309482, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 292087368, + "step": 3200 + }, + { + "epoch": 13.333333333333334, + "loss": 0.06037037819623947, + "loss_ce": 6.764857971575111e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 292087368, + "step": 3200 + }, + { + "epoch": 13.3375, + "grad_norm": 3.1211902818135626, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 292178960, + "step": 3201 + }, + { + "epoch": 13.3375, + "loss": 0.05840154364705086, + "loss_ce": 0.00011296742013655603, + "loss_iou": 0.287109375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 292178960, + "step": 3201 + }, + { + "epoch": 13.341666666666667, + "grad_norm": 4.746488694193927, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 292270872, + "step": 3202 + }, + { + "epoch": 13.341666666666667, + "loss": 0.12660613656044006, + "loss_ce": 0.003002327401190996, + "loss_iou": 0.388671875, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 292270872, + "step": 3202 + }, + { + "epoch": 13.345833333333333, + "grad_norm": 5.126919814558636, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 292362420, + "step": 3203 + }, + { + "epoch": 13.345833333333333, + "loss": 0.0694979578256607, + "loss_ce": 0.00045193356345407665, + "loss_iou": 0.380859375, + "loss_num": 0.01385498046875, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 292362420, + "step": 3203 + }, + { + "epoch": 13.35, + "grad_norm": 4.9269241324619735, + "learning_rate": 5e-05, + "loss": 0.0797, + "num_input_tokens_seen": 292453896, + "step": 3204 + }, + { + "epoch": 13.35, + "loss": 0.07445742189884186, + "loss_ce": 8.608803909737617e-05, + "loss_iou": 0.306640625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 292453896, + "step": 3204 + }, + { + "epoch": 13.354166666666666, + "grad_norm": 4.441509840417962, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 292545344, + "step": 3205 + }, + { + "epoch": 13.354166666666666, + "loss": 0.05894090607762337, + "loss_ce": 4.198206079308875e-05, + "loss_iou": 0.23828125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 292545344, + "step": 3205 + }, + { + "epoch": 13.358333333333333, + "grad_norm": 10.734619290972743, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 292636268, + "step": 3206 + }, + { + "epoch": 13.358333333333333, + "loss": 0.06319648027420044, + "loss_ce": 9.830087947193533e-06, + "loss_iou": 0.228515625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 292636268, + "step": 3206 + }, + { + "epoch": 13.3625, + "grad_norm": 3.1316433085948847, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 292727492, + "step": 3207 + }, + { + "epoch": 13.3625, + "loss": 0.05296047031879425, + "loss_ce": 7.350678788498044e-05, + "loss_iou": 0.20703125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 292727492, + "step": 3207 + }, + { + "epoch": 13.366666666666667, + "grad_norm": 3.5349685046344845, + "learning_rate": 5e-05, + "loss": 0.0942, + "num_input_tokens_seen": 292819156, + "step": 3208 + }, + { + "epoch": 13.366666666666667, + "loss": 0.13701561093330383, + "loss_ce": 0.00022056905436329544, + "loss_iou": 0.2138671875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 292819156, + "step": 3208 + }, + { + "epoch": 13.370833333333334, + "grad_norm": 3.446596785343221, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 292909460, + "step": 3209 + }, + { + "epoch": 13.370833333333334, + "loss": 0.08869334310293198, + "loss_ce": 0.0007569487206637859, + "loss_iou": 0.3203125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 292909460, + "step": 3209 + }, + { + "epoch": 13.375, + "grad_norm": 3.1388333940340516, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 293001040, + "step": 3210 + }, + { + "epoch": 13.375, + "loss": 0.07420412451028824, + "loss_ce": 0.00012270617298781872, + "loss_iou": 0.328125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 293001040, + "step": 3210 + }, + { + "epoch": 13.379166666666666, + "grad_norm": 3.0297964653931015, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 293091976, + "step": 3211 + }, + { + "epoch": 13.379166666666666, + "loss": 0.06871441006660461, + "loss_ce": 4.083505245944252e-06, + "loss_iou": 0.23828125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 293091976, + "step": 3211 + }, + { + "epoch": 13.383333333333333, + "grad_norm": 2.8523748146602013, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 293183272, + "step": 3212 + }, + { + "epoch": 13.383333333333333, + "loss": 0.08873356878757477, + "loss_ce": 3.7158408758841688e-06, + "loss_iou": 0.3671875, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 293183272, + "step": 3212 + }, + { + "epoch": 13.3875, + "grad_norm": 3.764933436661599, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 293274228, + "step": 3213 + }, + { + "epoch": 13.3875, + "loss": 0.0799584686756134, + "loss_ce": 2.4127339202095754e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 293274228, + "step": 3213 + }, + { + "epoch": 13.391666666666667, + "grad_norm": 1.6610604273488636, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 293365980, + "step": 3214 + }, + { + "epoch": 13.391666666666667, + "loss": 0.043038297444581985, + "loss_ce": 8.513585271430202e-06, + "loss_iou": 0.28515625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 293365980, + "step": 3214 + }, + { + "epoch": 13.395833333333334, + "grad_norm": 3.524622724240209, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 293457464, + "step": 3215 + }, + { + "epoch": 13.395833333333334, + "loss": 0.06250010430812836, + "loss_ce": 0.00016794718976598233, + "loss_iou": 0.3671875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 293457464, + "step": 3215 + }, + { + "epoch": 13.4, + "grad_norm": 2.8888911715197008, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 293549552, + "step": 3216 + }, + { + "epoch": 13.4, + "loss": 0.07175292819738388, + "loss_ce": 0.00015869125491008162, + "loss_iou": 0.349609375, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 293549552, + "step": 3216 + }, + { + "epoch": 13.404166666666667, + "grad_norm": 3.3778301357320637, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 293641200, + "step": 3217 + }, + { + "epoch": 13.404166666666667, + "loss": 0.05942009389400482, + "loss_ce": 4.814373096451163e-05, + "loss_iou": 0.259765625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 293641200, + "step": 3217 + }, + { + "epoch": 13.408333333333333, + "grad_norm": 2.7493530635289845, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 293732296, + "step": 3218 + }, + { + "epoch": 13.408333333333333, + "loss": 0.05776010453701019, + "loss_ce": 5.587176019616891e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 293732296, + "step": 3218 + }, + { + "epoch": 13.4125, + "grad_norm": 2.4829311814395063, + "learning_rate": 5e-05, + "loss": 0.0345, + "num_input_tokens_seen": 293821980, + "step": 3219 + }, + { + "epoch": 13.4125, + "loss": 0.03791956976056099, + "loss_ce": 1.4795160723224399e-06, + "loss_iou": 0.31640625, + "loss_num": 0.007598876953125, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 293821980, + "step": 3219 + }, + { + "epoch": 13.416666666666666, + "grad_norm": 3.703491461010344, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 293913188, + "step": 3220 + }, + { + "epoch": 13.416666666666666, + "loss": 0.0662919133901596, + "loss_ce": 0.0001298037386732176, + "loss_iou": 0.2412109375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 293913188, + "step": 3220 + }, + { + "epoch": 13.420833333333333, + "grad_norm": 3.084202386080775, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 294004852, + "step": 3221 + }, + { + "epoch": 13.420833333333333, + "loss": 0.1051362007856369, + "loss_ce": 0.0005028643645346165, + "loss_iou": 0.24609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 294004852, + "step": 3221 + }, + { + "epoch": 13.425, + "grad_norm": 3.1228620062025017, + "learning_rate": 5e-05, + "loss": 0.1263, + "num_input_tokens_seen": 294096136, + "step": 3222 + }, + { + "epoch": 13.425, + "loss": 0.17248567938804626, + "loss_ce": 0.00010714141535572708, + "loss_iou": 0.345703125, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 294096136, + "step": 3222 + }, + { + "epoch": 13.429166666666667, + "grad_norm": 1.37170971948216, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 294187544, + "step": 3223 + }, + { + "epoch": 13.429166666666667, + "loss": 0.03299761191010475, + "loss_ce": 6.914298137417063e-05, + "loss_iou": 0.20703125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 294187544, + "step": 3223 + }, + { + "epoch": 13.433333333333334, + "grad_norm": 2.073077056193961, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 294279420, + "step": 3224 + }, + { + "epoch": 13.433333333333334, + "loss": 0.04905780777335167, + "loss_ce": 8.432472895947285e-06, + "loss_iou": 0.314453125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 294279420, + "step": 3224 + }, + { + "epoch": 13.4375, + "grad_norm": 3.519264702945926, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 294370768, + "step": 3225 + }, + { + "epoch": 13.4375, + "loss": 0.035311006009578705, + "loss_ce": 0.00018527149222791195, + "loss_iou": 0.33984375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 294370768, + "step": 3225 + }, + { + "epoch": 13.441666666666666, + "grad_norm": 2.2554218223026634, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 294462096, + "step": 3226 + }, + { + "epoch": 13.441666666666666, + "loss": 0.05369444936513901, + "loss_ce": 0.006071767769753933, + "loss_iou": 0.287109375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 294462096, + "step": 3226 + }, + { + "epoch": 13.445833333333333, + "grad_norm": 2.390943543831352, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 294553516, + "step": 3227 + }, + { + "epoch": 13.445833333333333, + "loss": 0.05189789831638336, + "loss_ce": 2.753644821495982e-06, + "loss_iou": 0.220703125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 294553516, + "step": 3227 + }, + { + "epoch": 13.45, + "grad_norm": 2.6968114523630544, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 294645088, + "step": 3228 + }, + { + "epoch": 13.45, + "loss": 0.05043869838118553, + "loss_ce": 5.417674037744291e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 294645088, + "step": 3228 + }, + { + "epoch": 13.454166666666667, + "grad_norm": 4.856076039293107, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 294736588, + "step": 3229 + }, + { + "epoch": 13.454166666666667, + "loss": 0.09605845808982849, + "loss_ce": 0.0003095621941611171, + "loss_iou": 0.2734375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 294736588, + "step": 3229 + }, + { + "epoch": 13.458333333333334, + "grad_norm": 2.5069662318370267, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 294828220, + "step": 3230 + }, + { + "epoch": 13.458333333333334, + "loss": 0.047693684697151184, + "loss_ce": 0.0015358488308265805, + "loss_iou": 0.314453125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 294828220, + "step": 3230 + }, + { + "epoch": 13.4625, + "grad_norm": 8.298429445162126, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 294919444, + "step": 3231 + }, + { + "epoch": 13.4625, + "loss": 0.062046781182289124, + "loss_ce": 7.320824079215527e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 294919444, + "step": 3231 + }, + { + "epoch": 13.466666666666667, + "grad_norm": 1.5209402952815354, + "learning_rate": 5e-05, + "loss": 0.0973, + "num_input_tokens_seen": 295010332, + "step": 3232 + }, + { + "epoch": 13.466666666666667, + "loss": 0.07522740960121155, + "loss_ce": 0.0013748712372034788, + "loss_iou": 0.158203125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 295010332, + "step": 3232 + }, + { + "epoch": 13.470833333333333, + "grad_norm": 1.5515329397715614, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 295101164, + "step": 3233 + }, + { + "epoch": 13.470833333333333, + "loss": 0.031293466687202454, + "loss_ce": 4.3465348426252604e-05, + "loss_iou": 0.169921875, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 295101164, + "step": 3233 + }, + { + "epoch": 13.475, + "grad_norm": 2.279928448141374, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 295192356, + "step": 3234 + }, + { + "epoch": 13.475, + "loss": 0.044148027896881104, + "loss_ce": 4.352897576609394e-06, + "loss_iou": 0.2431640625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 295192356, + "step": 3234 + }, + { + "epoch": 13.479166666666666, + "grad_norm": 6.026628542014703, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 295283856, + "step": 3235 + }, + { + "epoch": 13.479166666666666, + "loss": 0.10446874797344208, + "loss_ce": 0.00020544748986139894, + "loss_iou": 0.361328125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 295283856, + "step": 3235 + }, + { + "epoch": 13.483333333333333, + "grad_norm": 3.62820253390924, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 295375088, + "step": 3236 + }, + { + "epoch": 13.483333333333333, + "loss": 0.07601499557495117, + "loss_ce": 0.00011778472980950028, + "loss_iou": 0.41796875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 295375088, + "step": 3236 + }, + { + "epoch": 13.4875, + "grad_norm": 3.6475192112020154, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 295466100, + "step": 3237 + }, + { + "epoch": 13.4875, + "loss": 0.0731085017323494, + "loss_ce": 3.415943865547888e-05, + "loss_iou": 0.330078125, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 295466100, + "step": 3237 + }, + { + "epoch": 13.491666666666667, + "grad_norm": 2.6123224492326496, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 295557220, + "step": 3238 + }, + { + "epoch": 13.491666666666667, + "loss": 0.05012810602784157, + "loss_ce": 6.401949940482154e-05, + "loss_iou": 0.388671875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 295557220, + "step": 3238 + }, + { + "epoch": 13.495833333333334, + "grad_norm": 3.359588517198497, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 295648748, + "step": 3239 + }, + { + "epoch": 13.495833333333334, + "loss": 0.036655161529779434, + "loss_ce": 3.5483176361594815e-06, + "loss_iou": 0.2890625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 295648748, + "step": 3239 + }, + { + "epoch": 13.5, + "grad_norm": 3.604821762237533, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 295740244, + "step": 3240 + }, + { + "epoch": 13.5, + "loss": 0.062007561326026917, + "loss_ce": 0.001887934748083353, + "loss_iou": 0.19140625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 295740244, + "step": 3240 + }, + { + "epoch": 13.504166666666666, + "grad_norm": 4.055241534775984, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 295830984, + "step": 3241 + }, + { + "epoch": 13.504166666666666, + "loss": 0.07002242654561996, + "loss_ce": 0.0029600486159324646, + "loss_iou": 0.203125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 295830984, + "step": 3241 + }, + { + "epoch": 13.508333333333333, + "grad_norm": 6.029464768209386, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 295922340, + "step": 3242 + }, + { + "epoch": 13.508333333333333, + "loss": 0.06743155419826508, + "loss_ce": 0.00014029696467332542, + "loss_iou": 0.27734375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 295922340, + "step": 3242 + }, + { + "epoch": 13.5125, + "grad_norm": 2.6982344595986687, + "learning_rate": 5e-05, + "loss": 0.0921, + "num_input_tokens_seen": 296013456, + "step": 3243 + }, + { + "epoch": 13.5125, + "loss": 0.05534626543521881, + "loss_ce": 0.0016963636735454202, + "loss_iou": 0.26953125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 296013456, + "step": 3243 + }, + { + "epoch": 13.516666666666667, + "grad_norm": 2.8279473447502537, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 296105064, + "step": 3244 + }, + { + "epoch": 13.516666666666667, + "loss": 0.05431250110268593, + "loss_ce": 0.0002506112796254456, + "loss_iou": 0.17578125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 296105064, + "step": 3244 + }, + { + "epoch": 13.520833333333334, + "grad_norm": 3.0632875241857533, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 296196896, + "step": 3245 + }, + { + "epoch": 13.520833333333334, + "loss": 0.08690355718135834, + "loss_ce": 0.0030870346818119287, + "loss_iou": 0.2177734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 296196896, + "step": 3245 + }, + { + "epoch": 13.525, + "grad_norm": 47.683143876117725, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 296287372, + "step": 3246 + }, + { + "epoch": 13.525, + "loss": 0.052827537059783936, + "loss_ce": 1.6864692952367477e-05, + "loss_iou": 0.212890625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 296287372, + "step": 3246 + }, + { + "epoch": 13.529166666666667, + "grad_norm": 5.317836818171935, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 296379124, + "step": 3247 + }, + { + "epoch": 13.529166666666667, + "loss": 0.04468844458460808, + "loss_ce": 1.0708215995691717e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 296379124, + "step": 3247 + }, + { + "epoch": 13.533333333333333, + "grad_norm": 1.592828681904474, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 296470452, + "step": 3248 + }, + { + "epoch": 13.533333333333333, + "loss": 0.057369768619537354, + "loss_ce": 0.00043922552140429616, + "loss_iou": 0.1015625, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 296470452, + "step": 3248 + }, + { + "epoch": 13.5375, + "grad_norm": 2.19081889373599, + "learning_rate": 5e-05, + "loss": 0.0815, + "num_input_tokens_seen": 296561668, + "step": 3249 + }, + { + "epoch": 13.5375, + "loss": 0.09439752995967865, + "loss_ce": 0.00018976579303853214, + "loss_iou": 0.27734375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 296561668, + "step": 3249 + }, + { + "epoch": 13.541666666666666, + "grad_norm": 2.3321345177616406, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 296653248, + "step": 3250 + }, + { + "epoch": 13.541666666666666, + "eval_seeclick_CIoU": 0.22977716475725174, + "eval_seeclick_GIoU": 0.2202046513557434, + "eval_seeclick_IoU": 0.33641111850738525, + "eval_seeclick_MAE_all": 0.10021472722291946, + "eval_seeclick_MAE_h": 0.09250224754214287, + "eval_seeclick_MAE_w": 0.20677661150693893, + "eval_seeclick_MAE_x_boxes": 0.22251524031162262, + "eval_seeclick_MAE_y_boxes": 0.09527213498950005, + "eval_seeclick_NUM_probability": 0.9999966621398926, + "eval_seeclick_inside_bbox": 0.4786931872367859, + "eval_seeclick_loss": 0.5649774670600891, + "eval_seeclick_loss_ce": 0.12786505371332169, + "eval_seeclick_loss_iou": 0.4483642578125, + "eval_seeclick_loss_num": 0.0825653076171875, + "eval_seeclick_loss_xval": 0.41302490234375, + "eval_seeclick_runtime": 76.5699, + "eval_seeclick_samples_per_second": 0.562, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 296653248, + "step": 3250 + }, + { + "epoch": 13.541666666666666, + "eval_icons_CIoU": 0.36827825009822845, + "eval_icons_GIoU": 0.3672151267528534, + "eval_icons_IoU": 0.43234655261039734, + "eval_icons_MAE_all": 0.05891683138906956, + "eval_icons_MAE_h": 0.11804331094026566, + "eval_icons_MAE_w": 0.0930902399122715, + "eval_icons_MAE_x_boxes": 0.08903488144278526, + "eval_icons_MAE_y_boxes": 0.11908634006977081, + "eval_icons_NUM_probability": 0.9999977350234985, + "eval_icons_inside_bbox": 0.6145833432674408, + "eval_icons_loss": 0.2957323491573334, + "eval_icons_loss_ce": 0.0025786529295146465, + "eval_icons_loss_iou": 0.2705078125, + "eval_icons_loss_num": 0.06157684326171875, + "eval_icons_loss_xval": 0.3079833984375, + "eval_icons_runtime": 88.3295, + "eval_icons_samples_per_second": 0.566, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 296653248, + "step": 3250 + }, + { + "epoch": 13.541666666666666, + "eval_screenspot_CIoU": 0.36341987053553265, + "eval_screenspot_GIoU": 0.33735422293345135, + "eval_screenspot_IoU": 0.44230784972508747, + "eval_screenspot_MAE_all": 0.1005148043235143, + "eval_screenspot_MAE_h": 0.11051860203345616, + "eval_screenspot_MAE_w": 0.2034025490283966, + "eval_screenspot_MAE_x_boxes": 0.18765543401241302, + "eval_screenspot_MAE_y_boxes": 0.10739666223526001, + "eval_screenspot_NUM_probability": 0.9987173080444336, + "eval_screenspot_inside_bbox": 0.662500003973643, + "eval_screenspot_loss": 0.5043066740036011, + "eval_screenspot_loss_ce": 0.0002492249436727434, + "eval_screenspot_loss_iou": 0.3599853515625, + "eval_screenspot_loss_num": 0.10186767578125, + "eval_screenspot_loss_xval": 0.5091959635416666, + "eval_screenspot_runtime": 158.9939, + "eval_screenspot_samples_per_second": 0.56, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 296653248, + "step": 3250 + }, + { + "epoch": 13.541666666666666, + "eval_compot_CIoU": 0.5643254518508911, + "eval_compot_GIoU": 0.559347540140152, + "eval_compot_IoU": 0.62269726395607, + "eval_compot_MAE_all": 0.04530065879225731, + "eval_compot_MAE_h": 0.06042328476905823, + "eval_compot_MAE_w": 0.10373424738645554, + "eval_compot_MAE_x_boxes": 0.10278752446174622, + "eval_compot_MAE_y_boxes": 0.05855695717036724, + "eval_compot_NUM_probability": 0.9999950230121613, + "eval_compot_inside_bbox": 0.7986111044883728, + "eval_compot_loss": 0.24213729798793793, + "eval_compot_loss_ce": 0.029723092913627625, + "eval_compot_loss_iou": 0.40264892578125, + "eval_compot_loss_num": 0.036865234375, + "eval_compot_loss_xval": 0.18415069580078125, + "eval_compot_runtime": 91.1287, + "eval_compot_samples_per_second": 0.549, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 296653248, + "step": 3250 + }, + { + "epoch": 13.541666666666666, + "loss": 0.2028052806854248, + "loss_ce": 0.026993528008461, + "loss_iou": 0.404296875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 296653248, + "step": 3250 + }, + { + "epoch": 13.545833333333333, + "grad_norm": 2.8028758060177674, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 296744932, + "step": 3251 + }, + { + "epoch": 13.545833333333333, + "loss": 0.046471647918224335, + "loss_ce": 0.00013070247950963676, + "loss_iou": 0.30859375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 296744932, + "step": 3251 + }, + { + "epoch": 13.55, + "grad_norm": 2.103715226100908, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 296836068, + "step": 3252 + }, + { + "epoch": 13.55, + "loss": 0.07032459229230881, + "loss_ce": 1.2091385542589705e-05, + "loss_iou": 0.3125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 296836068, + "step": 3252 + }, + { + "epoch": 13.554166666666667, + "grad_norm": 22.408969808322134, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 296927288, + "step": 3253 + }, + { + "epoch": 13.554166666666667, + "loss": 0.09005076438188553, + "loss_ce": 0.0006037460989318788, + "loss_iou": 0.3125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 296927288, + "step": 3253 + }, + { + "epoch": 13.558333333333334, + "grad_norm": 1.2666054672318465, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 297018372, + "step": 3254 + }, + { + "epoch": 13.558333333333334, + "loss": 0.031558021903038025, + "loss_ce": 2.8491049306467175e-06, + "loss_iou": 0.2109375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 297018372, + "step": 3254 + }, + { + "epoch": 13.5625, + "grad_norm": 2.562090417454454, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 297109632, + "step": 3255 + }, + { + "epoch": 13.5625, + "loss": 0.12382586300373077, + "loss_ce": 4.657105091609992e-05, + "loss_iou": 0.244140625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 297109632, + "step": 3255 + }, + { + "epoch": 13.566666666666666, + "grad_norm": 2.697239892910222, + "learning_rate": 5e-05, + "loss": 0.0882, + "num_input_tokens_seen": 297200668, + "step": 3256 + }, + { + "epoch": 13.566666666666666, + "loss": 0.058978091925382614, + "loss_ce": 2.871132892323658e-06, + "loss_iou": 0.25390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 297200668, + "step": 3256 + }, + { + "epoch": 13.570833333333333, + "grad_norm": 2.1794785374967636, + "learning_rate": 5e-05, + "loss": 0.0445, + "num_input_tokens_seen": 297292116, + "step": 3257 + }, + { + "epoch": 13.570833333333333, + "loss": 0.04801057279109955, + "loss_ce": 6.423933882615529e-06, + "loss_iou": 0.1943359375, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 297292116, + "step": 3257 + }, + { + "epoch": 13.575, + "grad_norm": 4.592907359980534, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 297383600, + "step": 3258 + }, + { + "epoch": 13.575, + "loss": 0.0719158798456192, + "loss_ce": 0.002900381339713931, + "loss_iou": 0.2734375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 297383600, + "step": 3258 + }, + { + "epoch": 13.579166666666667, + "grad_norm": 2.7038816836170354, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 297474900, + "step": 3259 + }, + { + "epoch": 13.579166666666667, + "loss": 0.053036607801914215, + "loss_ce": 0.00027171536930836737, + "loss_iou": 0.23828125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 297474900, + "step": 3259 + }, + { + "epoch": 13.583333333333334, + "grad_norm": 4.639443056226232, + "learning_rate": 5e-05, + "loss": 0.0883, + "num_input_tokens_seen": 297565816, + "step": 3260 + }, + { + "epoch": 13.583333333333334, + "loss": 0.10763812065124512, + "loss_ce": 0.00015521238674409688, + "loss_iou": 0.216796875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 297565816, + "step": 3260 + }, + { + "epoch": 13.5875, + "grad_norm": 2.9872991114469727, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 297656984, + "step": 3261 + }, + { + "epoch": 13.5875, + "loss": 0.07486598193645477, + "loss_ce": 1.3987812053528614e-05, + "loss_iou": 0.21484375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 297656984, + "step": 3261 + }, + { + "epoch": 13.591666666666667, + "grad_norm": 5.086790925707244, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 297748888, + "step": 3262 + }, + { + "epoch": 13.591666666666667, + "loss": 0.07709628343582153, + "loss_ce": 2.413909896858968e-05, + "loss_iou": 0.34375, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 297748888, + "step": 3262 + }, + { + "epoch": 13.595833333333333, + "grad_norm": 3.0673587592493154, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 297840056, + "step": 3263 + }, + { + "epoch": 13.595833333333333, + "loss": 0.031825195997953415, + "loss_ce": 0.00043023889884352684, + "loss_iou": 0.2578125, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 297840056, + "step": 3263 + }, + { + "epoch": 13.6, + "grad_norm": 2.8440067449826523, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 297931472, + "step": 3264 + }, + { + "epoch": 13.6, + "loss": 0.07889088243246078, + "loss_ce": 3.346401354065165e-05, + "loss_iou": 0.23046875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 297931472, + "step": 3264 + }, + { + "epoch": 13.604166666666666, + "grad_norm": 2.300014104251759, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 298022684, + "step": 3265 + }, + { + "epoch": 13.604166666666666, + "loss": 0.05194888263940811, + "loss_ce": 6.899907748447731e-05, + "loss_iou": 0.265625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 298022684, + "step": 3265 + }, + { + "epoch": 13.608333333333333, + "grad_norm": 3.4597564597891304, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 298114044, + "step": 3266 + }, + { + "epoch": 13.608333333333333, + "loss": 0.05340898036956787, + "loss_ce": 9.47702574194409e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 298114044, + "step": 3266 + }, + { + "epoch": 13.6125, + "grad_norm": 2.4288926560854542, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 298205696, + "step": 3267 + }, + { + "epoch": 13.6125, + "loss": 0.04282256215810776, + "loss_ce": 0.0009066681959666312, + "loss_iou": 0.2890625, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 298205696, + "step": 3267 + }, + { + "epoch": 13.616666666666667, + "grad_norm": 1.5850870816034652, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 298297312, + "step": 3268 + }, + { + "epoch": 13.616666666666667, + "loss": 0.05654668062925339, + "loss_ce": 1.2864399650425185e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 298297312, + "step": 3268 + }, + { + "epoch": 13.620833333333334, + "grad_norm": 1.3325825115255354, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 298388808, + "step": 3269 + }, + { + "epoch": 13.620833333333334, + "loss": 0.05588510259985924, + "loss_ce": 2.2675443688058294e-05, + "loss_iou": 0.33203125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 298388808, + "step": 3269 + }, + { + "epoch": 13.625, + "grad_norm": 2.703315621670545, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 298479920, + "step": 3270 + }, + { + "epoch": 13.625, + "loss": 0.10270601511001587, + "loss_ce": 0.00010591827594907954, + "loss_iou": 0.2060546875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 298479920, + "step": 3270 + }, + { + "epoch": 13.629166666666666, + "grad_norm": 2.310328603995657, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 298572016, + "step": 3271 + }, + { + "epoch": 13.629166666666666, + "loss": 0.06343436986207962, + "loss_ce": 0.00038505299016833305, + "loss_iou": 0.2294921875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 298572016, + "step": 3271 + }, + { + "epoch": 13.633333333333333, + "grad_norm": 3.9734558947742147, + "learning_rate": 5e-05, + "loss": 0.1338, + "num_input_tokens_seen": 298661968, + "step": 3272 + }, + { + "epoch": 13.633333333333333, + "loss": 0.1281973421573639, + "loss_ce": 6.176564966153819e-07, + "loss_iou": 0.18359375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 298661968, + "step": 3272 + }, + { + "epoch": 13.6375, + "grad_norm": 3.734083755989058, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 298753168, + "step": 3273 + }, + { + "epoch": 13.6375, + "loss": 0.07938088476657867, + "loss_ce": 4.663808340410469e-06, + "loss_iou": 0.23828125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 298753168, + "step": 3273 + }, + { + "epoch": 13.641666666666667, + "grad_norm": 2.7377364329741494, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 298844796, + "step": 3274 + }, + { + "epoch": 13.641666666666667, + "loss": 0.06879688799381256, + "loss_ce": 2.637311581565882e-06, + "loss_iou": 0.25390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 298844796, + "step": 3274 + }, + { + "epoch": 13.645833333333334, + "grad_norm": 5.587498360937527, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 298936064, + "step": 3275 + }, + { + "epoch": 13.645833333333334, + "loss": 0.07425408810377121, + "loss_ce": 4.820844878850039e-06, + "loss_iou": 0.173828125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 298936064, + "step": 3275 + }, + { + "epoch": 13.65, + "grad_norm": 1.4913922652249463, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 299027768, + "step": 3276 + }, + { + "epoch": 13.65, + "loss": 0.05714738368988037, + "loss_ce": 0.002337811980396509, + "loss_iou": 0.265625, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 299027768, + "step": 3276 + }, + { + "epoch": 13.654166666666667, + "grad_norm": 2.540078846532057, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 299119264, + "step": 3277 + }, + { + "epoch": 13.654166666666667, + "loss": 0.04711538180708885, + "loss_ce": 0.00022511978750117123, + "loss_iou": 0.189453125, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 299119264, + "step": 3277 + }, + { + "epoch": 13.658333333333333, + "grad_norm": 4.590505300981462, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 299211500, + "step": 3278 + }, + { + "epoch": 13.658333333333333, + "loss": 0.0764964297413826, + "loss_ce": 0.0002635241544339806, + "loss_iou": 0.2470703125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 299211500, + "step": 3278 + }, + { + "epoch": 13.6625, + "grad_norm": 3.1231933101305698, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 299302380, + "step": 3279 + }, + { + "epoch": 13.6625, + "loss": 0.05796603858470917, + "loss_ce": 1.3158914043742698e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 299302380, + "step": 3279 + }, + { + "epoch": 13.666666666666666, + "grad_norm": 4.598121246096437, + "learning_rate": 5e-05, + "loss": 0.1037, + "num_input_tokens_seen": 299393528, + "step": 3280 + }, + { + "epoch": 13.666666666666666, + "loss": 0.13770559430122375, + "loss_ce": 0.0002544168964959681, + "loss_iou": 0.376953125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 299393528, + "step": 3280 + }, + { + "epoch": 13.670833333333333, + "grad_norm": 14.41610190677706, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 299484840, + "step": 3281 + }, + { + "epoch": 13.670833333333333, + "loss": 0.0995384082198143, + "loss_ce": 3.584696241887286e-05, + "loss_iou": 0.34375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 299484840, + "step": 3281 + }, + { + "epoch": 13.675, + "grad_norm": 3.5831341085790216, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 299576196, + "step": 3282 + }, + { + "epoch": 13.675, + "loss": 0.08482582122087479, + "loss_ce": 2.2115034425951308e-06, + "loss_iou": 0.08984375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 299576196, + "step": 3282 + }, + { + "epoch": 13.679166666666667, + "grad_norm": 3.421202748259547, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 299667204, + "step": 3283 + }, + { + "epoch": 13.679166666666667, + "loss": 0.08301764726638794, + "loss_ce": 9.841729479376227e-06, + "loss_iou": 0.416015625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 299667204, + "step": 3283 + }, + { + "epoch": 13.683333333333334, + "grad_norm": 2.5523787944062843, + "learning_rate": 5e-05, + "loss": 0.1216, + "num_input_tokens_seen": 299758168, + "step": 3284 + }, + { + "epoch": 13.683333333333334, + "loss": 0.05732997506856918, + "loss_ce": 2.708056399569614e-06, + "loss_iou": 0.2578125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 299758168, + "step": 3284 + }, + { + "epoch": 13.6875, + "grad_norm": 2.867892743118164, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 299848980, + "step": 3285 + }, + { + "epoch": 13.6875, + "loss": 0.05687332898378372, + "loss_ce": 3.821900463663042e-06, + "loss_iou": 0.318359375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 299848980, + "step": 3285 + }, + { + "epoch": 13.691666666666666, + "grad_norm": 3.3272804987740137, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 299939920, + "step": 3286 + }, + { + "epoch": 13.691666666666666, + "loss": 0.04706061631441116, + "loss_ce": 2.51245137405931e-06, + "loss_iou": 0.310546875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 299939920, + "step": 3286 + }, + { + "epoch": 13.695833333333333, + "grad_norm": 6.618524967812087, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 300031380, + "step": 3287 + }, + { + "epoch": 13.695833333333333, + "loss": 0.07023078203201294, + "loss_ce": 0.00046760475379414856, + "loss_iou": 0.2197265625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 300031380, + "step": 3287 + }, + { + "epoch": 13.7, + "grad_norm": 1.8579559681332511, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 300123220, + "step": 3288 + }, + { + "epoch": 13.7, + "loss": 0.04622560739517212, + "loss_ce": 0.00044924189569428563, + "loss_iou": 0.3828125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 300123220, + "step": 3288 + }, + { + "epoch": 13.704166666666667, + "grad_norm": 3.685118217162781, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 300214848, + "step": 3289 + }, + { + "epoch": 13.704166666666667, + "loss": 0.04516689479351044, + "loss_ce": 8.769609962655522e-07, + "loss_iou": 0.302734375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 300214848, + "step": 3289 + }, + { + "epoch": 13.708333333333334, + "grad_norm": 4.049860614055312, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 300306604, + "step": 3290 + }, + { + "epoch": 13.708333333333334, + "loss": 0.05506318807601929, + "loss_ce": 7.051033026073128e-05, + "loss_iou": 0.177734375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 300306604, + "step": 3290 + }, + { + "epoch": 13.7125, + "grad_norm": 2.6490315457322073, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 300397640, + "step": 3291 + }, + { + "epoch": 13.7125, + "loss": 0.05209658667445183, + "loss_ce": 3.081056775044999e-06, + "loss_iou": 0.31640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 300397640, + "step": 3291 + }, + { + "epoch": 13.716666666666667, + "grad_norm": 2.240454278054644, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 300489368, + "step": 3292 + }, + { + "epoch": 13.716666666666667, + "loss": 0.07966948300600052, + "loss_ce": 0.0011782721849158406, + "loss_iou": 0.197265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 300489368, + "step": 3292 + }, + { + "epoch": 13.720833333333333, + "grad_norm": 4.747013091701193, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 300580500, + "step": 3293 + }, + { + "epoch": 13.720833333333333, + "loss": 0.04834999889135361, + "loss_ce": 1.0152914001082536e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 300580500, + "step": 3293 + }, + { + "epoch": 13.725, + "grad_norm": 2.851445875282756, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 300671940, + "step": 3294 + }, + { + "epoch": 13.725, + "loss": 0.032225240021944046, + "loss_ce": 0.0005861401441507041, + "loss_iou": 0.205078125, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 300671940, + "step": 3294 + }, + { + "epoch": 13.729166666666666, + "grad_norm": 3.034120458590585, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 300762992, + "step": 3295 + }, + { + "epoch": 13.729166666666666, + "loss": 0.05423610657453537, + "loss_ce": 9.792399941943586e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 300762992, + "step": 3295 + }, + { + "epoch": 13.733333333333333, + "grad_norm": 84.9857919333762, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 300854340, + "step": 3296 + }, + { + "epoch": 13.733333333333333, + "loss": 0.05023489147424698, + "loss_ce": 2.9535742669395404e-06, + "loss_iou": 0.2275390625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 300854340, + "step": 3296 + }, + { + "epoch": 13.7375, + "grad_norm": 2.8617475247265927, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 300945760, + "step": 3297 + }, + { + "epoch": 13.7375, + "loss": 0.04916559159755707, + "loss_ce": 0.0013216584920883179, + "loss_iou": 0.275390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 300945760, + "step": 3297 + }, + { + "epoch": 13.741666666666667, + "grad_norm": 2.957150236721741, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 301037156, + "step": 3298 + }, + { + "epoch": 13.741666666666667, + "loss": 0.07779006659984589, + "loss_ce": 1.6021556803025305e-05, + "loss_iou": 0.302734375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 301037156, + "step": 3298 + }, + { + "epoch": 13.745833333333334, + "grad_norm": 3.7764971279076978, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 301128548, + "step": 3299 + }, + { + "epoch": 13.745833333333334, + "loss": 0.11629924178123474, + "loss_ce": 1.2012060324195772e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 301128548, + "step": 3299 + }, + { + "epoch": 13.75, + "grad_norm": 9.032833363759718, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 301219812, + "step": 3300 + }, + { + "epoch": 13.75, + "loss": 0.07080667465925217, + "loss_ce": 5.8882683333649766e-06, + "loss_iou": 0.2197265625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 301219812, + "step": 3300 + }, + { + "epoch": 13.754166666666666, + "grad_norm": 2.5179286794266242, + "learning_rate": 5e-05, + "loss": 0.0367, + "num_input_tokens_seen": 301311568, + "step": 3301 + }, + { + "epoch": 13.754166666666666, + "loss": 0.038580723106861115, + "loss_ce": 6.501464213215513e-06, + "loss_iou": 0.19921875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 301311568, + "step": 3301 + }, + { + "epoch": 13.758333333333333, + "grad_norm": 2.915091842981452, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 301402176, + "step": 3302 + }, + { + "epoch": 13.758333333333333, + "loss": 0.0477927103638649, + "loss_ce": 9.814760232984554e-06, + "loss_iou": 0.31640625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 301402176, + "step": 3302 + }, + { + "epoch": 13.7625, + "grad_norm": 1.8902026133477452, + "learning_rate": 5e-05, + "loss": 0.0335, + "num_input_tokens_seen": 301493596, + "step": 3303 + }, + { + "epoch": 13.7625, + "loss": 0.04254970699548721, + "loss_ce": 0.0001684220042079687, + "loss_iou": 0.17578125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 301493596, + "step": 3303 + }, + { + "epoch": 13.766666666666667, + "grad_norm": 4.509340256046256, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 301585296, + "step": 3304 + }, + { + "epoch": 13.766666666666667, + "loss": 0.03628820180892944, + "loss_ce": 0.0005215998971834779, + "loss_iou": 0.294921875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 301585296, + "step": 3304 + }, + { + "epoch": 13.770833333333334, + "grad_norm": 5.033491555102182, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 301676888, + "step": 3305 + }, + { + "epoch": 13.770833333333334, + "loss": 0.034825798124074936, + "loss_ce": 5.241945927991765e-06, + "loss_iou": 0.310546875, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 301676888, + "step": 3305 + }, + { + "epoch": 13.775, + "grad_norm": 7.191350621732554, + "learning_rate": 5e-05, + "loss": 0.0957, + "num_input_tokens_seen": 301767656, + "step": 3306 + }, + { + "epoch": 13.775, + "loss": 0.11079069972038269, + "loss_ce": 1.1897653166670352e-05, + "loss_iou": 0.228515625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 301767656, + "step": 3306 + }, + { + "epoch": 13.779166666666667, + "grad_norm": 3.804325948813366, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 301858904, + "step": 3307 + }, + { + "epoch": 13.779166666666667, + "loss": 0.05447077751159668, + "loss_ce": 0.0006377688841894269, + "loss_iou": 0.328125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 301858904, + "step": 3307 + }, + { + "epoch": 13.783333333333333, + "grad_norm": 2.261119446992845, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 301949480, + "step": 3308 + }, + { + "epoch": 13.783333333333333, + "loss": 0.0620134212076664, + "loss_ce": 1.7031369452524814e-06, + "loss_iou": 0.2109375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 301949480, + "step": 3308 + }, + { + "epoch": 13.7875, + "grad_norm": 3.9577403010481476, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 302040868, + "step": 3309 + }, + { + "epoch": 13.7875, + "loss": 0.04542113095521927, + "loss_ce": 4.14906426158268e-05, + "loss_iou": 0.29296875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 302040868, + "step": 3309 + }, + { + "epoch": 13.791666666666666, + "grad_norm": 2.609996874344853, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 302132496, + "step": 3310 + }, + { + "epoch": 13.791666666666666, + "loss": 0.05624785274267197, + "loss_ce": 1.1587387234612834e-05, + "loss_iou": 0.3359375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 302132496, + "step": 3310 + }, + { + "epoch": 13.795833333333333, + "grad_norm": 3.227300689239271, + "learning_rate": 5e-05, + "loss": 0.0832, + "num_input_tokens_seen": 302223328, + "step": 3311 + }, + { + "epoch": 13.795833333333333, + "loss": 0.11890000104904175, + "loss_ce": 3.5156972444383428e-06, + "loss_iou": 0.263671875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 302223328, + "step": 3311 + }, + { + "epoch": 13.8, + "grad_norm": 5.317931108982696, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 302315076, + "step": 3312 + }, + { + "epoch": 13.8, + "loss": 0.061526887118816376, + "loss_ce": 1.870663254521787e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 302315076, + "step": 3312 + }, + { + "epoch": 13.804166666666667, + "grad_norm": 2.548530665417501, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 302406864, + "step": 3313 + }, + { + "epoch": 13.804166666666667, + "loss": 0.07590900361537933, + "loss_ce": 1.1779991837101988e-05, + "loss_iou": 0.3046875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 302406864, + "step": 3313 + }, + { + "epoch": 13.808333333333334, + "grad_norm": 2.7120286980691404, + "learning_rate": 5e-05, + "loss": 0.112, + "num_input_tokens_seen": 302497700, + "step": 3314 + }, + { + "epoch": 13.808333333333334, + "loss": 0.04778246209025383, + "loss_ce": 0.0002513352083042264, + "loss_iou": 0.390625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 302497700, + "step": 3314 + }, + { + "epoch": 13.8125, + "grad_norm": 12.520246038924078, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 302587720, + "step": 3315 + }, + { + "epoch": 13.8125, + "loss": 0.04641222953796387, + "loss_ce": 0.0002391317393630743, + "loss_iou": 0.3203125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 302587720, + "step": 3315 + }, + { + "epoch": 13.816666666666666, + "grad_norm": 3.094948368543814, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 302678844, + "step": 3316 + }, + { + "epoch": 13.816666666666666, + "loss": 0.06758721172809601, + "loss_ce": 0.0005095761734992266, + "loss_iou": 0.25, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 302678844, + "step": 3316 + }, + { + "epoch": 13.820833333333333, + "grad_norm": 5.153459037298692, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 302769940, + "step": 3317 + }, + { + "epoch": 13.820833333333333, + "loss": 0.05050954967737198, + "loss_ce": 3.34742981067393e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 302769940, + "step": 3317 + }, + { + "epoch": 13.825, + "grad_norm": 4.53011008560501, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 302861468, + "step": 3318 + }, + { + "epoch": 13.825, + "loss": 0.026505716145038605, + "loss_ce": 1.2008333669655258e-06, + "loss_iou": 0.220703125, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 302861468, + "step": 3318 + }, + { + "epoch": 13.829166666666667, + "grad_norm": 4.308616516778707, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 302951936, + "step": 3319 + }, + { + "epoch": 13.829166666666667, + "loss": 0.057527441531419754, + "loss_ce": 0.0002459501556586474, + "loss_iou": 0.201171875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 302951936, + "step": 3319 + }, + { + "epoch": 13.833333333333334, + "grad_norm": 3.117831840990339, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 303042944, + "step": 3320 + }, + { + "epoch": 13.833333333333334, + "loss": 0.0689389780163765, + "loss_ce": 1.5027325389382895e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 303042944, + "step": 3320 + }, + { + "epoch": 13.8375, + "grad_norm": 3.3334205918882374, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 303134920, + "step": 3321 + }, + { + "epoch": 13.8375, + "loss": 0.05091170221567154, + "loss_ce": 0.0007865775842219591, + "loss_iou": 0.3046875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 303134920, + "step": 3321 + }, + { + "epoch": 13.841666666666667, + "grad_norm": 3.6615777665525617, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 303226032, + "step": 3322 + }, + { + "epoch": 13.841666666666667, + "loss": 0.04552161693572998, + "loss_ce": 0.00040137613541446626, + "loss_iou": 0.2470703125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 303226032, + "step": 3322 + }, + { + "epoch": 13.845833333333333, + "grad_norm": 3.2236865662362146, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 303317104, + "step": 3323 + }, + { + "epoch": 13.845833333333333, + "loss": 0.06049469858407974, + "loss_ce": 8.860646630637348e-06, + "loss_iou": 0.310546875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 303317104, + "step": 3323 + }, + { + "epoch": 13.85, + "grad_norm": 3.551217326920821, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 303408324, + "step": 3324 + }, + { + "epoch": 13.85, + "loss": 0.055788613855838776, + "loss_ce": 6.351471529342234e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 303408324, + "step": 3324 + }, + { + "epoch": 13.854166666666666, + "grad_norm": 2.225093520187707, + "learning_rate": 5e-05, + "loss": 0.0725, + "num_input_tokens_seen": 303499640, + "step": 3325 + }, + { + "epoch": 13.854166666666666, + "loss": 0.07631266117095947, + "loss_ce": 0.006007795687764883, + "loss_iou": 0.2421875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 303499640, + "step": 3325 + }, + { + "epoch": 13.858333333333333, + "grad_norm": 2.7554504416171737, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 303590800, + "step": 3326 + }, + { + "epoch": 13.858333333333333, + "loss": 0.08511213213205338, + "loss_ce": 0.0009370205807499588, + "loss_iou": 0.26171875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 303590800, + "step": 3326 + }, + { + "epoch": 13.8625, + "grad_norm": 14.147968514520535, + "learning_rate": 5e-05, + "loss": 0.0971, + "num_input_tokens_seen": 303681040, + "step": 3327 + }, + { + "epoch": 13.8625, + "loss": 0.10966304689645767, + "loss_ce": 1.3385659258347005e-05, + "loss_iou": 0.22265625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 303681040, + "step": 3327 + }, + { + "epoch": 13.866666666666667, + "grad_norm": 12.554145044744343, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 303771648, + "step": 3328 + }, + { + "epoch": 13.866666666666667, + "loss": 0.05274789035320282, + "loss_ce": 0.0025312108919024467, + "loss_iou": 0.328125, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 303771648, + "step": 3328 + }, + { + "epoch": 13.870833333333334, + "grad_norm": 2.5352712739982453, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 303863712, + "step": 3329 + }, + { + "epoch": 13.870833333333334, + "loss": 0.03622327744960785, + "loss_ce": 0.00017438957002013922, + "loss_iou": 0.259765625, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 303863712, + "step": 3329 + }, + { + "epoch": 13.875, + "grad_norm": 3.100654221670784, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 303954564, + "step": 3330 + }, + { + "epoch": 13.875, + "loss": 0.08480900526046753, + "loss_ce": 3.116882362519391e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 303954564, + "step": 3330 + }, + { + "epoch": 13.879166666666666, + "grad_norm": 3.217399946775899, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 304046424, + "step": 3331 + }, + { + "epoch": 13.879166666666666, + "loss": 0.1203436478972435, + "loss_ce": 4.335532139521092e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 304046424, + "step": 3331 + }, + { + "epoch": 13.883333333333333, + "grad_norm": 6.4596229589144425, + "learning_rate": 5e-05, + "loss": 0.1057, + "num_input_tokens_seen": 304138408, + "step": 3332 + }, + { + "epoch": 13.883333333333333, + "loss": 0.11160202324390411, + "loss_ce": 6.864986062282696e-06, + "loss_iou": 0.22265625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 304138408, + "step": 3332 + }, + { + "epoch": 13.8875, + "grad_norm": 2.7269878636408387, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 304229500, + "step": 3333 + }, + { + "epoch": 13.8875, + "loss": 0.032473981380462646, + "loss_ce": 1.0906089300988242e-05, + "loss_iou": 0.154296875, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 304229500, + "step": 3333 + }, + { + "epoch": 13.891666666666667, + "grad_norm": 5.250857290393431, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 304320936, + "step": 3334 + }, + { + "epoch": 13.891666666666667, + "loss": 0.06586028635501862, + "loss_ce": 0.0006747441948391497, + "loss_iou": 0.2177734375, + "loss_num": 0.01300048828125, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 304320936, + "step": 3334 + }, + { + "epoch": 13.895833333333334, + "grad_norm": 1.8983321006579221, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 304412016, + "step": 3335 + }, + { + "epoch": 13.895833333333334, + "loss": 0.061099447309970856, + "loss_ce": 0.00040761884883977473, + "loss_iou": 0.11181640625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 304412016, + "step": 3335 + }, + { + "epoch": 13.9, + "grad_norm": 4.511945486895753, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 304503732, + "step": 3336 + }, + { + "epoch": 13.9, + "loss": 0.07431946694850922, + "loss_ce": 9.166088602796663e-06, + "loss_iou": 0.380859375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 304503732, + "step": 3336 + }, + { + "epoch": 13.904166666666667, + "grad_norm": 2.205920050046423, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 304594832, + "step": 3337 + }, + { + "epoch": 13.904166666666667, + "loss": 0.05776306986808777, + "loss_ce": 3.9070018829079345e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 304594832, + "step": 3337 + }, + { + "epoch": 13.908333333333333, + "grad_norm": 2.3042991811889095, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 304685720, + "step": 3338 + }, + { + "epoch": 13.908333333333333, + "loss": 0.046074278652668, + "loss_ce": 3.851512883557007e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 304685720, + "step": 3338 + }, + { + "epoch": 13.9125, + "grad_norm": 2.630684878369044, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 304776520, + "step": 3339 + }, + { + "epoch": 13.9125, + "loss": 0.10420745611190796, + "loss_ce": 3.569770706235431e-05, + "loss_iou": 0.2265625, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 304776520, + "step": 3339 + }, + { + "epoch": 13.916666666666666, + "grad_norm": 3.1437627546639684, + "learning_rate": 5e-05, + "loss": 0.104, + "num_input_tokens_seen": 304868184, + "step": 3340 + }, + { + "epoch": 13.916666666666666, + "loss": 0.05712635815143585, + "loss_ce": 0.0039037007372826338, + "loss_iou": 0.2890625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 304868184, + "step": 3340 + }, + { + "epoch": 13.920833333333333, + "grad_norm": 2.5147699603318334, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 304959688, + "step": 3341 + }, + { + "epoch": 13.920833333333333, + "loss": 0.06732615828514099, + "loss_ce": 0.006901353597640991, + "loss_iou": 0.275390625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 304959688, + "step": 3341 + }, + { + "epoch": 13.925, + "grad_norm": 3.285856756914253, + "learning_rate": 5e-05, + "loss": 0.0878, + "num_input_tokens_seen": 305049076, + "step": 3342 + }, + { + "epoch": 13.925, + "loss": 0.11383511871099472, + "loss_ce": 1.9817061911453493e-05, + "loss_iou": 0.349609375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 305049076, + "step": 3342 + }, + { + "epoch": 13.929166666666667, + "grad_norm": 22.50870734198593, + "learning_rate": 5e-05, + "loss": 0.0925, + "num_input_tokens_seen": 305140444, + "step": 3343 + }, + { + "epoch": 13.929166666666667, + "loss": 0.0981772392988205, + "loss_ce": 6.322274566628039e-05, + "loss_iou": 0.1103515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 305140444, + "step": 3343 + }, + { + "epoch": 13.933333333333334, + "grad_norm": 8.855170280331524, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 305231820, + "step": 3344 + }, + { + "epoch": 13.933333333333334, + "loss": 0.09573826193809509, + "loss_ce": 4.617154445440974e-06, + "loss_iou": 0.248046875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 305231820, + "step": 3344 + }, + { + "epoch": 13.9375, + "grad_norm": 2.690396848874294, + "learning_rate": 5e-05, + "loss": 0.1034, + "num_input_tokens_seen": 305323096, + "step": 3345 + }, + { + "epoch": 13.9375, + "loss": 0.1162140816450119, + "loss_ce": 3.1447380024474114e-06, + "loss_iou": 0.1796875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 305323096, + "step": 3345 + }, + { + "epoch": 13.941666666666666, + "grad_norm": 3.1007746118976574, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 305414204, + "step": 3346 + }, + { + "epoch": 13.941666666666666, + "loss": 0.07584530860185623, + "loss_ce": 0.00011593498493311927, + "loss_iou": 0.1181640625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 305414204, + "step": 3346 + }, + { + "epoch": 13.945833333333333, + "grad_norm": 1.568536077812119, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 305505092, + "step": 3347 + }, + { + "epoch": 13.945833333333333, + "loss": 0.07008616626262665, + "loss_ce": 2.5482622731942683e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.01397705078125, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 305505092, + "step": 3347 + }, + { + "epoch": 13.95, + "grad_norm": 3.939095608261935, + "learning_rate": 5e-05, + "loss": 0.0927, + "num_input_tokens_seen": 305596448, + "step": 3348 + }, + { + "epoch": 13.95, + "loss": 0.06267900764942169, + "loss_ce": 0.00027056410908699036, + "loss_iou": 0.294921875, + "loss_num": 0.012451171875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 305596448, + "step": 3348 + }, + { + "epoch": 13.954166666666667, + "grad_norm": 5.10613752536137, + "learning_rate": 5e-05, + "loss": 0.0819, + "num_input_tokens_seen": 305687404, + "step": 3349 + }, + { + "epoch": 13.954166666666667, + "loss": 0.1333942413330078, + "loss_ce": 0.00033760571386665106, + "loss_iou": 0.244140625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 305687404, + "step": 3349 + }, + { + "epoch": 13.958333333333334, + "grad_norm": 2.8926341934022926, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 305779152, + "step": 3350 + }, + { + "epoch": 13.958333333333334, + "loss": 0.07041196525096893, + "loss_ce": 0.0004351534298621118, + "loss_iou": 0.322265625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 305779152, + "step": 3350 + }, + { + "epoch": 13.9625, + "grad_norm": 30.49982092068426, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 305870624, + "step": 3351 + }, + { + "epoch": 13.9625, + "loss": 0.09339140355587006, + "loss_ce": 0.00034330939524807036, + "loss_iou": 0.1884765625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 305870624, + "step": 3351 + }, + { + "epoch": 13.966666666666667, + "grad_norm": 7.2361907832037575, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 305962124, + "step": 3352 + }, + { + "epoch": 13.966666666666667, + "loss": 0.0672103762626648, + "loss_ce": 2.592532109702006e-05, + "loss_iou": 0.2734375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 305962124, + "step": 3352 + }, + { + "epoch": 13.970833333333333, + "grad_norm": 2.3789658089565124, + "learning_rate": 5e-05, + "loss": 0.0329, + "num_input_tokens_seen": 306052748, + "step": 3353 + }, + { + "epoch": 13.970833333333333, + "loss": 0.033177584409713745, + "loss_ce": 4.97952350997366e-06, + "loss_iou": 0.1953125, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 306052748, + "step": 3353 + }, + { + "epoch": 13.975, + "grad_norm": 2.603933218904834, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 306143928, + "step": 3354 + }, + { + "epoch": 13.975, + "loss": 0.05014060065150261, + "loss_ce": 7.84904113970697e-06, + "loss_iou": 0.1953125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 306143928, + "step": 3354 + }, + { + "epoch": 13.979166666666666, + "grad_norm": 5.750381590450532, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 306235504, + "step": 3355 + }, + { + "epoch": 13.979166666666666, + "loss": 0.057033687829971313, + "loss_ce": 0.0032922320533543825, + "loss_iou": 0.310546875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 306235504, + "step": 3355 + }, + { + "epoch": 13.983333333333333, + "grad_norm": 3.811378525309582, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 306327264, + "step": 3356 + }, + { + "epoch": 13.983333333333333, + "loss": 0.06342743337154388, + "loss_ce": 0.000423891848186031, + "loss_iou": 0.197265625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 306327264, + "step": 3356 + }, + { + "epoch": 13.9875, + "grad_norm": 5.83970757945946, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 306419288, + "step": 3357 + }, + { + "epoch": 13.9875, + "loss": 0.05281481146812439, + "loss_ce": 0.002430290449410677, + "loss_iou": 0.1787109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 306419288, + "step": 3357 + }, + { + "epoch": 13.991666666666667, + "grad_norm": 3.286523795520112, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 306510708, + "step": 3358 + }, + { + "epoch": 13.991666666666667, + "loss": 0.12988848984241486, + "loss_ce": 0.003622006392106414, + "loss_iou": 0.31640625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 306510708, + "step": 3358 + }, + { + "epoch": 13.995833333333334, + "grad_norm": 3.2398357988179542, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 306602088, + "step": 3359 + }, + { + "epoch": 13.995833333333334, + "loss": 0.05087633430957794, + "loss_ce": 0.0009114285348914564, + "loss_iou": 0.36328125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 306602088, + "step": 3359 + }, + { + "epoch": 14.0, + "grad_norm": 5.617467287281432, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 306692936, + "step": 3360 + }, + { + "epoch": 14.0, + "loss": 0.07821273058652878, + "loss_ce": 3.8057094116084045e-06, + "loss_iou": 0.4140625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 306692936, + "step": 3360 + }, + { + "epoch": 14.004166666666666, + "grad_norm": 4.188985861313355, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 306784476, + "step": 3361 + }, + { + "epoch": 14.004166666666666, + "loss": 0.03913436457514763, + "loss_ce": 1.0830644896486774e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 306784476, + "step": 3361 + }, + { + "epoch": 14.008333333333333, + "grad_norm": 3.77837181129567, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 306875068, + "step": 3362 + }, + { + "epoch": 14.008333333333333, + "loss": 0.040547363460063934, + "loss_ce": 2.0018822397105396e-05, + "loss_iou": 0.353515625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 306875068, + "step": 3362 + }, + { + "epoch": 14.0125, + "grad_norm": 2.5090734897570366, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 306966612, + "step": 3363 + }, + { + "epoch": 14.0125, + "loss": 0.08640769869089127, + "loss_ce": 0.00011924252612516284, + "loss_iou": 0.25390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 306966612, + "step": 3363 + }, + { + "epoch": 14.016666666666667, + "grad_norm": 2.772188082718569, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 307058192, + "step": 3364 + }, + { + "epoch": 14.016666666666667, + "loss": 0.05798184871673584, + "loss_ce": 1.3712133295484819e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 307058192, + "step": 3364 + }, + { + "epoch": 14.020833333333334, + "grad_norm": 2.3346947679030703, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 307149620, + "step": 3365 + }, + { + "epoch": 14.020833333333334, + "loss": 0.04485933482646942, + "loss_ce": 4.4271389924688265e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 307149620, + "step": 3365 + }, + { + "epoch": 14.025, + "grad_norm": 1.825324947239382, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 307240716, + "step": 3366 + }, + { + "epoch": 14.025, + "loss": 0.0676366314291954, + "loss_ce": 9.680274160928093e-06, + "loss_iou": 0.298828125, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 307240716, + "step": 3366 + }, + { + "epoch": 14.029166666666667, + "grad_norm": 3.571026495378173, + "learning_rate": 5e-05, + "loss": 0.0773, + "num_input_tokens_seen": 307332192, + "step": 3367 + }, + { + "epoch": 14.029166666666667, + "loss": 0.029795819893479347, + "loss_ce": 3.355112858116627e-05, + "loss_iou": 0.201171875, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 307332192, + "step": 3367 + }, + { + "epoch": 14.033333333333333, + "grad_norm": 2.6230559325630014, + "learning_rate": 5e-05, + "loss": 0.0332, + "num_input_tokens_seen": 307423484, + "step": 3368 + }, + { + "epoch": 14.033333333333333, + "loss": 0.03575886785984039, + "loss_ce": 2.2783957319916226e-05, + "loss_iou": 0.265625, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 307423484, + "step": 3368 + }, + { + "epoch": 14.0375, + "grad_norm": 12.282876672065623, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 307513036, + "step": 3369 + }, + { + "epoch": 14.0375, + "loss": 0.05255713686347008, + "loss_ce": 3.638456837506965e-05, + "loss_iou": 0.15234375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 307513036, + "step": 3369 + }, + { + "epoch": 14.041666666666666, + "grad_norm": 6.657644439684569, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 307603948, + "step": 3370 + }, + { + "epoch": 14.041666666666666, + "loss": 0.07731955498456955, + "loss_ce": 3.269945864303736e-06, + "loss_iou": 0.26953125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 307603948, + "step": 3370 + }, + { + "epoch": 14.045833333333333, + "grad_norm": 4.046932156015916, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 307696304, + "step": 3371 + }, + { + "epoch": 14.045833333333333, + "loss": 0.05779094621539116, + "loss_ce": 4.787262878380716e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 307696304, + "step": 3371 + }, + { + "epoch": 14.05, + "grad_norm": 2.9804179259547467, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 307787224, + "step": 3372 + }, + { + "epoch": 14.05, + "loss": 0.0840468555688858, + "loss_ce": 4.7224642912624404e-05, + "loss_iou": 0.142578125, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 307787224, + "step": 3372 + }, + { + "epoch": 14.054166666666667, + "grad_norm": 2.744260667240401, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 307879208, + "step": 3373 + }, + { + "epoch": 14.054166666666667, + "loss": 0.03362205624580383, + "loss_ce": 0.00018241905490867794, + "loss_iou": 0.2421875, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 307879208, + "step": 3373 + }, + { + "epoch": 14.058333333333334, + "grad_norm": 9.798140760602497, + "learning_rate": 5e-05, + "loss": 0.0871, + "num_input_tokens_seen": 307970324, + "step": 3374 + }, + { + "epoch": 14.058333333333334, + "loss": 0.05981835722923279, + "loss_ce": 3.90674313166528e-06, + "loss_iou": 0.3984375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 307970324, + "step": 3374 + }, + { + "epoch": 14.0625, + "grad_norm": 4.46546384352056, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 308061856, + "step": 3375 + }, + { + "epoch": 14.0625, + "loss": 0.06678728759288788, + "loss_ce": 4.534380423137918e-05, + "loss_iou": 0.275390625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 308061856, + "step": 3375 + }, + { + "epoch": 14.066666666666666, + "grad_norm": 2.906845444184048, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 308153420, + "step": 3376 + }, + { + "epoch": 14.066666666666666, + "loss": 0.028707150369882584, + "loss_ce": 0.0005699438625015318, + "loss_iou": 0.1669921875, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 308153420, + "step": 3376 + }, + { + "epoch": 14.070833333333333, + "grad_norm": 3.116604490032572, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 308245176, + "step": 3377 + }, + { + "epoch": 14.070833333333333, + "loss": 0.06883566081523895, + "loss_ce": 0.00015584740322083235, + "loss_iou": 0.349609375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 308245176, + "step": 3377 + }, + { + "epoch": 14.075, + "grad_norm": 1.9318580460017465, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 308336652, + "step": 3378 + }, + { + "epoch": 14.075, + "loss": 0.04875883460044861, + "loss_ce": 2.226128890470136e-05, + "loss_iou": 0.1875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 308336652, + "step": 3378 + }, + { + "epoch": 14.079166666666667, + "grad_norm": 4.22545329602741, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 308428316, + "step": 3379 + }, + { + "epoch": 14.079166666666667, + "loss": 0.10865432024002075, + "loss_ce": 0.002613367047160864, + "loss_iou": 0.2275390625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 308428316, + "step": 3379 + }, + { + "epoch": 14.083333333333334, + "grad_norm": 1.2427563657952412, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 308519912, + "step": 3380 + }, + { + "epoch": 14.083333333333334, + "loss": 0.049828533083200455, + "loss_ce": 8.586703188484535e-06, + "loss_iou": 0.28125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 308519912, + "step": 3380 + }, + { + "epoch": 14.0875, + "grad_norm": 1.1341950938205148, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 308610988, + "step": 3381 + }, + { + "epoch": 14.0875, + "loss": 0.04335072636604309, + "loss_ce": 8.138382327160798e-06, + "loss_iou": 0.240234375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 308610988, + "step": 3381 + }, + { + "epoch": 14.091666666666667, + "grad_norm": 2.1222583710275025, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 308702240, + "step": 3382 + }, + { + "epoch": 14.091666666666667, + "loss": 0.0367155522108078, + "loss_ce": 2.908373062382452e-06, + "loss_iou": 0.220703125, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 308702240, + "step": 3382 + }, + { + "epoch": 14.095833333333333, + "grad_norm": 3.906211488436715, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 308793188, + "step": 3383 + }, + { + "epoch": 14.095833333333333, + "loss": 0.055466026067733765, + "loss_ce": 1.5587416783091612e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 308793188, + "step": 3383 + }, + { + "epoch": 14.1, + "grad_norm": 3.340470863630639, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 308883792, + "step": 3384 + }, + { + "epoch": 14.1, + "loss": 0.10700327903032303, + "loss_ce": 3.9162659959401935e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 308883792, + "step": 3384 + }, + { + "epoch": 14.104166666666666, + "grad_norm": 3.95383471125247, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 308975832, + "step": 3385 + }, + { + "epoch": 14.104166666666666, + "loss": 0.03994490206241608, + "loss_ce": 1.2653750673052855e-05, + "loss_iou": 0.37109375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 308975832, + "step": 3385 + }, + { + "epoch": 14.108333333333333, + "grad_norm": 3.0562950230924226, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 309066836, + "step": 3386 + }, + { + "epoch": 14.108333333333333, + "loss": 0.07869721204042435, + "loss_ce": 0.0010452393908053637, + "loss_iou": 0.2578125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 309066836, + "step": 3386 + }, + { + "epoch": 14.1125, + "grad_norm": 3.4501582032949134, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 309158444, + "step": 3387 + }, + { + "epoch": 14.1125, + "loss": 0.030149977654218674, + "loss_ce": 4.438710311660543e-05, + "loss_iou": 0.3125, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 309158444, + "step": 3387 + }, + { + "epoch": 14.116666666666667, + "grad_norm": 2.3305611167856184, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 309249628, + "step": 3388 + }, + { + "epoch": 14.116666666666667, + "loss": 0.0578351654112339, + "loss_ce": 4.353590156824794e-06, + "loss_iou": 0.349609375, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 309249628, + "step": 3388 + }, + { + "epoch": 14.120833333333334, + "grad_norm": 2.9320244192284792, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 309340384, + "step": 3389 + }, + { + "epoch": 14.120833333333334, + "loss": 0.07648331671953201, + "loss_ce": 6.266511263675056e-06, + "loss_iou": 0.138671875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 309340384, + "step": 3389 + }, + { + "epoch": 14.125, + "grad_norm": 2.5052687026659686, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 309431800, + "step": 3390 + }, + { + "epoch": 14.125, + "loss": 0.0563613623380661, + "loss_ce": 2.5910518161254004e-05, + "loss_iou": 0.255859375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 309431800, + "step": 3390 + }, + { + "epoch": 14.129166666666666, + "grad_norm": 3.1261621170052343, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 309522736, + "step": 3391 + }, + { + "epoch": 14.129166666666666, + "loss": 0.046834796667099, + "loss_ce": 5.573211637965869e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 309522736, + "step": 3391 + }, + { + "epoch": 14.133333333333333, + "grad_norm": 2.719234641985888, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 309613952, + "step": 3392 + }, + { + "epoch": 14.133333333333333, + "loss": 0.11014129221439362, + "loss_ce": 0.0001406825758749619, + "loss_iou": 0.349609375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 309613952, + "step": 3392 + }, + { + "epoch": 14.1375, + "grad_norm": 2.6314723677992595, + "learning_rate": 5e-05, + "loss": 0.0278, + "num_input_tokens_seen": 309705516, + "step": 3393 + }, + { + "epoch": 14.1375, + "loss": 0.024617359042167664, + "loss_ce": 0.0006610597483813763, + "loss_iou": 0.13671875, + "loss_num": 0.004791259765625, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 309705516, + "step": 3393 + }, + { + "epoch": 14.141666666666667, + "grad_norm": 15.164866486052677, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 309797308, + "step": 3394 + }, + { + "epoch": 14.141666666666667, + "loss": 0.0349428728222847, + "loss_ce": 3.076143184443936e-05, + "loss_iou": 0.236328125, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 309797308, + "step": 3394 + }, + { + "epoch": 14.145833333333334, + "grad_norm": 3.320160941361757, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 309888808, + "step": 3395 + }, + { + "epoch": 14.145833333333334, + "loss": 0.07468635588884354, + "loss_ce": 2.5103210646193475e-05, + "loss_iou": 0.283203125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 309888808, + "step": 3395 + }, + { + "epoch": 14.15, + "grad_norm": 7.294682697484828, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 309980220, + "step": 3396 + }, + { + "epoch": 14.15, + "loss": 0.04793470725417137, + "loss_ce": 0.00011366332182660699, + "loss_iou": 0.28515625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 309980220, + "step": 3396 + }, + { + "epoch": 14.154166666666667, + "grad_norm": 3.0591828989824954, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 310071624, + "step": 3397 + }, + { + "epoch": 14.154166666666667, + "loss": 0.06359957903623581, + "loss_ce": 9.428997600480216e-07, + "loss_iou": 0.1865234375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 310071624, + "step": 3397 + }, + { + "epoch": 14.158333333333333, + "grad_norm": 0.7538136953677265, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 310163344, + "step": 3398 + }, + { + "epoch": 14.158333333333333, + "loss": 0.07692218571901321, + "loss_ce": 2.6306820473109838e-06, + "loss_iou": 0.2578125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 310163344, + "step": 3398 + }, + { + "epoch": 14.1625, + "grad_norm": 1.6353708868238535, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 310255072, + "step": 3399 + }, + { + "epoch": 14.1625, + "loss": 0.09011051058769226, + "loss_ce": 1.4989738701842725e-05, + "loss_iou": 0.248046875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 310255072, + "step": 3399 + }, + { + "epoch": 14.166666666666666, + "grad_norm": 4.702226228150257, + "learning_rate": 5e-05, + "loss": 0.0382, + "num_input_tokens_seen": 310346900, + "step": 3400 + }, + { + "epoch": 14.166666666666666, + "loss": 0.05216794088482857, + "loss_ce": 0.0003185725654475391, + "loss_iou": 0.27734375, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 310346900, + "step": 3400 + }, + { + "epoch": 14.170833333333333, + "grad_norm": 2.424212958036511, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 310438064, + "step": 3401 + }, + { + "epoch": 14.170833333333333, + "loss": 0.16618862748146057, + "loss_ce": 5.094004882266745e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 310438064, + "step": 3401 + }, + { + "epoch": 14.175, + "grad_norm": 2.365340500812245, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 310529152, + "step": 3402 + }, + { + "epoch": 14.175, + "loss": 0.05468135327100754, + "loss_ce": 5.488721944857389e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 310529152, + "step": 3402 + }, + { + "epoch": 14.179166666666667, + "grad_norm": 5.275771030939526, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 310620380, + "step": 3403 + }, + { + "epoch": 14.179166666666667, + "loss": 0.04898332804441452, + "loss_ce": 2.6164125301875174e-06, + "loss_iou": 0.3046875, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 310620380, + "step": 3403 + }, + { + "epoch": 14.183333333333334, + "grad_norm": 4.5906342531945405, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 310711712, + "step": 3404 + }, + { + "epoch": 14.183333333333334, + "loss": 0.044072914868593216, + "loss_ce": 5.13083505211398e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 310711712, + "step": 3404 + }, + { + "epoch": 14.1875, + "grad_norm": 3.312375728688097, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 310803616, + "step": 3405 + }, + { + "epoch": 14.1875, + "loss": 0.07145528495311737, + "loss_ce": 5.941041308688e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 310803616, + "step": 3405 + }, + { + "epoch": 14.191666666666666, + "grad_norm": 11.85380634188713, + "learning_rate": 5e-05, + "loss": 0.1209, + "num_input_tokens_seen": 310894768, + "step": 3406 + }, + { + "epoch": 14.191666666666666, + "loss": 0.15935643017292023, + "loss_ce": 0.0001004475197987631, + "loss_iou": 0.22265625, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 310894768, + "step": 3406 + }, + { + "epoch": 14.195833333333333, + "grad_norm": 3.7060806106659654, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 310985924, + "step": 3407 + }, + { + "epoch": 14.195833333333333, + "loss": 0.08576707541942596, + "loss_ce": 0.0003026033518835902, + "loss_iou": 0.279296875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 310985924, + "step": 3407 + }, + { + "epoch": 14.2, + "grad_norm": 2.983513817753317, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 311077484, + "step": 3408 + }, + { + "epoch": 14.2, + "loss": 0.04799432307481766, + "loss_ce": 0.00013513251906260848, + "loss_iou": 0.287109375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 311077484, + "step": 3408 + }, + { + "epoch": 14.204166666666667, + "grad_norm": 2.1804492896885983, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 311169032, + "step": 3409 + }, + { + "epoch": 14.204166666666667, + "loss": 0.050522398203611374, + "loss_ce": 7.684266165597364e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 311169032, + "step": 3409 + }, + { + "epoch": 14.208333333333334, + "grad_norm": 2.7746453176567187, + "learning_rate": 5e-05, + "loss": 0.0874, + "num_input_tokens_seen": 311259696, + "step": 3410 + }, + { + "epoch": 14.208333333333334, + "loss": 0.10649485886096954, + "loss_ce": 3.762988853850402e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 311259696, + "step": 3410 + }, + { + "epoch": 14.2125, + "grad_norm": 3.558439839591837, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 311350888, + "step": 3411 + }, + { + "epoch": 14.2125, + "loss": 0.051032066345214844, + "loss_ce": 5.24527276866138e-05, + "loss_iou": 0.310546875, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 311350888, + "step": 3411 + }, + { + "epoch": 14.216666666666667, + "grad_norm": 3.6374062420646713, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 311442084, + "step": 3412 + }, + { + "epoch": 14.216666666666667, + "loss": 0.036105334758758545, + "loss_ce": 3.0388860068342183e-06, + "loss_iou": 0.380859375, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 311442084, + "step": 3412 + }, + { + "epoch": 14.220833333333333, + "grad_norm": 3.0367560816333987, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 311533748, + "step": 3413 + }, + { + "epoch": 14.220833333333333, + "loss": 0.04989338293671608, + "loss_ce": 3.529056630213745e-05, + "loss_iou": 0.171875, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 311533748, + "step": 3413 + }, + { + "epoch": 14.225, + "grad_norm": 2.3895487962927464, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 311625040, + "step": 3414 + }, + { + "epoch": 14.225, + "loss": 0.07540172338485718, + "loss_ce": 8.049310054047965e-06, + "loss_iou": 0.287109375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 311625040, + "step": 3414 + }, + { + "epoch": 14.229166666666666, + "grad_norm": 3.3165335421466415, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 311716492, + "step": 3415 + }, + { + "epoch": 14.229166666666666, + "loss": 0.045228682458400726, + "loss_ce": 0.004213057924062014, + "loss_iou": 0.26953125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 311716492, + "step": 3415 + }, + { + "epoch": 14.233333333333333, + "grad_norm": 2.8110109784901676, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 311807624, + "step": 3416 + }, + { + "epoch": 14.233333333333333, + "loss": 0.028724106028676033, + "loss_ce": 7.06592118149274e-06, + "loss_iou": 0.271484375, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 311807624, + "step": 3416 + }, + { + "epoch": 14.2375, + "grad_norm": 7.586267983628135, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 311899152, + "step": 3417 + }, + { + "epoch": 14.2375, + "loss": 0.04901839420199394, + "loss_ce": 8.345822425326332e-05, + "loss_iou": 0.3984375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 311899152, + "step": 3417 + }, + { + "epoch": 14.241666666666667, + "grad_norm": 2.708253309616207, + "learning_rate": 5e-05, + "loss": 0.111, + "num_input_tokens_seen": 311989696, + "step": 3418 + }, + { + "epoch": 14.241666666666667, + "loss": 0.12908238172531128, + "loss_ce": 0.0003134589351247996, + "loss_iou": 0.2216796875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 311989696, + "step": 3418 + }, + { + "epoch": 14.245833333333334, + "grad_norm": 1.306529737096536, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 312080840, + "step": 3419 + }, + { + "epoch": 14.245833333333334, + "loss": 0.060873642563819885, + "loss_ce": 0.0001436614984413609, + "loss_iou": 0.2578125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 312080840, + "step": 3419 + }, + { + "epoch": 14.25, + "grad_norm": 3.000357538609261, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 312172280, + "step": 3420 + }, + { + "epoch": 14.25, + "loss": 0.05053142458200455, + "loss_ce": 0.00010112797463079914, + "loss_iou": 0.2109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 312172280, + "step": 3420 + }, + { + "epoch": 14.254166666666666, + "grad_norm": 2.3051491048317834, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 312264132, + "step": 3421 + }, + { + "epoch": 14.254166666666666, + "loss": 0.03641936928033829, + "loss_ce": 0.004787897691130638, + "loss_iou": 0.154296875, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 312264132, + "step": 3421 + }, + { + "epoch": 14.258333333333333, + "grad_norm": 2.3064057360485966, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 312355324, + "step": 3422 + }, + { + "epoch": 14.258333333333333, + "loss": 0.03896855190396309, + "loss_ce": 0.000268447125563398, + "loss_iou": 0.1953125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 312355324, + "step": 3422 + }, + { + "epoch": 14.2625, + "grad_norm": 2.441656794874996, + "learning_rate": 5e-05, + "loss": 0.0842, + "num_input_tokens_seen": 312446936, + "step": 3423 + }, + { + "epoch": 14.2625, + "loss": 0.07822424173355103, + "loss_ce": 0.0009155810112133622, + "loss_iou": 0.2373046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 312446936, + "step": 3423 + }, + { + "epoch": 14.266666666666667, + "grad_norm": 1.7532222834884197, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 312537936, + "step": 3424 + }, + { + "epoch": 14.266666666666667, + "loss": 0.09173586964607239, + "loss_ce": 6.106894579716027e-05, + "loss_iou": 0.328125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 312537936, + "step": 3424 + }, + { + "epoch": 14.270833333333334, + "grad_norm": 6.687486262938646, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 312629212, + "step": 3425 + }, + { + "epoch": 14.270833333333334, + "loss": 0.06332320719957352, + "loss_ce": 0.00022811289818491787, + "loss_iou": 0.271484375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 312629212, + "step": 3425 + }, + { + "epoch": 14.275, + "grad_norm": 2.8460177705290843, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 312719904, + "step": 3426 + }, + { + "epoch": 14.275, + "loss": 0.05278380960226059, + "loss_ce": 3.6613828342524357e-06, + "loss_iou": 0.2734375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 312719904, + "step": 3426 + }, + { + "epoch": 14.279166666666667, + "grad_norm": 2.9263352700930794, + "learning_rate": 5e-05, + "loss": 0.1052, + "num_input_tokens_seen": 312810740, + "step": 3427 + }, + { + "epoch": 14.279166666666667, + "loss": 0.10622579604387283, + "loss_ce": 0.00016195495845749974, + "loss_iou": 0.341796875, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 312810740, + "step": 3427 + }, + { + "epoch": 14.283333333333333, + "grad_norm": 3.215534074928557, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 312902320, + "step": 3428 + }, + { + "epoch": 14.283333333333333, + "loss": 0.059037499129772186, + "loss_ce": 6.227454287e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 312902320, + "step": 3428 + }, + { + "epoch": 14.2875, + "grad_norm": 4.164203787467469, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 312994128, + "step": 3429 + }, + { + "epoch": 14.2875, + "loss": 0.053357671946287155, + "loss_ce": 0.0009589894907549024, + "loss_iou": 0.3046875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 312994128, + "step": 3429 + }, + { + "epoch": 14.291666666666666, + "grad_norm": 3.3424798425486393, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 313086072, + "step": 3430 + }, + { + "epoch": 14.291666666666666, + "loss": 0.06933162361383438, + "loss_ce": 8.724544750293717e-05, + "loss_iou": 0.2578125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 313086072, + "step": 3430 + }, + { + "epoch": 14.295833333333333, + "grad_norm": 3.221319375620749, + "learning_rate": 5e-05, + "loss": 0.0801, + "num_input_tokens_seen": 313177428, + "step": 3431 + }, + { + "epoch": 14.295833333333333, + "loss": 0.050983380526304245, + "loss_ce": 9.531808609608561e-05, + "loss_iou": 0.224609375, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 313177428, + "step": 3431 + }, + { + "epoch": 14.3, + "grad_norm": 6.893536221165098, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 313268856, + "step": 3432 + }, + { + "epoch": 14.3, + "loss": 0.06522677838802338, + "loss_ce": 1.0717561053752434e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 313268856, + "step": 3432 + }, + { + "epoch": 14.304166666666667, + "grad_norm": 2.266375022124694, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 313359784, + "step": 3433 + }, + { + "epoch": 14.304166666666667, + "loss": 0.04838399589061737, + "loss_ce": 2.8890797693748027e-05, + "loss_iou": 0.2109375, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 313359784, + "step": 3433 + }, + { + "epoch": 14.308333333333334, + "grad_norm": 2.6935861241346992, + "learning_rate": 5e-05, + "loss": 0.065, + "num_input_tokens_seen": 313451584, + "step": 3434 + }, + { + "epoch": 14.308333333333334, + "loss": 0.03947412967681885, + "loss_ce": 3.778855170821771e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 313451584, + "step": 3434 + }, + { + "epoch": 14.3125, + "grad_norm": 2.491603991707111, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 313542728, + "step": 3435 + }, + { + "epoch": 14.3125, + "loss": 0.06238909065723419, + "loss_ce": 7.219469989649951e-05, + "loss_iou": 0.1796875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 313542728, + "step": 3435 + }, + { + "epoch": 14.316666666666666, + "grad_norm": 4.281069025371908, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 313634348, + "step": 3436 + }, + { + "epoch": 14.316666666666666, + "loss": 0.0943194031715393, + "loss_ce": 0.0006533203413709998, + "loss_iou": 0.212890625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 313634348, + "step": 3436 + }, + { + "epoch": 14.320833333333333, + "grad_norm": 2.3967973579395525, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 313725236, + "step": 3437 + }, + { + "epoch": 14.320833333333333, + "loss": 0.042279839515686035, + "loss_ce": 8.928626630222425e-05, + "loss_iou": 0.265625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 313725236, + "step": 3437 + }, + { + "epoch": 14.325, + "grad_norm": 2.257621931213549, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 313816824, + "step": 3438 + }, + { + "epoch": 14.325, + "loss": 0.054193347692489624, + "loss_ce": 1.7587877891855896e-06, + "loss_iou": 0.263671875, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 313816824, + "step": 3438 + }, + { + "epoch": 14.329166666666667, + "grad_norm": 5.272133435023301, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 313908068, + "step": 3439 + }, + { + "epoch": 14.329166666666667, + "loss": 0.04028581082820892, + "loss_ce": 5.2200211939634755e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 313908068, + "step": 3439 + }, + { + "epoch": 14.333333333333334, + "grad_norm": 2.7108855978850372, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 313999224, + "step": 3440 + }, + { + "epoch": 14.333333333333334, + "loss": 0.023377256467938423, + "loss_ce": 6.18260819464922e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.004669189453125, + "loss_xval": 0.0233154296875, + "num_input_tokens_seen": 313999224, + "step": 3440 + }, + { + "epoch": 14.3375, + "grad_norm": 2.528980208355573, + "learning_rate": 5e-05, + "loss": 0.1086, + "num_input_tokens_seen": 314090544, + "step": 3441 + }, + { + "epoch": 14.3375, + "loss": 0.06901770830154419, + "loss_ce": 2.204579686804209e-06, + "loss_iou": 0.2890625, + "loss_num": 0.01385498046875, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 314090544, + "step": 3441 + }, + { + "epoch": 14.341666666666667, + "grad_norm": 2.5497706986522486, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 314181788, + "step": 3442 + }, + { + "epoch": 14.341666666666667, + "loss": 0.06665512919425964, + "loss_ce": 4.742196324514225e-06, + "loss_iou": 0.30078125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 314181788, + "step": 3442 + }, + { + "epoch": 14.345833333333333, + "grad_norm": 3.328778861443991, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 314272928, + "step": 3443 + }, + { + "epoch": 14.345833333333333, + "loss": 0.07248881459236145, + "loss_ce": 0.0018406271701678634, + "loss_iou": 0.158203125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 314272928, + "step": 3443 + }, + { + "epoch": 14.35, + "grad_norm": 4.11741321863604, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 314364008, + "step": 3444 + }, + { + "epoch": 14.35, + "loss": 0.08235032856464386, + "loss_ce": 7.493438897654414e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 314364008, + "step": 3444 + }, + { + "epoch": 14.354166666666666, + "grad_norm": 5.913859683796368, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 314455664, + "step": 3445 + }, + { + "epoch": 14.354166666666666, + "loss": 0.10439564287662506, + "loss_ce": 0.004648941569030285, + "loss_iou": 0.33203125, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 314455664, + "step": 3445 + }, + { + "epoch": 14.358333333333333, + "grad_norm": 2.042636336445981, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 314547192, + "step": 3446 + }, + { + "epoch": 14.358333333333333, + "loss": 0.04504550248384476, + "loss_ce": 1.5587525012961123e-06, + "loss_iou": 0.3359375, + "loss_num": 0.009033203125, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 314547192, + "step": 3446 + }, + { + "epoch": 14.3625, + "grad_norm": 11.13085697956147, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 314638912, + "step": 3447 + }, + { + "epoch": 14.3625, + "loss": 0.10157739371061325, + "loss_ce": 1.4897475921316072e-05, + "loss_iou": 0.3046875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 314638912, + "step": 3447 + }, + { + "epoch": 14.366666666666667, + "grad_norm": 1.8191878542115973, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 314728820, + "step": 3448 + }, + { + "epoch": 14.366666666666667, + "loss": 0.1349654495716095, + "loss_ce": 0.0005660292226821184, + "loss_iou": 0.21875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 314728820, + "step": 3448 + }, + { + "epoch": 14.370833333333334, + "grad_norm": 0.8656441342338449, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 314820188, + "step": 3449 + }, + { + "epoch": 14.370833333333334, + "loss": 0.02592162787914276, + "loss_ce": 0.0002105679304804653, + "loss_iou": 0.16796875, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 314820188, + "step": 3449 + }, + { + "epoch": 14.375, + "grad_norm": 4.541659388869669, + "learning_rate": 5e-05, + "loss": 0.0895, + "num_input_tokens_seen": 314911288, + "step": 3450 + }, + { + "epoch": 14.375, + "loss": 0.138495534658432, + "loss_ce": 1.4394384379556868e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 314911288, + "step": 3450 + }, + { + "epoch": 14.379166666666666, + "grad_norm": 2.3182304855785305, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 315002220, + "step": 3451 + }, + { + "epoch": 14.379166666666666, + "loss": 0.0673556923866272, + "loss_ce": 1.1029018423869275e-05, + "loss_iou": 0.322265625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 315002220, + "step": 3451 + }, + { + "epoch": 14.383333333333333, + "grad_norm": 0.8455518123782996, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 315092588, + "step": 3452 + }, + { + "epoch": 14.383333333333333, + "loss": 0.05230352282524109, + "loss_ce": 6.505908822873607e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 315092588, + "step": 3452 + }, + { + "epoch": 14.3875, + "grad_norm": 1.9418601665956887, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 315183924, + "step": 3453 + }, + { + "epoch": 14.3875, + "loss": 0.05696332827210426, + "loss_ce": 1.752637399476953e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 315183924, + "step": 3453 + }, + { + "epoch": 14.391666666666667, + "grad_norm": 4.802017746819228, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 315275104, + "step": 3454 + }, + { + "epoch": 14.391666666666667, + "loss": 0.06346727162599564, + "loss_ce": 3.6482193536357954e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 315275104, + "step": 3454 + }, + { + "epoch": 14.395833333333334, + "grad_norm": 1.966066230471116, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 315366048, + "step": 3455 + }, + { + "epoch": 14.395833333333334, + "loss": 0.05793462693691254, + "loss_ce": 4.63609194412129e-06, + "loss_iou": 0.30078125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 315366048, + "step": 3455 + }, + { + "epoch": 14.4, + "grad_norm": 3.4031793886468655, + "learning_rate": 5e-05, + "loss": 0.0896, + "num_input_tokens_seen": 315457876, + "step": 3456 + }, + { + "epoch": 14.4, + "loss": 0.08385931700468063, + "loss_ce": 0.002087468048557639, + "loss_iou": 0.1767578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 315457876, + "step": 3456 + }, + { + "epoch": 14.404166666666667, + "grad_norm": 2.496511746648192, + "learning_rate": 5e-05, + "loss": 0.0644, + "num_input_tokens_seen": 315548672, + "step": 3457 + }, + { + "epoch": 14.404166666666667, + "loss": 0.06105422228574753, + "loss_ce": 3.432453377172351e-05, + "loss_iou": 0.31640625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 315548672, + "step": 3457 + }, + { + "epoch": 14.408333333333333, + "grad_norm": 1.6076830601599892, + "learning_rate": 5e-05, + "loss": 0.0253, + "num_input_tokens_seen": 315640468, + "step": 3458 + }, + { + "epoch": 14.408333333333333, + "loss": 0.017314031720161438, + "loss_ce": 0.0003538889577612281, + "loss_iou": 0.2255859375, + "loss_num": 0.003387451171875, + "loss_xval": 0.0169677734375, + "num_input_tokens_seen": 315640468, + "step": 3458 + }, + { + "epoch": 14.4125, + "grad_norm": 3.0724346663416675, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 315732096, + "step": 3459 + }, + { + "epoch": 14.4125, + "loss": 0.04059988260269165, + "loss_ce": 0.00048452624469064176, + "loss_iou": 0.3125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 315732096, + "step": 3459 + }, + { + "epoch": 14.416666666666666, + "grad_norm": 2.415147114180416, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 315823008, + "step": 3460 + }, + { + "epoch": 14.416666666666666, + "loss": 0.06670276820659637, + "loss_ce": 6.607610885112081e-06, + "loss_iou": 0.255859375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 315823008, + "step": 3460 + }, + { + "epoch": 14.420833333333333, + "grad_norm": 2.6978788677723986, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 315913928, + "step": 3461 + }, + { + "epoch": 14.420833333333333, + "loss": 0.04788322001695633, + "loss_ce": 1.6398162188124843e-05, + "loss_iou": 0.22265625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 315913928, + "step": 3461 + }, + { + "epoch": 14.425, + "grad_norm": 3.792149348929219, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 316003724, + "step": 3462 + }, + { + "epoch": 14.425, + "loss": 0.08763362467288971, + "loss_ce": 4.817647277377546e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 316003724, + "step": 3462 + }, + { + "epoch": 14.429166666666667, + "grad_norm": 3.6421126679520106, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 316094972, + "step": 3463 + }, + { + "epoch": 14.429166666666667, + "loss": 0.08040214329957962, + "loss_ce": 0.00032401629141531885, + "loss_iou": 0.2373046875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 316094972, + "step": 3463 + }, + { + "epoch": 14.433333333333334, + "grad_norm": 2.1389868850931673, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 316186284, + "step": 3464 + }, + { + "epoch": 14.433333333333334, + "loss": 0.07011739909648895, + "loss_ce": 0.0002626621862873435, + "loss_iou": 0.412109375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 316186284, + "step": 3464 + }, + { + "epoch": 14.4375, + "grad_norm": 6.537398366871755, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 316277796, + "step": 3465 + }, + { + "epoch": 14.4375, + "loss": 0.08371639251708984, + "loss_ce": 0.000311847310513258, + "loss_iou": 0.259765625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 316277796, + "step": 3465 + }, + { + "epoch": 14.441666666666666, + "grad_norm": 3.078759532509579, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 316368988, + "step": 3466 + }, + { + "epoch": 14.441666666666666, + "loss": 0.04081658646464348, + "loss_ce": 2.9841428840882145e-05, + "loss_iou": 0.224609375, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 316368988, + "step": 3466 + }, + { + "epoch": 14.445833333333333, + "grad_norm": 2.831385150482735, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 316460692, + "step": 3467 + }, + { + "epoch": 14.445833333333333, + "loss": 0.057911500334739685, + "loss_ce": 8.069008617894724e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 316460692, + "step": 3467 + }, + { + "epoch": 14.45, + "grad_norm": 1.719084843590879, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 316551832, + "step": 3468 + }, + { + "epoch": 14.45, + "loss": 0.044491663575172424, + "loss_ce": 1.2295596206968185e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 316551832, + "step": 3468 + }, + { + "epoch": 14.454166666666667, + "grad_norm": 3.5030037950264177, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 316642824, + "step": 3469 + }, + { + "epoch": 14.454166666666667, + "loss": 0.10698297619819641, + "loss_ce": 3.608635779528413e-06, + "loss_iou": 0.0654296875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 316642824, + "step": 3469 + }, + { + "epoch": 14.458333333333334, + "grad_norm": 2.4394930623115587, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 316734436, + "step": 3470 + }, + { + "epoch": 14.458333333333334, + "loss": 0.08319145441055298, + "loss_ce": 0.00012260537187103182, + "loss_iou": 0.185546875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 316734436, + "step": 3470 + }, + { + "epoch": 14.4625, + "grad_norm": 2.740534558701278, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 316826096, + "step": 3471 + }, + { + "epoch": 14.4625, + "loss": 0.05262012034654617, + "loss_ce": 0.00017566494352649897, + "loss_iou": 0.271484375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 316826096, + "step": 3471 + }, + { + "epoch": 14.466666666666667, + "grad_norm": 3.325097662068458, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 316917172, + "step": 3472 + }, + { + "epoch": 14.466666666666667, + "loss": 0.07085268199443817, + "loss_ce": 0.0014404429821297526, + "loss_iou": 0.28515625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 316917172, + "step": 3472 + }, + { + "epoch": 14.470833333333333, + "grad_norm": 3.4718357598911647, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 317008100, + "step": 3473 + }, + { + "epoch": 14.470833333333333, + "loss": 0.09283044934272766, + "loss_ce": 7.226588058983907e-05, + "loss_iou": 0.296875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 317008100, + "step": 3473 + }, + { + "epoch": 14.475, + "grad_norm": 4.438588630076275, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 317100020, + "step": 3474 + }, + { + "epoch": 14.475, + "loss": 0.06110034137964249, + "loss_ce": 4.149102096562274e-06, + "loss_iou": 0.30078125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 317100020, + "step": 3474 + }, + { + "epoch": 14.479166666666666, + "grad_norm": 2.9706067154667113, + "learning_rate": 5e-05, + "loss": 0.0807, + "num_input_tokens_seen": 317191784, + "step": 3475 + }, + { + "epoch": 14.479166666666666, + "loss": 0.08926853537559509, + "loss_ce": 4.615443231159588e-06, + "loss_iou": 0.228515625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 317191784, + "step": 3475 + }, + { + "epoch": 14.483333333333333, + "grad_norm": 3.3619350623101614, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 317283224, + "step": 3476 + }, + { + "epoch": 14.483333333333333, + "loss": 0.08284099400043488, + "loss_ce": 0.00022990800789557397, + "loss_iou": 0.18359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 317283224, + "step": 3476 + }, + { + "epoch": 14.4875, + "grad_norm": 2.419172275959208, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 317373992, + "step": 3477 + }, + { + "epoch": 14.4875, + "loss": 0.04498765617609024, + "loss_ce": 2.0008901628898457e-05, + "loss_iou": 0.1005859375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 317373992, + "step": 3477 + }, + { + "epoch": 14.491666666666667, + "grad_norm": 5.847089711942539, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 317465668, + "step": 3478 + }, + { + "epoch": 14.491666666666667, + "loss": 0.1135324239730835, + "loss_ce": 0.00020540252444334328, + "loss_iou": 0.28125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 317465668, + "step": 3478 + }, + { + "epoch": 14.495833333333334, + "grad_norm": 4.647839770700526, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 317556852, + "step": 3479 + }, + { + "epoch": 14.495833333333334, + "loss": 0.06063609942793846, + "loss_ce": 4.344819171819836e-05, + "loss_iou": 0.296875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 317556852, + "step": 3479 + }, + { + "epoch": 14.5, + "grad_norm": 2.4680288450953287, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 317648416, + "step": 3480 + }, + { + "epoch": 14.5, + "loss": 0.05539741367101669, + "loss_ce": 6.904706242494285e-05, + "loss_iou": 0.27734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 317648416, + "step": 3480 + }, + { + "epoch": 14.504166666666666, + "grad_norm": 2.1585672480870173, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 317739400, + "step": 3481 + }, + { + "epoch": 14.504166666666666, + "loss": 0.07324524223804474, + "loss_ce": 3.0585702006646898e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 317739400, + "step": 3481 + }, + { + "epoch": 14.508333333333333, + "grad_norm": 2.959837229857713, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 317830996, + "step": 3482 + }, + { + "epoch": 14.508333333333333, + "loss": 0.04864703118801117, + "loss_ce": 0.0007496916223317385, + "loss_iou": 0.31640625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 317830996, + "step": 3482 + }, + { + "epoch": 14.5125, + "grad_norm": 5.104023715434689, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 317922784, + "step": 3483 + }, + { + "epoch": 14.5125, + "loss": 0.08295391499996185, + "loss_ce": 0.0008768883417360485, + "loss_iou": 0.3125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 317922784, + "step": 3483 + }, + { + "epoch": 14.516666666666667, + "grad_norm": 5.579693820814273, + "learning_rate": 5e-05, + "loss": 0.1435, + "num_input_tokens_seen": 318014244, + "step": 3484 + }, + { + "epoch": 14.516666666666667, + "loss": 0.12112545222043991, + "loss_ce": 0.0006878351559862494, + "loss_iou": 0.251953125, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 318014244, + "step": 3484 + }, + { + "epoch": 14.520833333333334, + "grad_norm": 1.3620977718613712, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 318106844, + "step": 3485 + }, + { + "epoch": 14.520833333333334, + "loss": 0.04919442534446716, + "loss_ce": 0.00012978771701455116, + "loss_iou": 0.1923828125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 318106844, + "step": 3485 + }, + { + "epoch": 14.525, + "grad_norm": 3.750694641880807, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 318198276, + "step": 3486 + }, + { + "epoch": 14.525, + "loss": 0.05821506679058075, + "loss_ce": 0.00023166877508629113, + "loss_iou": 0.296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 318198276, + "step": 3486 + }, + { + "epoch": 14.529166666666667, + "grad_norm": 2.837096414759929, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 318289184, + "step": 3487 + }, + { + "epoch": 14.529166666666667, + "loss": 0.04990578815340996, + "loss_ce": 9.547175068291835e-06, + "loss_iou": 0.2099609375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 318289184, + "step": 3487 + }, + { + "epoch": 14.533333333333333, + "grad_norm": 2.6995133820829134, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 318380240, + "step": 3488 + }, + { + "epoch": 14.533333333333333, + "loss": 0.11265411972999573, + "loss_ce": 0.00027314224280416965, + "loss_iou": 0.248046875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 318380240, + "step": 3488 + }, + { + "epoch": 14.5375, + "grad_norm": 3.222938431023567, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 318471796, + "step": 3489 + }, + { + "epoch": 14.5375, + "loss": 0.07313777506351471, + "loss_ce": 1.7655922420090064e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 318471796, + "step": 3489 + }, + { + "epoch": 14.541666666666666, + "grad_norm": 2.6278819311326034, + "learning_rate": 5e-05, + "loss": 0.042, + "num_input_tokens_seen": 318563232, + "step": 3490 + }, + { + "epoch": 14.541666666666666, + "loss": 0.04149797558784485, + "loss_ce": 0.0021226725075393915, + "loss_iou": 0.197265625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 318563232, + "step": 3490 + }, + { + "epoch": 14.545833333333333, + "grad_norm": 1.5850535962182528, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 318654996, + "step": 3491 + }, + { + "epoch": 14.545833333333333, + "loss": 0.05964607372879982, + "loss_ce": 0.000335162301780656, + "loss_iou": 0.2275390625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 318654996, + "step": 3491 + }, + { + "epoch": 14.55, + "grad_norm": 2.6214365855600863, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 318746168, + "step": 3492 + }, + { + "epoch": 14.55, + "loss": 0.04661758244037628, + "loss_ce": 1.97783606381563e-06, + "loss_iou": 0.357421875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 318746168, + "step": 3492 + }, + { + "epoch": 14.554166666666667, + "grad_norm": 2.2552122955124583, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 318837452, + "step": 3493 + }, + { + "epoch": 14.554166666666667, + "loss": 0.037708431482315063, + "loss_ce": 3.448417555773631e-05, + "loss_iou": 0.306640625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 318837452, + "step": 3493 + }, + { + "epoch": 14.558333333333334, + "grad_norm": 4.160645839197537, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 318928580, + "step": 3494 + }, + { + "epoch": 14.558333333333334, + "loss": 0.037462908774614334, + "loss_ce": 2.5830672711890657e-06, + "loss_iou": 0.27734375, + "loss_num": 0.00750732421875, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 318928580, + "step": 3494 + }, + { + "epoch": 14.5625, + "grad_norm": 3.1912535546962335, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 319019968, + "step": 3495 + }, + { + "epoch": 14.5625, + "loss": 0.0376228466629982, + "loss_ce": 9.932819011737593e-06, + "loss_iou": 0.322265625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 319019968, + "step": 3495 + }, + { + "epoch": 14.566666666666666, + "grad_norm": 2.5813431829196265, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 319111048, + "step": 3496 + }, + { + "epoch": 14.566666666666666, + "loss": 0.053882814943790436, + "loss_ce": 0.0002939487749245018, + "loss_iou": 0.1630859375, + "loss_num": 0.01068115234375, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 319111048, + "step": 3496 + }, + { + "epoch": 14.570833333333333, + "grad_norm": 2.923435349630488, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 319202188, + "step": 3497 + }, + { + "epoch": 14.570833333333333, + "loss": 0.06960056722164154, + "loss_ce": 3.575097071006894e-05, + "loss_iou": 0.314453125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 319202188, + "step": 3497 + }, + { + "epoch": 14.575, + "grad_norm": 3.2955743647970683, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 319293164, + "step": 3498 + }, + { + "epoch": 14.575, + "loss": 0.04828281328082085, + "loss_ce": 4.007476945844246e-06, + "loss_iou": 0.265625, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 319293164, + "step": 3498 + }, + { + "epoch": 14.579166666666667, + "grad_norm": 2.2018418363550913, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 319384616, + "step": 3499 + }, + { + "epoch": 14.579166666666667, + "loss": 0.07954820990562439, + "loss_ce": 0.00032457130146212876, + "loss_iou": 0.1904296875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 319384616, + "step": 3499 + }, + { + "epoch": 14.583333333333334, + "grad_norm": 3.429875070101406, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 319475956, + "step": 3500 + }, + { + "epoch": 14.583333333333334, + "eval_seeclick_CIoU": 0.17655083164572716, + "eval_seeclick_GIoU": 0.15622518584132195, + "eval_seeclick_IoU": 0.2974148243665695, + "eval_seeclick_MAE_all": 0.11146583780646324, + "eval_seeclick_MAE_h": 0.09194779396057129, + "eval_seeclick_MAE_w": 0.23992763459682465, + "eval_seeclick_MAE_x_boxes": 0.25715453177690506, + "eval_seeclick_MAE_y_boxes": 0.09402742981910706, + "eval_seeclick_NUM_probability": 0.9999983608722687, + "eval_seeclick_inside_bbox": 0.4318181872367859, + "eval_seeclick_loss": 0.6375502943992615, + "eval_seeclick_loss_ce": 0.13811881095170975, + "eval_seeclick_loss_iou": 0.39617919921875, + "eval_seeclick_loss_num": 0.09464263916015625, + "eval_seeclick_loss_xval": 0.4732666015625, + "eval_seeclick_runtime": 76.4697, + "eval_seeclick_samples_per_second": 0.562, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 319475956, + "step": 3500 + }, + { + "epoch": 14.583333333333334, + "eval_icons_CIoU": 0.3210890293121338, + "eval_icons_GIoU": 0.3353418707847595, + "eval_icons_IoU": 0.3973853290081024, + "eval_icons_MAE_all": 0.06360300816595554, + "eval_icons_MAE_h": 0.136297307908535, + "eval_icons_MAE_w": 0.09041904658079147, + "eval_icons_MAE_x_boxes": 0.09114982187747955, + "eval_icons_MAE_y_boxes": 0.13882280513644218, + "eval_icons_NUM_probability": 0.9999990165233612, + "eval_icons_inside_bbox": 0.5034722238779068, + "eval_icons_loss": 0.3158852756023407, + "eval_icons_loss_ce": 2.5450488465139642e-05, + "eval_icons_loss_iou": 0.275146484375, + "eval_icons_loss_num": 0.064208984375, + "eval_icons_loss_xval": 0.32122802734375, + "eval_icons_runtime": 90.401, + "eval_icons_samples_per_second": 0.553, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 319475956, + "step": 3500 + }, + { + "epoch": 14.583333333333334, + "eval_screenspot_CIoU": 0.3727397421995799, + "eval_screenspot_GIoU": 0.3544403314590454, + "eval_screenspot_IoU": 0.44670842091242474, + "eval_screenspot_MAE_all": 0.09947741279999416, + "eval_screenspot_MAE_h": 0.0852027287085851, + "eval_screenspot_MAE_w": 0.21258516609668732, + "eval_screenspot_MAE_x_boxes": 0.19677802920341492, + "eval_screenspot_MAE_y_boxes": 0.0816585545738538, + "eval_screenspot_NUM_probability": 0.999982754389445, + "eval_screenspot_inside_bbox": 0.659583330154419, + "eval_screenspot_loss": 0.4953005015850067, + "eval_screenspot_loss_ce": 1.7430343916657876e-05, + "eval_screenspot_loss_iou": 0.3859456380208333, + "eval_screenspot_loss_num": 0.09992472330729167, + "eval_screenspot_loss_xval": 0.4994099934895833, + "eval_screenspot_runtime": 155.3672, + "eval_screenspot_samples_per_second": 0.573, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 319475956, + "step": 3500 + }, + { + "epoch": 14.583333333333334, + "eval_compot_CIoU": 0.5295035243034363, + "eval_compot_GIoU": 0.5284956693649292, + "eval_compot_IoU": 0.5923766493797302, + "eval_compot_MAE_all": 0.04935946501791477, + "eval_compot_MAE_h": 0.0681275837123394, + "eval_compot_MAE_w": 0.11527523770928383, + "eval_compot_MAE_x_boxes": 0.1153593361377716, + "eval_compot_MAE_y_boxes": 0.0679020918905735, + "eval_compot_NUM_probability": 0.999997466802597, + "eval_compot_inside_bbox": 0.7795138955116272, + "eval_compot_loss": 0.2786952555179596, + "eval_compot_loss_ce": 0.03497672267258167, + "eval_compot_loss_iou": 0.33575439453125, + "eval_compot_loss_num": 0.0444488525390625, + "eval_compot_loss_xval": 0.222320556640625, + "eval_compot_runtime": 89.9096, + "eval_compot_samples_per_second": 0.556, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 319475956, + "step": 3500 + }, + { + "epoch": 14.583333333333334, + "loss": 0.2446812093257904, + "loss_ce": 0.03169901296496391, + "loss_iou": 0.353515625, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 319475956, + "step": 3500 + }, + { + "epoch": 14.5875, + "grad_norm": 4.118534474092619, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 319567420, + "step": 3501 + }, + { + "epoch": 14.5875, + "loss": 0.032026711851358414, + "loss_ce": 6.144516191852745e-06, + "loss_iou": 0.33203125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 319567420, + "step": 3501 + }, + { + "epoch": 14.591666666666667, + "grad_norm": 4.308728866604463, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 319659252, + "step": 3502 + }, + { + "epoch": 14.591666666666667, + "loss": 0.04407627880573273, + "loss_ce": 0.00013096319162286818, + "loss_iou": 0.2490234375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 319659252, + "step": 3502 + }, + { + "epoch": 14.595833333333333, + "grad_norm": 3.0546123104614242, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 319750036, + "step": 3503 + }, + { + "epoch": 14.595833333333333, + "loss": 0.10922509431838989, + "loss_ce": 2.682106924112304e-06, + "loss_iou": 0.322265625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 319750036, + "step": 3503 + }, + { + "epoch": 14.6, + "grad_norm": 5.275806577118591, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 319841108, + "step": 3504 + }, + { + "epoch": 14.6, + "loss": 0.09145887196063995, + "loss_ce": 8.924029680201784e-05, + "loss_iou": 0.2109375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 319841108, + "step": 3504 + }, + { + "epoch": 14.604166666666666, + "grad_norm": 2.3741590971225217, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 319932436, + "step": 3505 + }, + { + "epoch": 14.604166666666666, + "loss": 0.05104352533817291, + "loss_ce": 0.00017072322953026742, + "loss_iou": 0.2216796875, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 319932436, + "step": 3505 + }, + { + "epoch": 14.608333333333333, + "grad_norm": 2.2175225111331467, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 320023800, + "step": 3506 + }, + { + "epoch": 14.608333333333333, + "loss": 0.026951856911182404, + "loss_ce": 0.00031001074239611626, + "loss_iou": 0.3125, + "loss_num": 0.005340576171875, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 320023800, + "step": 3506 + }, + { + "epoch": 14.6125, + "grad_norm": 3.0426971693638762, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 320115288, + "step": 3507 + }, + { + "epoch": 14.6125, + "loss": 0.05835458263754845, + "loss_ce": 0.002160276984795928, + "loss_iou": 0.1484375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 320115288, + "step": 3507 + }, + { + "epoch": 14.616666666666667, + "grad_norm": 1.2476075034819456, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 320206484, + "step": 3508 + }, + { + "epoch": 14.616666666666667, + "loss": 0.09845627099275589, + "loss_ce": 0.00025833185645751655, + "loss_iou": 0.212890625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 320206484, + "step": 3508 + }, + { + "epoch": 14.620833333333334, + "grad_norm": 2.110972411286442, + "learning_rate": 5e-05, + "loss": 0.05, + "num_input_tokens_seen": 320297496, + "step": 3509 + }, + { + "epoch": 14.620833333333334, + "loss": 0.06258494406938553, + "loss_ce": 1.0235469289909815e-06, + "loss_iou": 0.193359375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 320297496, + "step": 3509 + }, + { + "epoch": 14.625, + "grad_norm": 3.157498930857514, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 320388916, + "step": 3510 + }, + { + "epoch": 14.625, + "loss": 0.058251626789569855, + "loss_ce": 0.0027477818075567484, + "loss_iou": 0.18359375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 320388916, + "step": 3510 + }, + { + "epoch": 14.629166666666666, + "grad_norm": 2.057699984429364, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 320480088, + "step": 3511 + }, + { + "epoch": 14.629166666666666, + "loss": 0.0456683523952961, + "loss_ce": 0.00048707760288380086, + "loss_iou": 0.2177734375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 320480088, + "step": 3511 + }, + { + "epoch": 14.633333333333333, + "grad_norm": 2.6068400938906544, + "learning_rate": 5e-05, + "loss": 0.0995, + "num_input_tokens_seen": 320571404, + "step": 3512 + }, + { + "epoch": 14.633333333333333, + "loss": 0.07493845373392105, + "loss_ce": 1.779539707058575e-05, + "loss_iou": 0.21875, + "loss_num": 0.0150146484375, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 320571404, + "step": 3512 + }, + { + "epoch": 14.6375, + "grad_norm": 3.6310490309966594, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 320662948, + "step": 3513 + }, + { + "epoch": 14.6375, + "loss": 0.04782600700855255, + "loss_ce": 0.00014228891814127564, + "loss_iou": 0.20703125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 320662948, + "step": 3513 + }, + { + "epoch": 14.641666666666667, + "grad_norm": 4.0663307305655865, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 320754252, + "step": 3514 + }, + { + "epoch": 14.641666666666667, + "loss": 0.04836490750312805, + "loss_ce": 2.1755488432972925e-06, + "loss_iou": 0.18359375, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 320754252, + "step": 3514 + }, + { + "epoch": 14.645833333333334, + "grad_norm": 2.7017123754218693, + "learning_rate": 5e-05, + "loss": 0.1109, + "num_input_tokens_seen": 320845896, + "step": 3515 + }, + { + "epoch": 14.645833333333334, + "loss": 0.13035404682159424, + "loss_ce": 1.3472451428242493e-05, + "loss_iou": 0.224609375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 320845896, + "step": 3515 + }, + { + "epoch": 14.65, + "grad_norm": 3.3750597376134737, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 320936400, + "step": 3516 + }, + { + "epoch": 14.65, + "loss": 0.028896596282720566, + "loss_ce": 0.00015666562831029296, + "loss_iou": 0.2060546875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 320936400, + "step": 3516 + }, + { + "epoch": 14.654166666666667, + "grad_norm": 1.8696180434658125, + "learning_rate": 5e-05, + "loss": 0.0313, + "num_input_tokens_seen": 321027888, + "step": 3517 + }, + { + "epoch": 14.654166666666667, + "loss": 0.03676997870206833, + "loss_ce": 0.0014916585059836507, + "loss_iou": 0.22265625, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 321027888, + "step": 3517 + }, + { + "epoch": 14.658333333333333, + "grad_norm": 2.2360835380693866, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 321119464, + "step": 3518 + }, + { + "epoch": 14.658333333333333, + "loss": 0.04221915453672409, + "loss_ce": 0.00010489897249499336, + "loss_iou": 0.34765625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 321119464, + "step": 3518 + }, + { + "epoch": 14.6625, + "grad_norm": 1.9138621502707591, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 321210948, + "step": 3519 + }, + { + "epoch": 14.6625, + "loss": 0.07550536841154099, + "loss_ce": 0.00032531472970731556, + "loss_iou": 0.1806640625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 321210948, + "step": 3519 + }, + { + "epoch": 14.666666666666666, + "grad_norm": 2.5165631989947834, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 321301772, + "step": 3520 + }, + { + "epoch": 14.666666666666666, + "loss": 0.037338871508836746, + "loss_ce": 6.164409001030435e-07, + "loss_iou": 0.1875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 321301772, + "step": 3520 + }, + { + "epoch": 14.670833333333333, + "grad_norm": 2.594196631908786, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 321393016, + "step": 3521 + }, + { + "epoch": 14.670833333333333, + "loss": 0.09475166350603104, + "loss_ce": 2.214087089669192e-06, + "loss_iou": 0.18359375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 321393016, + "step": 3521 + }, + { + "epoch": 14.675, + "grad_norm": 1.5223843939552286, + "learning_rate": 5e-05, + "loss": 0.0282, + "num_input_tokens_seen": 321484240, + "step": 3522 + }, + { + "epoch": 14.675, + "loss": 0.02998613566160202, + "loss_ce": 7.890904089435935e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 321484240, + "step": 3522 + }, + { + "epoch": 14.679166666666667, + "grad_norm": 3.0551389908396978, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 321575676, + "step": 3523 + }, + { + "epoch": 14.679166666666667, + "loss": 0.07173063606023788, + "loss_ce": 0.0012197702890262008, + "loss_iou": 0.23828125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 321575676, + "step": 3523 + }, + { + "epoch": 14.683333333333334, + "grad_norm": 3.0763495528620277, + "learning_rate": 5e-05, + "loss": 0.0296, + "num_input_tokens_seen": 321666800, + "step": 3524 + }, + { + "epoch": 14.683333333333334, + "loss": 0.026495546102523804, + "loss_ce": 0.0001893942098831758, + "loss_iou": 0.26171875, + "loss_num": 0.0052490234375, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 321666800, + "step": 3524 + }, + { + "epoch": 14.6875, + "grad_norm": 8.334019762100176, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 321758752, + "step": 3525 + }, + { + "epoch": 14.6875, + "loss": 0.07537412643432617, + "loss_ce": 0.0002398555225227028, + "loss_iou": 0.2177734375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 321758752, + "step": 3525 + }, + { + "epoch": 14.691666666666666, + "grad_norm": 2.336356169742045, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 321849716, + "step": 3526 + }, + { + "epoch": 14.691666666666666, + "loss": 0.08213899284601212, + "loss_ce": 9.316157729699626e-07, + "loss_iou": 0.3359375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 321849716, + "step": 3526 + }, + { + "epoch": 14.695833333333333, + "grad_norm": 2.31938679107471, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 321939640, + "step": 3527 + }, + { + "epoch": 14.695833333333333, + "loss": 0.03845227137207985, + "loss_ce": 0.00016796833369880915, + "loss_iou": 0.236328125, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 321939640, + "step": 3527 + }, + { + "epoch": 14.7, + "grad_norm": 2.0268901653200326, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 322030928, + "step": 3528 + }, + { + "epoch": 14.7, + "loss": 0.02897455170750618, + "loss_ce": 1.3370583474170417e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 322030928, + "step": 3528 + }, + { + "epoch": 14.704166666666667, + "grad_norm": 5.8854308711480146, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 322122012, + "step": 3529 + }, + { + "epoch": 14.704166666666667, + "loss": 0.09826751798391342, + "loss_ce": 9.15083489871904e-07, + "loss_iou": 0.3671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 322122012, + "step": 3529 + }, + { + "epoch": 14.708333333333334, + "grad_norm": 2.763988709289915, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 322213168, + "step": 3530 + }, + { + "epoch": 14.708333333333334, + "loss": 0.024704724550247192, + "loss_ce": 0.000122814453789033, + "loss_iou": 0.2275390625, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 322213168, + "step": 3530 + }, + { + "epoch": 14.7125, + "grad_norm": 2.5625093123581135, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 322304540, + "step": 3531 + }, + { + "epoch": 14.7125, + "loss": 0.07655003666877747, + "loss_ce": 0.00045445383875630796, + "loss_iou": 0.2255859375, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 322304540, + "step": 3531 + }, + { + "epoch": 14.716666666666667, + "grad_norm": 1.5464455321738027, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 322395988, + "step": 3532 + }, + { + "epoch": 14.716666666666667, + "loss": 0.06164345145225525, + "loss_ce": 5.573797807301162e-06, + "loss_iou": 0.1728515625, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 322395988, + "step": 3532 + }, + { + "epoch": 14.720833333333333, + "grad_norm": 1.6193154533522065, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 322487532, + "step": 3533 + }, + { + "epoch": 14.720833333333333, + "loss": 0.044892363250255585, + "loss_ce": 1.0047299383586505e-06, + "loss_iou": 0.275390625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 322487532, + "step": 3533 + }, + { + "epoch": 14.725, + "grad_norm": 3.6761690727969683, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 322579200, + "step": 3534 + }, + { + "epoch": 14.725, + "loss": 0.04250044375658035, + "loss_ce": 4.715973773272708e-06, + "loss_iou": 0.3203125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 322579200, + "step": 3534 + }, + { + "epoch": 14.729166666666666, + "grad_norm": 1.551974938766234, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 322670388, + "step": 3535 + }, + { + "epoch": 14.729166666666666, + "loss": 0.03715559095144272, + "loss_ce": 1.5698578863521107e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 322670388, + "step": 3535 + }, + { + "epoch": 14.733333333333333, + "grad_norm": 3.6318519870465376, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 322761260, + "step": 3536 + }, + { + "epoch": 14.733333333333333, + "loss": 0.10466891527175903, + "loss_ce": 9.280054655391723e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 322761260, + "step": 3536 + }, + { + "epoch": 14.7375, + "grad_norm": 13.28436486626998, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 322852928, + "step": 3537 + }, + { + "epoch": 14.7375, + "loss": 0.10683774203062057, + "loss_ce": 0.0003008772328030318, + "loss_iou": 0.1875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 322852928, + "step": 3537 + }, + { + "epoch": 14.741666666666667, + "grad_norm": 2.416420583465496, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 322944620, + "step": 3538 + }, + { + "epoch": 14.741666666666667, + "loss": 0.06891767680644989, + "loss_ce": 0.0003637520712800324, + "loss_iou": 0.3125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 322944620, + "step": 3538 + }, + { + "epoch": 14.745833333333334, + "grad_norm": 2.5616024779607884, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 323036304, + "step": 3539 + }, + { + "epoch": 14.745833333333334, + "loss": 0.05444856733083725, + "loss_ce": 5.208913535170723e-06, + "loss_iou": 0.2470703125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 323036304, + "step": 3539 + }, + { + "epoch": 14.75, + "grad_norm": 3.4890030998851023, + "learning_rate": 5e-05, + "loss": 0.0777, + "num_input_tokens_seen": 323127580, + "step": 3540 + }, + { + "epoch": 14.75, + "loss": 0.06150487810373306, + "loss_ce": 2.721688724705018e-05, + "loss_iou": 0.29296875, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 323127580, + "step": 3540 + }, + { + "epoch": 14.754166666666666, + "grad_norm": 6.813408628687027, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 323218656, + "step": 3541 + }, + { + "epoch": 14.754166666666666, + "loss": 0.07420557737350464, + "loss_ce": 2.0836291696468834e-06, + "loss_iou": 0.30078125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 323218656, + "step": 3541 + }, + { + "epoch": 14.758333333333333, + "grad_norm": 3.70710954353085, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 323309872, + "step": 3542 + }, + { + "epoch": 14.758333333333333, + "loss": 0.03889217972755432, + "loss_ce": 0.0007299504359252751, + "loss_iou": 0.294921875, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 323309872, + "step": 3542 + }, + { + "epoch": 14.7625, + "grad_norm": 2.7670308299627657, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 323401128, + "step": 3543 + }, + { + "epoch": 14.7625, + "loss": 0.07968062162399292, + "loss_ce": 9.077793220058084e-05, + "loss_iou": 0.298828125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 323401128, + "step": 3543 + }, + { + "epoch": 14.766666666666667, + "grad_norm": 2.275986846611061, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 323492344, + "step": 3544 + }, + { + "epoch": 14.766666666666667, + "loss": 0.03380130976438522, + "loss_ce": 0.0008423267281614244, + "loss_iou": 0.24609375, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 323492344, + "step": 3544 + }, + { + "epoch": 14.770833333333334, + "grad_norm": 2.614157998303283, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 323583560, + "step": 3545 + }, + { + "epoch": 14.770833333333334, + "loss": 0.03604413568973541, + "loss_ce": 1.8135235222871415e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 323583560, + "step": 3545 + }, + { + "epoch": 14.775, + "grad_norm": 4.118987210157419, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 323675416, + "step": 3546 + }, + { + "epoch": 14.775, + "loss": 0.04108428210020065, + "loss_ce": 9.53611233853735e-05, + "loss_iou": 0.34375, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 323675416, + "step": 3546 + }, + { + "epoch": 14.779166666666667, + "grad_norm": 6.501283613064894, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 323766912, + "step": 3547 + }, + { + "epoch": 14.779166666666667, + "loss": 0.040303364396095276, + "loss_ce": 4.9025234147848096e-06, + "loss_iou": 0.251953125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 323766912, + "step": 3547 + }, + { + "epoch": 14.783333333333333, + "grad_norm": 6.079005531482708, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 323858404, + "step": 3548 + }, + { + "epoch": 14.783333333333333, + "loss": 0.06674402207136154, + "loss_ce": 0.002924136118963361, + "loss_iou": 0.373046875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 323858404, + "step": 3548 + }, + { + "epoch": 14.7875, + "grad_norm": 2.8525996359524197, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 323949236, + "step": 3549 + }, + { + "epoch": 14.7875, + "loss": 0.04042597860097885, + "loss_ce": 3.596375972847454e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 323949236, + "step": 3549 + }, + { + "epoch": 14.791666666666666, + "grad_norm": 4.001291035694281, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 324040436, + "step": 3550 + }, + { + "epoch": 14.791666666666666, + "loss": 0.03389447182416916, + "loss_ce": 4.702661499322858e-06, + "loss_iou": 0.296875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 324040436, + "step": 3550 + }, + { + "epoch": 14.795833333333333, + "grad_norm": 2.5368419134219238, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 324131804, + "step": 3551 + }, + { + "epoch": 14.795833333333333, + "loss": 0.03450581058859825, + "loss_ce": 5.689916179107968e-06, + "loss_iou": 0.1533203125, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 324131804, + "step": 3551 + }, + { + "epoch": 14.8, + "grad_norm": 2.013270021900645, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 324223076, + "step": 3552 + }, + { + "epoch": 14.8, + "loss": 0.02905517816543579, + "loss_ce": 2.4460521217406495e-06, + "loss_iou": 0.353515625, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 324223076, + "step": 3552 + }, + { + "epoch": 14.804166666666667, + "grad_norm": 2.4595982818652824, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 324314896, + "step": 3553 + }, + { + "epoch": 14.804166666666667, + "loss": 0.07248193770647049, + "loss_ce": 1.7949929315363988e-05, + "loss_iou": 0.349609375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 324314896, + "step": 3553 + }, + { + "epoch": 14.808333333333334, + "grad_norm": 2.6005864024543137, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 324406368, + "step": 3554 + }, + { + "epoch": 14.808333333333334, + "loss": 0.11534042656421661, + "loss_ce": 0.0003959625319112092, + "loss_iou": 0.10546875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 324406368, + "step": 3554 + }, + { + "epoch": 14.8125, + "grad_norm": 1.6112301855903222, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 324497652, + "step": 3555 + }, + { + "epoch": 14.8125, + "loss": 0.057490721344947815, + "loss_ce": 0.0001100460285670124, + "loss_iou": 0.3203125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 324497652, + "step": 3555 + }, + { + "epoch": 14.816666666666666, + "grad_norm": 2.515654842620723, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 324588820, + "step": 3556 + }, + { + "epoch": 14.816666666666666, + "loss": 0.047474320977926254, + "loss_ce": 1.9487149984342977e-05, + "loss_iou": 0.2421875, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 324588820, + "step": 3556 + }, + { + "epoch": 14.820833333333333, + "grad_norm": 5.683309356890506, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 324680276, + "step": 3557 + }, + { + "epoch": 14.820833333333333, + "loss": 0.12900656461715698, + "loss_ce": 0.0008174782851710916, + "loss_iou": 0.345703125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 324680276, + "step": 3557 + }, + { + "epoch": 14.825, + "grad_norm": 4.9157808656117075, + "learning_rate": 5e-05, + "loss": 0.0749, + "num_input_tokens_seen": 324771392, + "step": 3558 + }, + { + "epoch": 14.825, + "loss": 0.09027700126171112, + "loss_ce": 9.755467181093991e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 324771392, + "step": 3558 + }, + { + "epoch": 14.829166666666667, + "grad_norm": 4.327085265970158, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 324862568, + "step": 3559 + }, + { + "epoch": 14.829166666666667, + "loss": 0.07519317418336868, + "loss_ce": 5.3175652283243835e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 324862568, + "step": 3559 + }, + { + "epoch": 14.833333333333334, + "grad_norm": 2.9200242618316183, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 324953504, + "step": 3560 + }, + { + "epoch": 14.833333333333334, + "loss": 0.0702124685049057, + "loss_ce": 5.256131771602668e-05, + "loss_iou": 0.21484375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 324953504, + "step": 3560 + }, + { + "epoch": 14.8375, + "grad_norm": 7.980168289767925, + "learning_rate": 5e-05, + "loss": 0.094, + "num_input_tokens_seen": 325044980, + "step": 3561 + }, + { + "epoch": 14.8375, + "loss": 0.06561485677957535, + "loss_ce": 3.2580312108621e-05, + "loss_iou": 0.30078125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 325044980, + "step": 3561 + }, + { + "epoch": 14.841666666666667, + "grad_norm": 2.534301338125501, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 325136512, + "step": 3562 + }, + { + "epoch": 14.841666666666667, + "loss": 0.05281366780400276, + "loss_ce": 2.9979187274875585e-06, + "loss_iou": 0.1904296875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 325136512, + "step": 3562 + }, + { + "epoch": 14.845833333333333, + "grad_norm": 2.3211136578655696, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 325227988, + "step": 3563 + }, + { + "epoch": 14.845833333333333, + "loss": 0.10832767188549042, + "loss_ce": 5.5274854275921825e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 325227988, + "step": 3563 + }, + { + "epoch": 14.85, + "grad_norm": 2.0010843935107183, + "learning_rate": 5e-05, + "loss": 0.0885, + "num_input_tokens_seen": 325319428, + "step": 3564 + }, + { + "epoch": 14.85, + "loss": 0.10545650124549866, + "loss_ce": 1.0635448234097566e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 325319428, + "step": 3564 + }, + { + "epoch": 14.854166666666666, + "grad_norm": 2.2152933496585656, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 325411304, + "step": 3565 + }, + { + "epoch": 14.854166666666666, + "loss": 0.04346586763858795, + "loss_ce": 8.837326276989188e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 325411304, + "step": 3565 + }, + { + "epoch": 14.858333333333333, + "grad_norm": 3.133004974905781, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 325502720, + "step": 3566 + }, + { + "epoch": 14.858333333333333, + "loss": 0.05578949302434921, + "loss_ce": 3.3608721423661336e-06, + "loss_iou": 0.27734375, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 325502720, + "step": 3566 + }, + { + "epoch": 14.8625, + "grad_norm": 2.2451849806419304, + "learning_rate": 5e-05, + "loss": 0.0704, + "num_input_tokens_seen": 325593764, + "step": 3567 + }, + { + "epoch": 14.8625, + "loss": 0.041220203042030334, + "loss_ce": 6.215145731403027e-06, + "loss_iou": 0.2578125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 325593764, + "step": 3567 + }, + { + "epoch": 14.866666666666667, + "grad_norm": 1.2833324031924245, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 325685004, + "step": 3568 + }, + { + "epoch": 14.866666666666667, + "loss": 0.044435691088438034, + "loss_ce": 4.78729052701965e-05, + "loss_iou": 0.203125, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 325685004, + "step": 3568 + }, + { + "epoch": 14.870833333333334, + "grad_norm": 1.6872962235402553, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 325776788, + "step": 3569 + }, + { + "epoch": 14.870833333333334, + "loss": 0.053148359060287476, + "loss_ce": 3.251617818023078e-05, + "loss_iou": 0.21875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 325776788, + "step": 3569 + }, + { + "epoch": 14.875, + "grad_norm": 4.775188991555694, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 325868156, + "step": 3570 + }, + { + "epoch": 14.875, + "loss": 0.0691394954919815, + "loss_ce": 7.821511826477945e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 325868156, + "step": 3570 + }, + { + "epoch": 14.879166666666666, + "grad_norm": 2.131116925129658, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 325959612, + "step": 3571 + }, + { + "epoch": 14.879166666666666, + "loss": 0.049073703587055206, + "loss_ce": 0.0016188665758818388, + "loss_iou": 0.376953125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 325959612, + "step": 3571 + }, + { + "epoch": 14.883333333333333, + "grad_norm": 2.7535034525581246, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 326051084, + "step": 3572 + }, + { + "epoch": 14.883333333333333, + "loss": 0.09897946566343307, + "loss_ce": 1.0963100066874176e-05, + "loss_iou": 0.259765625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 326051084, + "step": 3572 + }, + { + "epoch": 14.8875, + "grad_norm": 1.2638926995119009, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 326141908, + "step": 3573 + }, + { + "epoch": 14.8875, + "loss": 0.05105225369334221, + "loss_ce": 0.0002023401320911944, + "loss_iou": 0.220703125, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 326141908, + "step": 3573 + }, + { + "epoch": 14.891666666666667, + "grad_norm": 1.6713820019916632, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 326233920, + "step": 3574 + }, + { + "epoch": 14.891666666666667, + "loss": 0.08183803409337997, + "loss_ce": 0.0005849840235896409, + "loss_iou": 0.21484375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 326233920, + "step": 3574 + }, + { + "epoch": 14.895833333333334, + "grad_norm": 1.8503550101021848, + "learning_rate": 5e-05, + "loss": 0.0991, + "num_input_tokens_seen": 326325268, + "step": 3575 + }, + { + "epoch": 14.895833333333334, + "loss": 0.1190267950296402, + "loss_ce": 0.0015951523091644049, + "loss_iou": 0.134765625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 326325268, + "step": 3575 + }, + { + "epoch": 14.9, + "grad_norm": 1.9195103637938333, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 326416216, + "step": 3576 + }, + { + "epoch": 14.9, + "loss": 0.10601796954870224, + "loss_ce": 0.000640773621853441, + "loss_iou": 0.30078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 326416216, + "step": 3576 + }, + { + "epoch": 14.904166666666667, + "grad_norm": 5.487943418890862, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 326507140, + "step": 3577 + }, + { + "epoch": 14.904166666666667, + "loss": 0.08022335171699524, + "loss_ce": 2.315099845873192e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 326507140, + "step": 3577 + }, + { + "epoch": 14.908333333333333, + "grad_norm": 2.446270218723829, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 326598324, + "step": 3578 + }, + { + "epoch": 14.908333333333333, + "loss": 0.05691614747047424, + "loss_ce": 8.657324315208825e-07, + "loss_iou": 0.26171875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 326598324, + "step": 3578 + }, + { + "epoch": 14.9125, + "grad_norm": 1.9621850861220034, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 326689168, + "step": 3579 + }, + { + "epoch": 14.9125, + "loss": 0.06024498492479324, + "loss_ce": 3.2864827517187223e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 326689168, + "step": 3579 + }, + { + "epoch": 14.916666666666666, + "grad_norm": 2.3981445276381455, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 326780392, + "step": 3580 + }, + { + "epoch": 14.916666666666666, + "loss": 0.05113024637103081, + "loss_ce": 0.0007762408349663019, + "loss_iou": 0.166015625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 326780392, + "step": 3580 + }, + { + "epoch": 14.920833333333333, + "grad_norm": 4.281863455784161, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 326871508, + "step": 3581 + }, + { + "epoch": 14.920833333333333, + "loss": 0.039055127650499344, + "loss_ce": 0.0033800816163420677, + "loss_iou": 0.134765625, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 326871508, + "step": 3581 + }, + { + "epoch": 14.925, + "grad_norm": 4.393403438025354, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 326963588, + "step": 3582 + }, + { + "epoch": 14.925, + "loss": 0.07046674937009811, + "loss_ce": 0.00018477137200534344, + "loss_iou": 0.134765625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 326963588, + "step": 3582 + }, + { + "epoch": 14.929166666666667, + "grad_norm": 3.570579908241656, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 327054772, + "step": 3583 + }, + { + "epoch": 14.929166666666667, + "loss": 0.06317311525344849, + "loss_ce": 1.7325469343631994e-06, + "loss_iou": 0.25390625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 327054772, + "step": 3583 + }, + { + "epoch": 14.933333333333334, + "grad_norm": 2.4158229427503337, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 327145804, + "step": 3584 + }, + { + "epoch": 14.933333333333334, + "loss": 0.08556769043207169, + "loss_ce": 0.0010339971631765366, + "loss_iou": 0.3125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 327145804, + "step": 3584 + }, + { + "epoch": 14.9375, + "grad_norm": 3.440511605747932, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 327236888, + "step": 3585 + }, + { + "epoch": 14.9375, + "loss": 0.08531348407268524, + "loss_ce": 0.000154180932440795, + "loss_iou": 0.35546875, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 327236888, + "step": 3585 + }, + { + "epoch": 14.941666666666666, + "grad_norm": 17.60435585784368, + "learning_rate": 5e-05, + "loss": 0.0817, + "num_input_tokens_seen": 327328136, + "step": 3586 + }, + { + "epoch": 14.941666666666666, + "loss": 0.10764288902282715, + "loss_ce": 0.00023627388873137534, + "loss_iou": 0.3359375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 327328136, + "step": 3586 + }, + { + "epoch": 14.945833333333333, + "grad_norm": 2.269990229583021, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 327418900, + "step": 3587 + }, + { + "epoch": 14.945833333333333, + "loss": 0.047153279185295105, + "loss_ce": 3.623671091190772e-06, + "loss_iou": 0.2255859375, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 327418900, + "step": 3587 + }, + { + "epoch": 14.95, + "grad_norm": 3.0739284813249776, + "learning_rate": 5e-05, + "loss": 0.0693, + "num_input_tokens_seen": 327509888, + "step": 3588 + }, + { + "epoch": 14.95, + "loss": 0.08897919952869415, + "loss_ce": 8.148964116116986e-05, + "loss_iou": 0.333984375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 327509888, + "step": 3588 + }, + { + "epoch": 14.954166666666667, + "grad_norm": 3.041000004524471, + "learning_rate": 5e-05, + "loss": 0.0343, + "num_input_tokens_seen": 327601236, + "step": 3589 + }, + { + "epoch": 14.954166666666667, + "loss": 0.03820054233074188, + "loss_ce": 3.831431968137622e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 327601236, + "step": 3589 + }, + { + "epoch": 14.958333333333334, + "grad_norm": 2.4864528317602104, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 327691636, + "step": 3590 + }, + { + "epoch": 14.958333333333334, + "loss": 0.06497173756361008, + "loss_ce": 1.5070919289428275e-05, + "loss_iou": 0.251953125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 327691636, + "step": 3590 + }, + { + "epoch": 14.9625, + "grad_norm": 1.8455373863541833, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 327782680, + "step": 3591 + }, + { + "epoch": 14.9625, + "loss": 0.05134090036153793, + "loss_ce": 1.0335241313441657e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 327782680, + "step": 3591 + }, + { + "epoch": 14.966666666666667, + "grad_norm": 1.96959325048514, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 327873792, + "step": 3592 + }, + { + "epoch": 14.966666666666667, + "loss": 0.041168928146362305, + "loss_ce": 7.187348955994821e-07, + "loss_iou": 0.279296875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 327873792, + "step": 3592 + }, + { + "epoch": 14.970833333333333, + "grad_norm": 5.7229393107077735, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 327965508, + "step": 3593 + }, + { + "epoch": 14.970833333333333, + "loss": 0.0583159439265728, + "loss_ce": 1.2109203453292139e-05, + "loss_iou": 0.263671875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 327965508, + "step": 3593 + }, + { + "epoch": 14.975, + "grad_norm": 2.2666128192625825, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 328056804, + "step": 3594 + }, + { + "epoch": 14.975, + "loss": 0.03889273852109909, + "loss_ce": 3.432005178183317e-05, + "loss_iou": 0.212890625, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 328056804, + "step": 3594 + }, + { + "epoch": 14.979166666666666, + "grad_norm": 4.750338180067551, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 328148388, + "step": 3595 + }, + { + "epoch": 14.979166666666666, + "loss": 0.0718790739774704, + "loss_ce": 7.121425733203068e-05, + "loss_iou": 0.380859375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 328148388, + "step": 3595 + }, + { + "epoch": 14.983333333333333, + "grad_norm": 2.4401063130844283, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 328239896, + "step": 3596 + }, + { + "epoch": 14.983333333333333, + "loss": 0.02217935025691986, + "loss_ce": 3.5031724110012874e-05, + "loss_iou": 0.203125, + "loss_num": 0.004425048828125, + "loss_xval": 0.0220947265625, + "num_input_tokens_seen": 328239896, + "step": 3596 + }, + { + "epoch": 14.9875, + "grad_norm": 2.442549598675774, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 328331384, + "step": 3597 + }, + { + "epoch": 14.9875, + "loss": 0.05925852432847023, + "loss_ce": 6.205165118444711e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 328331384, + "step": 3597 + }, + { + "epoch": 14.991666666666667, + "grad_norm": 2.055400133996437, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 328422936, + "step": 3598 + }, + { + "epoch": 14.991666666666667, + "loss": 0.03320079296827316, + "loss_ce": 5.295316441333853e-06, + "loss_iou": 0.2109375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 328422936, + "step": 3598 + }, + { + "epoch": 14.995833333333334, + "grad_norm": 1.9865687878745821, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 328514684, + "step": 3599 + }, + { + "epoch": 14.995833333333334, + "loss": 0.0515916682779789, + "loss_ce": 0.00016955150931607932, + "loss_iou": 0.234375, + "loss_num": 0.01025390625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 328514684, + "step": 3599 + }, + { + "epoch": 15.0, + "grad_norm": 4.937737227297816, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 328605860, + "step": 3600 + }, + { + "epoch": 15.0, + "loss": 0.04353149235248566, + "loss_ce": 0.0005398534703999758, + "loss_iou": 0.291015625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 328605860, + "step": 3600 + }, + { + "epoch": 15.004166666666666, + "grad_norm": 4.7503086302857405, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 328697836, + "step": 3601 + }, + { + "epoch": 15.004166666666666, + "loss": 0.05971873551607132, + "loss_ce": 0.0028492261189967394, + "loss_iou": 0.25390625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 328697836, + "step": 3601 + }, + { + "epoch": 15.008333333333333, + "grad_norm": 4.643644494444229, + "learning_rate": 5e-05, + "loss": 0.0901, + "num_input_tokens_seen": 328789168, + "step": 3602 + }, + { + "epoch": 15.008333333333333, + "loss": 0.03825650364160538, + "loss_ce": 2.7188930289412383e-06, + "loss_iou": 0.2734375, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 328789168, + "step": 3602 + }, + { + "epoch": 15.0125, + "grad_norm": 3.1429965086107567, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 328879752, + "step": 3603 + }, + { + "epoch": 15.0125, + "loss": 0.03419603407382965, + "loss_ce": 9.26396605791524e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 328879752, + "step": 3603 + }, + { + "epoch": 15.016666666666667, + "grad_norm": 3.654518939083431, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 328971004, + "step": 3604 + }, + { + "epoch": 15.016666666666667, + "loss": 0.03746304288506508, + "loss_ce": 7.901009666966274e-05, + "loss_iou": 0.298828125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 328971004, + "step": 3604 + }, + { + "epoch": 15.020833333333334, + "grad_norm": 2.6319228156521968, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 329062488, + "step": 3605 + }, + { + "epoch": 15.020833333333334, + "loss": 0.07449492067098618, + "loss_ce": 1.5129454595808056e-06, + "loss_iou": 0.283203125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 329062488, + "step": 3605 + }, + { + "epoch": 15.025, + "grad_norm": 9.610639039151225, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 329153660, + "step": 3606 + }, + { + "epoch": 15.025, + "loss": 0.050093911588191986, + "loss_ce": 2.2191003154148348e-05, + "loss_iou": 0.2890625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 329153660, + "step": 3606 + }, + { + "epoch": 15.029166666666667, + "grad_norm": 2.774691887572913, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 329245388, + "step": 3607 + }, + { + "epoch": 15.029166666666667, + "loss": 0.07727287709712982, + "loss_ce": 2.3686347958573606e-06, + "loss_iou": 0.30078125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 329245388, + "step": 3607 + }, + { + "epoch": 15.033333333333333, + "grad_norm": 6.824942188349254, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 329337076, + "step": 3608 + }, + { + "epoch": 15.033333333333333, + "loss": 0.0612323172390461, + "loss_ce": 0.003782975487411022, + "loss_iou": 0.298828125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 329337076, + "step": 3608 + }, + { + "epoch": 15.0375, + "grad_norm": 4.709692774966925, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 329428540, + "step": 3609 + }, + { + "epoch": 15.0375, + "loss": 0.06253225356340408, + "loss_ce": 1.7341440070595127e-06, + "loss_iou": 0.30859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 329428540, + "step": 3609 + }, + { + "epoch": 15.041666666666666, + "grad_norm": 2.2319162438458187, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 329519428, + "step": 3610 + }, + { + "epoch": 15.041666666666666, + "loss": 0.025795357301831245, + "loss_ce": 4.1901939766830765e-06, + "loss_iou": 0.2265625, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 329519428, + "step": 3610 + }, + { + "epoch": 15.045833333333333, + "grad_norm": 3.7526942246991566, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 329611248, + "step": 3611 + }, + { + "epoch": 15.045833333333333, + "loss": 0.021277323365211487, + "loss_ce": 2.1829899196745828e-05, + "loss_iou": 0.322265625, + "loss_num": 0.004241943359375, + "loss_xval": 0.021240234375, + "num_input_tokens_seen": 329611248, + "step": 3611 + }, + { + "epoch": 15.05, + "grad_norm": 9.139209536856107, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 329701952, + "step": 3612 + }, + { + "epoch": 15.05, + "loss": 0.050409846007823944, + "loss_ce": 0.0002618358703330159, + "loss_iou": 0.271484375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 329701952, + "step": 3612 + }, + { + "epoch": 15.054166666666667, + "grad_norm": 2.2795942382093677, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 329793288, + "step": 3613 + }, + { + "epoch": 15.054166666666667, + "loss": 0.05421944707632065, + "loss_ce": 6.600363121833652e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 329793288, + "step": 3613 + }, + { + "epoch": 15.058333333333334, + "grad_norm": 2.303786347859785, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 329884836, + "step": 3614 + }, + { + "epoch": 15.058333333333334, + "loss": 0.07987719774246216, + "loss_ce": 7.373141124844551e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 329884836, + "step": 3614 + }, + { + "epoch": 15.0625, + "grad_norm": 2.4687676014422, + "learning_rate": 5e-05, + "loss": 0.0376, + "num_input_tokens_seen": 329975512, + "step": 3615 + }, + { + "epoch": 15.0625, + "loss": 0.03640202432870865, + "loss_ce": 2.184108780056704e-06, + "loss_iou": 0.251953125, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 329975512, + "step": 3615 + }, + { + "epoch": 15.066666666666666, + "grad_norm": 3.028327068968897, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 330066736, + "step": 3616 + }, + { + "epoch": 15.066666666666666, + "loss": 0.07391392439603806, + "loss_ce": 1.5607045497745275e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 330066736, + "step": 3616 + }, + { + "epoch": 15.070833333333333, + "grad_norm": 6.055814960292821, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 330157904, + "step": 3617 + }, + { + "epoch": 15.070833333333333, + "loss": 0.0358605720102787, + "loss_ce": 2.418515578028746e-06, + "loss_iou": 0.365234375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 330157904, + "step": 3617 + }, + { + "epoch": 15.075, + "grad_norm": 3.246438483533676, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 330248900, + "step": 3618 + }, + { + "epoch": 15.075, + "loss": 0.038288623094558716, + "loss_ce": 0.0002484585565980524, + "loss_iou": 0.099609375, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 330248900, + "step": 3618 + }, + { + "epoch": 15.079166666666667, + "grad_norm": 2.9315869774054555, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 330340052, + "step": 3619 + }, + { + "epoch": 15.079166666666667, + "loss": 0.09723498672246933, + "loss_ce": 5.990674253553152e-06, + "loss_iou": 0.1943359375, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 330340052, + "step": 3619 + }, + { + "epoch": 15.083333333333334, + "grad_norm": 2.854142624643113, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 330431588, + "step": 3620 + }, + { + "epoch": 15.083333333333334, + "loss": 0.061580635607242584, + "loss_ce": 2.6683097530622035e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 330431588, + "step": 3620 + }, + { + "epoch": 15.0875, + "grad_norm": 6.519389816149147, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 330523172, + "step": 3621 + }, + { + "epoch": 15.0875, + "loss": 0.11213652789592743, + "loss_ce": 0.00028960229246877134, + "loss_iou": 0.291015625, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 330523172, + "step": 3621 + }, + { + "epoch": 15.091666666666667, + "grad_norm": 3.8389583619655046, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 330614360, + "step": 3622 + }, + { + "epoch": 15.091666666666667, + "loss": 0.04965383931994438, + "loss_ce": 1.7411761064067832e-06, + "loss_iou": 0.3359375, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 330614360, + "step": 3622 + }, + { + "epoch": 15.095833333333333, + "grad_norm": 3.2749420098599127, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 330705472, + "step": 3623 + }, + { + "epoch": 15.095833333333333, + "loss": 0.05485723540186882, + "loss_ce": 1.8875684872909915e-06, + "loss_iou": 0.158203125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 330705472, + "step": 3623 + }, + { + "epoch": 15.1, + "grad_norm": 2.7497409913777666, + "learning_rate": 5e-05, + "loss": 0.0831, + "num_input_tokens_seen": 330796792, + "step": 3624 + }, + { + "epoch": 15.1, + "loss": 0.09329473972320557, + "loss_ce": 0.00027715889154933393, + "loss_iou": 0.2080078125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 330796792, + "step": 3624 + }, + { + "epoch": 15.104166666666666, + "grad_norm": 3.011368972190201, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 330887952, + "step": 3625 + }, + { + "epoch": 15.104166666666666, + "loss": 0.034572720527648926, + "loss_ce": 0.0007878522155806422, + "loss_iou": 0.1796875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 330887952, + "step": 3625 + }, + { + "epoch": 15.108333333333333, + "grad_norm": 2.537962697527776, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 330979616, + "step": 3626 + }, + { + "epoch": 15.108333333333333, + "loss": 0.05786379426717758, + "loss_ce": 1.772696850821376e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 330979616, + "step": 3626 + }, + { + "epoch": 15.1125, + "grad_norm": 4.051346261798277, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 331070492, + "step": 3627 + }, + { + "epoch": 15.1125, + "loss": 0.04033700376749039, + "loss_ce": 1.565598722663708e-05, + "loss_iou": 0.072265625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 331070492, + "step": 3627 + }, + { + "epoch": 15.116666666666667, + "grad_norm": 4.603074725929813, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 331160904, + "step": 3628 + }, + { + "epoch": 15.116666666666667, + "loss": 0.07139493525028229, + "loss_ce": 1.4317161912913434e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 331160904, + "step": 3628 + }, + { + "epoch": 15.120833333333334, + "grad_norm": 3.2322360125048104, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 331251768, + "step": 3629 + }, + { + "epoch": 15.120833333333334, + "loss": 0.05669593811035156, + "loss_ce": 2.4793273041723296e-05, + "loss_iou": 0.203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 331251768, + "step": 3629 + }, + { + "epoch": 15.125, + "grad_norm": 4.3279161634296495, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 331343652, + "step": 3630 + }, + { + "epoch": 15.125, + "loss": 0.04381554201245308, + "loss_ce": 2.2818392608314753e-05, + "loss_iou": 0.255859375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 331343652, + "step": 3630 + }, + { + "epoch": 15.129166666666666, + "grad_norm": 4.0162518502250455, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 331434912, + "step": 3631 + }, + { + "epoch": 15.129166666666666, + "loss": 0.04392173886299133, + "loss_ce": 9.468426287639886e-05, + "loss_iou": 0.05517578125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 331434912, + "step": 3631 + }, + { + "epoch": 15.133333333333333, + "grad_norm": 2.3099710437696825, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 331526496, + "step": 3632 + }, + { + "epoch": 15.133333333333333, + "loss": 0.04162848740816116, + "loss_ce": 2.510636250008247e-06, + "loss_iou": 0.1953125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 331526496, + "step": 3632 + }, + { + "epoch": 15.1375, + "grad_norm": 2.3738202552434315, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 331617872, + "step": 3633 + }, + { + "epoch": 15.1375, + "loss": 0.08647745847702026, + "loss_ce": 2.1168279999983497e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 331617872, + "step": 3633 + }, + { + "epoch": 15.141666666666667, + "grad_norm": 5.825434013763398, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 331709636, + "step": 3634 + }, + { + "epoch": 15.141666666666667, + "loss": 0.08738897740840912, + "loss_ce": 4.7672125219833106e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 331709636, + "step": 3634 + }, + { + "epoch": 15.145833333333334, + "grad_norm": 1.1844947286072622, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 331801648, + "step": 3635 + }, + { + "epoch": 15.145833333333334, + "loss": 0.05893933027982712, + "loss_ce": 3.27760171785485e-05, + "loss_iou": 0.26171875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 331801648, + "step": 3635 + }, + { + "epoch": 15.15, + "grad_norm": 4.953652363856765, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 331893020, + "step": 3636 + }, + { + "epoch": 15.15, + "loss": 0.06529572606086731, + "loss_ce": 0.0003543271741364151, + "loss_iou": 0.28125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 331893020, + "step": 3636 + }, + { + "epoch": 15.154166666666667, + "grad_norm": 3.4201083531366914, + "learning_rate": 5e-05, + "loss": 0.0775, + "num_input_tokens_seen": 331984624, + "step": 3637 + }, + { + "epoch": 15.154166666666667, + "loss": 0.09096341580152512, + "loss_ce": 0.04213528707623482, + "loss_iou": 0.2236328125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 331984624, + "step": 3637 + }, + { + "epoch": 15.158333333333333, + "grad_norm": 2.0518076595764794, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 332076068, + "step": 3638 + }, + { + "epoch": 15.158333333333333, + "loss": 0.04714092239737511, + "loss_ce": 9.807346214074641e-05, + "loss_iou": 0.28125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 332076068, + "step": 3638 + }, + { + "epoch": 15.1625, + "grad_norm": 4.691395639111677, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 332167548, + "step": 3639 + }, + { + "epoch": 15.1625, + "loss": 0.09495604038238525, + "loss_ce": 3.111699697910808e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 332167548, + "step": 3639 + }, + { + "epoch": 15.166666666666666, + "grad_norm": 4.28942326396276, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 332259140, + "step": 3640 + }, + { + "epoch": 15.166666666666666, + "loss": 0.03751256689429283, + "loss_ce": 6.46383978164522e-06, + "loss_iou": 0.306640625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 332259140, + "step": 3640 + }, + { + "epoch": 15.170833333333333, + "grad_norm": 3.1445487000295334, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 332350108, + "step": 3641 + }, + { + "epoch": 15.170833333333333, + "loss": 0.04145258292555809, + "loss_ce": 4.023050132673234e-05, + "loss_iou": 0.25, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 332350108, + "step": 3641 + }, + { + "epoch": 15.175, + "grad_norm": 2.438382664266843, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 332441836, + "step": 3642 + }, + { + "epoch": 15.175, + "loss": 0.04246381297707558, + "loss_ce": 9.778579988051206e-05, + "loss_iou": 0.22265625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 332441836, + "step": 3642 + }, + { + "epoch": 15.179166666666667, + "grad_norm": 2.68848117982997, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 332533088, + "step": 3643 + }, + { + "epoch": 15.179166666666667, + "loss": 0.09259673953056335, + "loss_ce": 6.405875410564477e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 332533088, + "step": 3643 + }, + { + "epoch": 15.183333333333334, + "grad_norm": 7.997747451676923, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 332624424, + "step": 3644 + }, + { + "epoch": 15.183333333333334, + "loss": 0.04149026423692703, + "loss_ce": 1.615691644474282e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 332624424, + "step": 3644 + }, + { + "epoch": 15.1875, + "grad_norm": 2.216687066270345, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 332715968, + "step": 3645 + }, + { + "epoch": 15.1875, + "loss": 0.06847859174013138, + "loss_ce": 0.0003328350721858442, + "loss_iou": 0.2255859375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 332715968, + "step": 3645 + }, + { + "epoch": 15.191666666666666, + "grad_norm": 1.7072100404061863, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 332807160, + "step": 3646 + }, + { + "epoch": 15.191666666666666, + "loss": 0.07139115035533905, + "loss_ce": 2.904944722104119e-06, + "loss_iou": 0.09033203125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 332807160, + "step": 3646 + }, + { + "epoch": 15.195833333333333, + "grad_norm": 40.80216402513253, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 332898216, + "step": 3647 + }, + { + "epoch": 15.195833333333333, + "loss": 0.04613684490323067, + "loss_ce": 3.241437298129313e-05, + "loss_iou": 0.244140625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 332898216, + "step": 3647 + }, + { + "epoch": 15.2, + "grad_norm": 2.800792431588098, + "learning_rate": 5e-05, + "loss": 0.1075, + "num_input_tokens_seen": 332989600, + "step": 3648 + }, + { + "epoch": 15.2, + "loss": 0.06875791400671005, + "loss_ce": 1.7064860003301874e-05, + "loss_iou": 0.248046875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 332989600, + "step": 3648 + }, + { + "epoch": 15.204166666666667, + "grad_norm": 6.290240929768387, + "learning_rate": 5e-05, + "loss": 0.0639, + "num_input_tokens_seen": 333081132, + "step": 3649 + }, + { + "epoch": 15.204166666666667, + "loss": 0.049857739359140396, + "loss_ce": 1.4904736417520326e-05, + "loss_iou": 0.185546875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 333081132, + "step": 3649 + }, + { + "epoch": 15.208333333333334, + "grad_norm": 4.303558482273825, + "learning_rate": 5e-05, + "loss": 0.1207, + "num_input_tokens_seen": 333172096, + "step": 3650 + }, + { + "epoch": 15.208333333333334, + "loss": 0.11342789977788925, + "loss_ce": 9.32297461986309e-06, + "loss_iou": 0.384765625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 333172096, + "step": 3650 + }, + { + "epoch": 15.2125, + "grad_norm": 2.49403191773149, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 333263640, + "step": 3651 + }, + { + "epoch": 15.2125, + "loss": 0.10153535008430481, + "loss_ce": 3.3662881833151914e-06, + "loss_iou": 0.24609375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 333263640, + "step": 3651 + }, + { + "epoch": 15.216666666666667, + "grad_norm": 4.595165834455283, + "learning_rate": 5e-05, + "loss": 0.103, + "num_input_tokens_seen": 333355240, + "step": 3652 + }, + { + "epoch": 15.216666666666667, + "loss": 0.06734571605920792, + "loss_ce": 0.0010310175130143762, + "loss_iou": 0.294921875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 333355240, + "step": 3652 + }, + { + "epoch": 15.220833333333333, + "grad_norm": 2.703493008041717, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 333445676, + "step": 3653 + }, + { + "epoch": 15.220833333333333, + "loss": 0.05132361128926277, + "loss_ce": 2.3561253328807652e-05, + "loss_iou": 0.185546875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 333445676, + "step": 3653 + }, + { + "epoch": 15.225, + "grad_norm": 1.4419388376125457, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 333537284, + "step": 3654 + }, + { + "epoch": 15.225, + "loss": 0.056093744933605194, + "loss_ce": 4.058098420500755e-05, + "loss_iou": 0.271484375, + "loss_num": 0.01116943359375, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 333537284, + "step": 3654 + }, + { + "epoch": 15.229166666666666, + "grad_norm": 1.8212927080711885, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 333628176, + "step": 3655 + }, + { + "epoch": 15.229166666666666, + "loss": 0.053495533764362335, + "loss_ce": 5.848135970154544e-06, + "loss_iou": 0.1552734375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 333628176, + "step": 3655 + }, + { + "epoch": 15.233333333333333, + "grad_norm": 1.6671543863495777, + "learning_rate": 5e-05, + "loss": 0.0413, + "num_input_tokens_seen": 333719424, + "step": 3656 + }, + { + "epoch": 15.233333333333333, + "loss": 0.03739180788397789, + "loss_ce": 0.00017562204448040575, + "loss_iou": 0.31640625, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 333719424, + "step": 3656 + }, + { + "epoch": 15.2375, + "grad_norm": 2.4930836197541706, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 333810568, + "step": 3657 + }, + { + "epoch": 15.2375, + "loss": 0.04177769273519516, + "loss_ce": 1.43868601298891e-05, + "loss_iou": 0.291015625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 333810568, + "step": 3657 + }, + { + "epoch": 15.241666666666667, + "grad_norm": 2.571465833651463, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 333901300, + "step": 3658 + }, + { + "epoch": 15.241666666666667, + "loss": 0.025437403470277786, + "loss_ce": 8.630683623778168e-06, + "loss_iou": 0.23828125, + "loss_num": 0.005096435546875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 333901300, + "step": 3658 + }, + { + "epoch": 15.245833333333334, + "grad_norm": 6.086562990212025, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 333992744, + "step": 3659 + }, + { + "epoch": 15.245833333333334, + "loss": 0.0372001938521862, + "loss_ce": 0.00012133685959270224, + "loss_iou": 0.330078125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 333992744, + "step": 3659 + }, + { + "epoch": 15.25, + "grad_norm": 3.414750208420596, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 334084372, + "step": 3660 + }, + { + "epoch": 15.25, + "loss": 0.0666206032037735, + "loss_ce": 1.5992854969226755e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 334084372, + "step": 3660 + }, + { + "epoch": 15.254166666666666, + "grad_norm": 2.9417870993211124, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 334175668, + "step": 3661 + }, + { + "epoch": 15.254166666666666, + "loss": 0.045750442892313004, + "loss_ce": 4.591216566041112e-06, + "loss_iou": 0.2021484375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 334175668, + "step": 3661 + }, + { + "epoch": 15.258333333333333, + "grad_norm": 4.215670622888787, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 334266836, + "step": 3662 + }, + { + "epoch": 15.258333333333333, + "loss": 0.05722185969352722, + "loss_ce": 1.6664025679347105e-05, + "loss_iou": 0.17578125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 334266836, + "step": 3662 + }, + { + "epoch": 15.2625, + "grad_norm": 9.404887556711703, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 334358152, + "step": 3663 + }, + { + "epoch": 15.2625, + "loss": 0.039673589169979095, + "loss_ce": 7.389946858893381e-07, + "loss_iou": 0.234375, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 334358152, + "step": 3663 + }, + { + "epoch": 15.266666666666667, + "grad_norm": 2.4858143158405177, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 334448752, + "step": 3664 + }, + { + "epoch": 15.266666666666667, + "loss": 0.061934880912303925, + "loss_ce": 9.101112664211541e-05, + "loss_iou": 0.224609375, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 334448752, + "step": 3664 + }, + { + "epoch": 15.270833333333334, + "grad_norm": 1.9686179382224764, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 334539696, + "step": 3665 + }, + { + "epoch": 15.270833333333334, + "loss": 0.13766661286354065, + "loss_ce": 0.0003375158121343702, + "loss_iou": 0.265625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 334539696, + "step": 3665 + }, + { + "epoch": 15.275, + "grad_norm": 4.593574012253526, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 334630908, + "step": 3666 + }, + { + "epoch": 15.275, + "loss": 0.05104167014360428, + "loss_ce": 1.0194967217103112e-06, + "loss_iou": 0.24609375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 334630908, + "step": 3666 + }, + { + "epoch": 15.279166666666667, + "grad_norm": 1.778737458581344, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 334722220, + "step": 3667 + }, + { + "epoch": 15.279166666666667, + "loss": 0.051860690116882324, + "loss_ce": 2.658232369867619e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 334722220, + "step": 3667 + }, + { + "epoch": 15.283333333333333, + "grad_norm": 4.357471825362792, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 334813188, + "step": 3668 + }, + { + "epoch": 15.283333333333333, + "loss": 0.043428897857666016, + "loss_ce": 3.290424865554087e-05, + "loss_iou": 0.21875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 334813188, + "step": 3668 + }, + { + "epoch": 15.2875, + "grad_norm": 2.7862133487294996, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 334904468, + "step": 3669 + }, + { + "epoch": 15.2875, + "loss": 0.046986065804958344, + "loss_ce": 4.255929979990469e-06, + "loss_iou": 0.306640625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 334904468, + "step": 3669 + }, + { + "epoch": 15.291666666666666, + "grad_norm": 3.181565942625571, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 334995568, + "step": 3670 + }, + { + "epoch": 15.291666666666666, + "loss": 0.051836755126714706, + "loss_ce": 2.553722879383713e-05, + "loss_iou": 0.216796875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 334995568, + "step": 3670 + }, + { + "epoch": 15.295833333333333, + "grad_norm": 2.2787344894670634, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 335086432, + "step": 3671 + }, + { + "epoch": 15.295833333333333, + "loss": 0.04101455956697464, + "loss_ce": 3.708266012836248e-05, + "loss_iou": 0.0986328125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 335086432, + "step": 3671 + }, + { + "epoch": 15.3, + "grad_norm": 1.786113667363368, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 335177840, + "step": 3672 + }, + { + "epoch": 15.3, + "loss": 0.033018480986356735, + "loss_ce": 1.3719601156481076e-05, + "loss_iou": 0.28125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 335177840, + "step": 3672 + }, + { + "epoch": 15.304166666666667, + "grad_norm": 2.113044401356243, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 335269108, + "step": 3673 + }, + { + "epoch": 15.304166666666667, + "loss": 0.07917618006467819, + "loss_ce": 0.0013105771504342556, + "loss_iou": 0.291015625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 335269108, + "step": 3673 + }, + { + "epoch": 15.308333333333334, + "grad_norm": 1.4949228973993478, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 335360528, + "step": 3674 + }, + { + "epoch": 15.308333333333334, + "loss": 0.029251961037516594, + "loss_ce": 8.631383821011696e-07, + "loss_iou": 0.28125, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 335360528, + "step": 3674 + }, + { + "epoch": 15.3125, + "grad_norm": 2.6006503688280285, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 335451900, + "step": 3675 + }, + { + "epoch": 15.3125, + "loss": 0.026928190141916275, + "loss_ce": 2.6945717763737775e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 335451900, + "step": 3675 + }, + { + "epoch": 15.316666666666666, + "grad_norm": 2.8694457709743233, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 335543404, + "step": 3676 + }, + { + "epoch": 15.316666666666666, + "loss": 0.040234267711639404, + "loss_ce": 3.8800335460109636e-05, + "loss_iou": 0.33203125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 335543404, + "step": 3676 + }, + { + "epoch": 15.320833333333333, + "grad_norm": 4.660576388851059, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 335635024, + "step": 3677 + }, + { + "epoch": 15.320833333333333, + "loss": 0.04173457995057106, + "loss_ce": 1.7916502201842377e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 335635024, + "step": 3677 + }, + { + "epoch": 15.325, + "grad_norm": 3.8747091262044515, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 335725892, + "step": 3678 + }, + { + "epoch": 15.325, + "loss": 0.052254341542720795, + "loss_ce": 6.148645752546145e-07, + "loss_iou": 0.1767578125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 335725892, + "step": 3678 + }, + { + "epoch": 15.329166666666667, + "grad_norm": 2.500436542626791, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 335815720, + "step": 3679 + }, + { + "epoch": 15.329166666666667, + "loss": 0.07567595690488815, + "loss_ce": 0.00011443771654739976, + "loss_iou": 0.2734375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 335815720, + "step": 3679 + }, + { + "epoch": 15.333333333333334, + "grad_norm": 1.7950942209894891, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 335906696, + "step": 3680 + }, + { + "epoch": 15.333333333333334, + "loss": 0.047455932945013046, + "loss_ce": 1.100194594982895e-06, + "loss_iou": 0.2001953125, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 335906696, + "step": 3680 + }, + { + "epoch": 15.3375, + "grad_norm": 2.603836604353161, + "learning_rate": 5e-05, + "loss": 0.0955, + "num_input_tokens_seen": 335997680, + "step": 3681 + }, + { + "epoch": 15.3375, + "loss": 0.13542352616786957, + "loss_ce": 1.774315933289472e-06, + "loss_iou": 0.17578125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 335997680, + "step": 3681 + }, + { + "epoch": 15.341666666666667, + "grad_norm": 2.4985064327993722, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 336088752, + "step": 3682 + }, + { + "epoch": 15.341666666666667, + "loss": 0.06178619712591171, + "loss_ce": 3.360460823387257e-06, + "loss_iou": 0.259765625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 336088752, + "step": 3682 + }, + { + "epoch": 15.345833333333333, + "grad_norm": 2.156982488750445, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 336180452, + "step": 3683 + }, + { + "epoch": 15.345833333333333, + "loss": 0.0459609180688858, + "loss_ce": 9.299816883867607e-05, + "loss_iou": 0.283203125, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 336180452, + "step": 3683 + }, + { + "epoch": 15.35, + "grad_norm": 2.593791060516741, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 336271508, + "step": 3684 + }, + { + "epoch": 15.35, + "loss": 0.058250319212675095, + "loss_ce": 2.2780919607612304e-05, + "loss_iou": 0.21875, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 336271508, + "step": 3684 + }, + { + "epoch": 15.354166666666666, + "grad_norm": 5.079357675038114, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 336363056, + "step": 3685 + }, + { + "epoch": 15.354166666666666, + "loss": 0.04351774975657463, + "loss_ce": 0.0006786992307752371, + "loss_iou": 0.21484375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 336363056, + "step": 3685 + }, + { + "epoch": 15.358333333333333, + "grad_norm": 3.1484998429187874, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 336454452, + "step": 3686 + }, + { + "epoch": 15.358333333333333, + "loss": 0.040700171142816544, + "loss_ce": 4.980157427780796e-06, + "loss_iou": 0.263671875, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 336454452, + "step": 3686 + }, + { + "epoch": 15.3625, + "grad_norm": 2.542866368364727, + "learning_rate": 5e-05, + "loss": 0.0367, + "num_input_tokens_seen": 336545208, + "step": 3687 + }, + { + "epoch": 15.3625, + "loss": 0.034080490469932556, + "loss_ce": 0.002601608633995056, + "loss_iou": 0.2060546875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 336545208, + "step": 3687 + }, + { + "epoch": 15.366666666666667, + "grad_norm": 1.6240964189368892, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 336637080, + "step": 3688 + }, + { + "epoch": 15.366666666666667, + "loss": 0.041000962257385254, + "loss_ce": 5.973120664748421e-07, + "loss_iou": 0.3046875, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 336637080, + "step": 3688 + }, + { + "epoch": 15.370833333333334, + "grad_norm": 3.5883647756619697, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 336728324, + "step": 3689 + }, + { + "epoch": 15.370833333333334, + "loss": 0.049876682460308075, + "loss_ce": 3.3272726795985363e-06, + "loss_iou": 0.1484375, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 336728324, + "step": 3689 + }, + { + "epoch": 15.375, + "grad_norm": 1.8711898203308313, + "learning_rate": 5e-05, + "loss": 0.03, + "num_input_tokens_seen": 336819684, + "step": 3690 + }, + { + "epoch": 15.375, + "loss": 0.01915108412504196, + "loss_ce": 1.3037391681791632e-06, + "loss_iou": 0.2021484375, + "loss_num": 0.0038299560546875, + "loss_xval": 0.0191650390625, + "num_input_tokens_seen": 336819684, + "step": 3690 + }, + { + "epoch": 15.379166666666666, + "grad_norm": 3.0763388136092193, + "learning_rate": 5e-05, + "loss": 0.1026, + "num_input_tokens_seen": 336910716, + "step": 3691 + }, + { + "epoch": 15.379166666666666, + "loss": 0.10127241164445877, + "loss_ce": 0.00048811070155352354, + "loss_iou": 0.2890625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 336910716, + "step": 3691 + }, + { + "epoch": 15.383333333333333, + "grad_norm": 1.6205927936647166, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 337002312, + "step": 3692 + }, + { + "epoch": 15.383333333333333, + "loss": 0.03497444838285446, + "loss_ce": 0.00011192738747922704, + "loss_iou": 0.275390625, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 337002312, + "step": 3692 + }, + { + "epoch": 15.3875, + "grad_norm": 1.8502923008152186, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 337093572, + "step": 3693 + }, + { + "epoch": 15.3875, + "loss": 0.12129370868206024, + "loss_ce": 1.603203713784751e-06, + "loss_iou": 0.162109375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 337093572, + "step": 3693 + }, + { + "epoch": 15.391666666666667, + "grad_norm": 3.603181757698564, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 337185096, + "step": 3694 + }, + { + "epoch": 15.391666666666667, + "loss": 0.032368484884500504, + "loss_ce": 1.2223916201037355e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.0064697265625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 337185096, + "step": 3694 + }, + { + "epoch": 15.395833333333334, + "grad_norm": 4.3647795322814265, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 337275612, + "step": 3695 + }, + { + "epoch": 15.395833333333334, + "loss": 0.09905469417572021, + "loss_ce": 4.0408194763585925e-05, + "loss_iou": 0.291015625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 337275612, + "step": 3695 + }, + { + "epoch": 15.4, + "grad_norm": 3.20284183256731, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 337366872, + "step": 3696 + }, + { + "epoch": 15.4, + "loss": 0.07359351217746735, + "loss_ce": 3.749448183043569e-07, + "loss_iou": 0.224609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 337366872, + "step": 3696 + }, + { + "epoch": 15.404166666666667, + "grad_norm": 1.755567458846452, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 337458240, + "step": 3697 + }, + { + "epoch": 15.404166666666667, + "loss": 0.06957247108221054, + "loss_ce": 6.86895873514004e-05, + "loss_iou": 0.29296875, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 337458240, + "step": 3697 + }, + { + "epoch": 15.408333333333333, + "grad_norm": 1.9444490464027675, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 337549956, + "step": 3698 + }, + { + "epoch": 15.408333333333333, + "loss": 0.03700829669833183, + "loss_ce": 2.8621196179301478e-05, + "loss_iou": 0.3046875, + "loss_num": 0.007415771484375, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 337549956, + "step": 3698 + }, + { + "epoch": 15.4125, + "grad_norm": 1.6221037989352245, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 337640524, + "step": 3699 + }, + { + "epoch": 15.4125, + "loss": 0.10857482999563217, + "loss_ce": 9.140936185758619e-07, + "loss_iou": 0.314453125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 337640524, + "step": 3699 + }, + { + "epoch": 15.416666666666666, + "grad_norm": 3.614329912471893, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 337731884, + "step": 3700 + }, + { + "epoch": 15.416666666666666, + "loss": 0.05494129657745361, + "loss_ce": 2.4917135306168348e-05, + "loss_iou": 0.19140625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 337731884, + "step": 3700 + }, + { + "epoch": 15.420833333333333, + "grad_norm": 2.238256260352113, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 337823232, + "step": 3701 + }, + { + "epoch": 15.420833333333333, + "loss": 0.0470069944858551, + "loss_ce": 0.0004142856923863292, + "loss_iou": 0.20703125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 337823232, + "step": 3701 + }, + { + "epoch": 15.425, + "grad_norm": 1.6980198759268228, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 337914872, + "step": 3702 + }, + { + "epoch": 15.425, + "loss": 0.040833212435245514, + "loss_ce": 1.5953277397784404e-05, + "loss_iou": 0.228515625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 337914872, + "step": 3702 + }, + { + "epoch": 15.429166666666667, + "grad_norm": 1.0651156234654016, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 338006000, + "step": 3703 + }, + { + "epoch": 15.429166666666667, + "loss": 0.028311312198638916, + "loss_ce": 0.0006394971860572696, + "loss_iou": 0.07275390625, + "loss_num": 0.005523681640625, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 338006000, + "step": 3703 + }, + { + "epoch": 15.433333333333334, + "grad_norm": 1.6898483447623203, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 338097044, + "step": 3704 + }, + { + "epoch": 15.433333333333334, + "loss": 0.037339404225349426, + "loss_ce": 0.0005886115832254291, + "loss_iou": 0.166015625, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 338097044, + "step": 3704 + }, + { + "epoch": 15.4375, + "grad_norm": 2.714367995414061, + "learning_rate": 5e-05, + "loss": 0.0962, + "num_input_tokens_seen": 338188904, + "step": 3705 + }, + { + "epoch": 15.4375, + "loss": 0.13568150997161865, + "loss_ce": 3.0868512112647295e-05, + "loss_iou": 0.28125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 338188904, + "step": 3705 + }, + { + "epoch": 15.441666666666666, + "grad_norm": 1.5087140513638733, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 338279972, + "step": 3706 + }, + { + "epoch": 15.441666666666666, + "loss": 0.10155145823955536, + "loss_ce": 4.223983523843344e-06, + "loss_iou": 0.330078125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 338279972, + "step": 3706 + }, + { + "epoch": 15.445833333333333, + "grad_norm": 1.318609741070438, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 338371048, + "step": 3707 + }, + { + "epoch": 15.445833333333333, + "loss": 0.06752166152000427, + "loss_ce": 1.5217010513879359e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 338371048, + "step": 3707 + }, + { + "epoch": 15.45, + "grad_norm": 1.7232080244761543, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 338462560, + "step": 3708 + }, + { + "epoch": 15.45, + "loss": 0.04955866187810898, + "loss_ce": 0.0001430758275091648, + "loss_iou": 0.171875, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 338462560, + "step": 3708 + }, + { + "epoch": 15.454166666666667, + "grad_norm": 2.1165823242380437, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 338553560, + "step": 3709 + }, + { + "epoch": 15.454166666666667, + "loss": 0.08360796421766281, + "loss_ce": 5.0576054491102695e-06, + "loss_iou": 0.265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 338553560, + "step": 3709 + }, + { + "epoch": 15.458333333333334, + "grad_norm": 3.0387382777837564, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 338643152, + "step": 3710 + }, + { + "epoch": 15.458333333333334, + "loss": 0.0352066308259964, + "loss_ce": 4.60691808257252e-06, + "loss_iou": 0.2490234375, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 338643152, + "step": 3710 + }, + { + "epoch": 15.4625, + "grad_norm": 1.7541841298933207, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 338734472, + "step": 3711 + }, + { + "epoch": 15.4625, + "loss": 0.0419999435544014, + "loss_ce": 0.00011456872744020075, + "loss_iou": 0.181640625, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 338734472, + "step": 3711 + }, + { + "epoch": 15.466666666666667, + "grad_norm": 6.008227836686593, + "learning_rate": 5e-05, + "loss": 0.0905, + "num_input_tokens_seen": 338826220, + "step": 3712 + }, + { + "epoch": 15.466666666666667, + "loss": 0.06825199723243713, + "loss_ce": 2.995487375301309e-05, + "loss_iou": 0.294921875, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 338826220, + "step": 3712 + }, + { + "epoch": 15.470833333333333, + "grad_norm": 3.9839977790493015, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 338917768, + "step": 3713 + }, + { + "epoch": 15.470833333333333, + "loss": 0.05084093660116196, + "loss_ce": 2.9167258617235348e-05, + "loss_iou": 0.275390625, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 338917768, + "step": 3713 + }, + { + "epoch": 15.475, + "grad_norm": 2.807098196818164, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 339008996, + "step": 3714 + }, + { + "epoch": 15.475, + "loss": 0.11402676999568939, + "loss_ce": 1.3098615454509854e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 339008996, + "step": 3714 + }, + { + "epoch": 15.479166666666666, + "grad_norm": 1.4004717586879123, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 339100072, + "step": 3715 + }, + { + "epoch": 15.479166666666666, + "loss": 0.056993044912815094, + "loss_ce": 1.4680579170089914e-06, + "loss_iou": 0.171875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 339100072, + "step": 3715 + }, + { + "epoch": 15.483333333333333, + "grad_norm": 1.7399946641840538, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 339189988, + "step": 3716 + }, + { + "epoch": 15.483333333333333, + "loss": 0.07177933305501938, + "loss_ce": 1.9884635094058467e-06, + "loss_iou": 0.189453125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 339189988, + "step": 3716 + }, + { + "epoch": 15.4875, + "grad_norm": 5.881810042717518, + "learning_rate": 5e-05, + "loss": 0.1044, + "num_input_tokens_seen": 339281092, + "step": 3717 + }, + { + "epoch": 15.4875, + "loss": 0.1643996387720108, + "loss_ce": 1.446839405616629e-06, + "loss_iou": 0.25390625, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 339281092, + "step": 3717 + }, + { + "epoch": 15.491666666666667, + "grad_norm": 2.7759819223595805, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 339372124, + "step": 3718 + }, + { + "epoch": 15.491666666666667, + "loss": 0.08161170780658722, + "loss_ce": 7.701344657107256e-06, + "loss_iou": 0.25390625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 339372124, + "step": 3718 + }, + { + "epoch": 15.495833333333334, + "grad_norm": 4.453573063728625, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 339463436, + "step": 3719 + }, + { + "epoch": 15.495833333333334, + "loss": 0.051303643733263016, + "loss_ce": 0.0001256657560588792, + "loss_iou": 0.259765625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 339463436, + "step": 3719 + }, + { + "epoch": 15.5, + "grad_norm": 3.943457288126614, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 339555000, + "step": 3720 + }, + { + "epoch": 15.5, + "loss": 0.08295242488384247, + "loss_ce": 2.090242378471885e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 339555000, + "step": 3720 + }, + { + "epoch": 15.504166666666666, + "grad_norm": 1.6440153934824757, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 339646268, + "step": 3721 + }, + { + "epoch": 15.504166666666666, + "loss": 0.048688024282455444, + "loss_ce": 0.00023373885778710246, + "loss_iou": 0.169921875, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 339646268, + "step": 3721 + }, + { + "epoch": 15.508333333333333, + "grad_norm": 4.5372580731869885, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 339737016, + "step": 3722 + }, + { + "epoch": 15.508333333333333, + "loss": 0.07211792469024658, + "loss_ce": 4.881694621872157e-06, + "loss_iou": 0.2734375, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 339737016, + "step": 3722 + }, + { + "epoch": 15.5125, + "grad_norm": 2.9173584963090877, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 339828408, + "step": 3723 + }, + { + "epoch": 15.5125, + "loss": 0.04044932872056961, + "loss_ce": 2.8794276659027673e-05, + "loss_iou": 0.306640625, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 339828408, + "step": 3723 + }, + { + "epoch": 15.516666666666667, + "grad_norm": 3.3504030651939107, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 339919824, + "step": 3724 + }, + { + "epoch": 15.516666666666667, + "loss": 0.05100865289568901, + "loss_ce": 0.0015854372177273035, + "loss_iou": 0.197265625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 339919824, + "step": 3724 + }, + { + "epoch": 15.520833333333334, + "grad_norm": 5.532775939930558, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 340011676, + "step": 3725 + }, + { + "epoch": 15.520833333333334, + "loss": 0.056049033999443054, + "loss_ce": 9.505392517894506e-05, + "loss_iou": 0.3359375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 340011676, + "step": 3725 + }, + { + "epoch": 15.525, + "grad_norm": 2.5293521700652435, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 340102328, + "step": 3726 + }, + { + "epoch": 15.525, + "loss": 0.13496966660022736, + "loss_ce": 0.0020808603148907423, + "loss_iou": 0.224609375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 340102328, + "step": 3726 + }, + { + "epoch": 15.529166666666667, + "grad_norm": 2.10725062293184, + "learning_rate": 5e-05, + "loss": 0.0869, + "num_input_tokens_seen": 340193392, + "step": 3727 + }, + { + "epoch": 15.529166666666667, + "loss": 0.08296191692352295, + "loss_ce": 4.565744529827498e-05, + "loss_iou": 0.296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 340193392, + "step": 3727 + }, + { + "epoch": 15.533333333333333, + "grad_norm": 2.7269435977165535, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 340284680, + "step": 3728 + }, + { + "epoch": 15.533333333333333, + "loss": 0.04044805467128754, + "loss_ce": 1.2264949873497244e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 340284680, + "step": 3728 + }, + { + "epoch": 15.5375, + "grad_norm": 2.668113882514995, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 340375616, + "step": 3729 + }, + { + "epoch": 15.5375, + "loss": 0.0317254438996315, + "loss_ce": 1.767879439285025e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 340375616, + "step": 3729 + }, + { + "epoch": 15.541666666666666, + "grad_norm": 2.5479889869323333, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 340467096, + "step": 3730 + }, + { + "epoch": 15.541666666666666, + "loss": 0.09114043414592743, + "loss_ce": 4.546367927105166e-05, + "loss_iou": 0.28125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 340467096, + "step": 3730 + }, + { + "epoch": 15.545833333333333, + "grad_norm": 2.9021149806382938, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 340558564, + "step": 3731 + }, + { + "epoch": 15.545833333333333, + "loss": 0.03785187005996704, + "loss_ce": 1.0071442375192419e-05, + "loss_iou": 0.259765625, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 340558564, + "step": 3731 + }, + { + "epoch": 15.55, + "grad_norm": 1.3694067401097818, + "learning_rate": 5e-05, + "loss": 0.0273, + "num_input_tokens_seen": 340649084, + "step": 3732 + }, + { + "epoch": 15.55, + "loss": 0.031138941645622253, + "loss_ce": 0.0001483419182477519, + "loss_iou": 0.28125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 340649084, + "step": 3732 + }, + { + "epoch": 15.554166666666667, + "grad_norm": 2.3170750731989416, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 340740796, + "step": 3733 + }, + { + "epoch": 15.554166666666667, + "loss": 0.06054166704416275, + "loss_ce": 2.4206601665355265e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 340740796, + "step": 3733 + }, + { + "epoch": 15.558333333333334, + "grad_norm": 2.8160029894918397, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 340831928, + "step": 3734 + }, + { + "epoch": 15.558333333333334, + "loss": 0.052570268511772156, + "loss_ce": 4.951789742335677e-05, + "loss_iou": 0.162109375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 340831928, + "step": 3734 + }, + { + "epoch": 15.5625, + "grad_norm": 5.974204890106191, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 340924088, + "step": 3735 + }, + { + "epoch": 15.5625, + "loss": 0.07624347507953644, + "loss_ce": 1.056129258358851e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 340924088, + "step": 3735 + }, + { + "epoch": 15.566666666666666, + "grad_norm": 2.8797770212862535, + "learning_rate": 5e-05, + "loss": 0.0965, + "num_input_tokens_seen": 341016048, + "step": 3736 + }, + { + "epoch": 15.566666666666666, + "loss": 0.07347431033849716, + "loss_ce": 4.901489955955185e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 341016048, + "step": 3736 + }, + { + "epoch": 15.570833333333333, + "grad_norm": 1.1495050094163188, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 341107272, + "step": 3737 + }, + { + "epoch": 15.570833333333333, + "loss": 0.030686549842357635, + "loss_ce": 0.0005428112344816327, + "loss_iou": 0.109375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 341107272, + "step": 3737 + }, + { + "epoch": 15.575, + "grad_norm": 4.9596779126302355, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 341199252, + "step": 3738 + }, + { + "epoch": 15.575, + "loss": 0.0417996421456337, + "loss_ce": 4.3968546378891915e-05, + "loss_iou": 0.10791015625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 341199252, + "step": 3738 + }, + { + "epoch": 15.579166666666667, + "grad_norm": 2.502675693921476, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 341290672, + "step": 3739 + }, + { + "epoch": 15.579166666666667, + "loss": 0.039896052330732346, + "loss_ce": 0.00015453486412297934, + "loss_iou": 0.28125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 341290672, + "step": 3739 + }, + { + "epoch": 15.583333333333334, + "grad_norm": 3.319192170242713, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 341381892, + "step": 3740 + }, + { + "epoch": 15.583333333333334, + "loss": 0.061792608350515366, + "loss_ce": 4.028591138194315e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 341381892, + "step": 3740 + }, + { + "epoch": 15.5875, + "grad_norm": 3.7933353250214163, + "learning_rate": 5e-05, + "loss": 0.0376, + "num_input_tokens_seen": 341473844, + "step": 3741 + }, + { + "epoch": 15.5875, + "loss": 0.03918928653001785, + "loss_ce": 3.5236706025898457e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 341473844, + "step": 3741 + }, + { + "epoch": 15.591666666666667, + "grad_norm": 1.8308961731771252, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 341564556, + "step": 3742 + }, + { + "epoch": 15.591666666666667, + "loss": 0.04009087011218071, + "loss_ce": 1.365911157336086e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 341564556, + "step": 3742 + }, + { + "epoch": 15.595833333333333, + "grad_norm": 4.196156902144123, + "learning_rate": 5e-05, + "loss": 0.0908, + "num_input_tokens_seen": 341656220, + "step": 3743 + }, + { + "epoch": 15.595833333333333, + "loss": 0.05994286388158798, + "loss_ce": 6.341123025777051e-06, + "loss_iou": 0.287109375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 341656220, + "step": 3743 + }, + { + "epoch": 15.6, + "grad_norm": 5.485161124921998, + "learning_rate": 5e-05, + "loss": 0.0906, + "num_input_tokens_seen": 341746808, + "step": 3744 + }, + { + "epoch": 15.6, + "loss": 0.11784628033638, + "loss_ce": 2.6530929062573705e-06, + "loss_iou": 0.25390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 341746808, + "step": 3744 + }, + { + "epoch": 15.604166666666666, + "grad_norm": 7.266184511187221, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 341838700, + "step": 3745 + }, + { + "epoch": 15.604166666666666, + "loss": 0.11083197593688965, + "loss_ce": 0.0003125613438896835, + "loss_iou": 0.2294921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 341838700, + "step": 3745 + }, + { + "epoch": 15.608333333333333, + "grad_norm": 2.7001024941371177, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 341929980, + "step": 3746 + }, + { + "epoch": 15.608333333333333, + "loss": 0.03403391316533089, + "loss_ce": 0.00028910342371091247, + "loss_iou": 0.318359375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 341929980, + "step": 3746 + }, + { + "epoch": 15.6125, + "grad_norm": 3.8337372188980274, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 342021168, + "step": 3747 + }, + { + "epoch": 15.6125, + "loss": 0.08094524592161179, + "loss_ce": 1.2631683603103738e-05, + "loss_iou": 0.296875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 342021168, + "step": 3747 + }, + { + "epoch": 15.616666666666667, + "grad_norm": 3.841329253786725, + "learning_rate": 5e-05, + "loss": 0.0403, + "num_input_tokens_seen": 342112668, + "step": 3748 + }, + { + "epoch": 15.616666666666667, + "loss": 0.047874949872493744, + "loss_ce": 5.3906893299426883e-05, + "loss_iou": 0.38671875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 342112668, + "step": 3748 + }, + { + "epoch": 15.620833333333334, + "grad_norm": 3.7342170529204735, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 342204740, + "step": 3749 + }, + { + "epoch": 15.620833333333334, + "loss": 0.040019918233156204, + "loss_ce": 0.0010489715496078134, + "loss_iou": 0.32421875, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 342204740, + "step": 3749 + }, + { + "epoch": 15.625, + "grad_norm": 2.857990856155654, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 342295692, + "step": 3750 + }, + { + "epoch": 15.625, + "eval_seeclick_CIoU": 0.2125411108136177, + "eval_seeclick_GIoU": 0.20485394820570946, + "eval_seeclick_IoU": 0.3233272135257721, + "eval_seeclick_MAE_all": 0.09805087000131607, + "eval_seeclick_MAE_h": 0.08882058411836624, + "eval_seeclick_MAE_w": 0.20213264226913452, + "eval_seeclick_MAE_x_boxes": 0.22167783975601196, + "eval_seeclick_MAE_y_boxes": 0.09438033029437065, + "eval_seeclick_NUM_probability": 0.9999986588954926, + "eval_seeclick_inside_bbox": 0.4474431872367859, + "eval_seeclick_loss": 0.5405165553092957, + "eval_seeclick_loss_ce": 0.11569063365459442, + "eval_seeclick_loss_iou": 0.4593505859375, + "eval_seeclick_loss_num": 0.0818939208984375, + "eval_seeclick_loss_xval": 0.40948486328125, + "eval_seeclick_runtime": 76.5532, + "eval_seeclick_samples_per_second": 0.562, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 342295692, + "step": 3750 + }, + { + "epoch": 15.625, + "eval_icons_CIoU": 0.3149517923593521, + "eval_icons_GIoU": 0.32262296974658966, + "eval_icons_IoU": 0.3942710757255554, + "eval_icons_MAE_all": 0.06905381754040718, + "eval_icons_MAE_h": 0.1509195640683174, + "eval_icons_MAE_w": 0.09647001326084137, + "eval_icons_MAE_x_boxes": 0.09488264471292496, + "eval_icons_MAE_y_boxes": 0.15092498809099197, + "eval_icons_NUM_probability": 0.9999993443489075, + "eval_icons_inside_bbox": 0.515625, + "eval_icons_loss": 0.35029152035713196, + "eval_icons_loss_ce": 5.0143926273449324e-05, + "eval_icons_loss_iou": 0.26190185546875, + "eval_icons_loss_num": 0.0713348388671875, + "eval_icons_loss_xval": 0.3564453125, + "eval_icons_runtime": 97.8887, + "eval_icons_samples_per_second": 0.511, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 342295692, + "step": 3750 + }, + { + "epoch": 15.625, + "eval_screenspot_CIoU": 0.35719852646191913, + "eval_screenspot_GIoU": 0.34474583466847736, + "eval_screenspot_IoU": 0.4328311284383138, + "eval_screenspot_MAE_all": 0.09720409661531448, + "eval_screenspot_MAE_h": 0.09548324843247731, + "eval_screenspot_MAE_w": 0.20179721216360727, + "eval_screenspot_MAE_x_boxes": 0.1839049607515335, + "eval_screenspot_MAE_y_boxes": 0.09491645296414693, + "eval_screenspot_NUM_probability": 0.998277485370636, + "eval_screenspot_inside_bbox": 0.6833333373069763, + "eval_screenspot_loss": 0.49055179953575134, + "eval_screenspot_loss_ce": 0.002939210297578635, + "eval_screenspot_loss_iou": 0.3556315104166667, + "eval_screenspot_loss_num": 0.09782918294270833, + "eval_screenspot_loss_xval": 0.4890543619791667, + "eval_screenspot_runtime": 153.1993, + "eval_screenspot_samples_per_second": 0.581, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 342295692, + "step": 3750 + }, + { + "epoch": 15.625, + "eval_compot_CIoU": 0.5043479949235916, + "eval_compot_GIoU": 0.5061911940574646, + "eval_compot_IoU": 0.5701824128627777, + "eval_compot_MAE_all": 0.04921010322868824, + "eval_compot_MAE_h": 0.05424528568983078, + "eval_compot_MAE_w": 0.1251702681183815, + "eval_compot_MAE_x_boxes": 0.12915247306227684, + "eval_compot_MAE_y_boxes": 0.05076356418430805, + "eval_compot_NUM_probability": 0.999998539686203, + "eval_compot_inside_bbox": 0.7395833432674408, + "eval_compot_loss": 0.279593288898468, + "eval_compot_loss_ce": 0.04102969542145729, + "eval_compot_loss_iou": 0.3662109375, + "eval_compot_loss_num": 0.0433807373046875, + "eval_compot_loss_xval": 0.2169189453125, + "eval_compot_runtime": 88.2697, + "eval_compot_samples_per_second": 0.566, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 342295692, + "step": 3750 + }, + { + "epoch": 15.625, + "loss": 0.27310457825660706, + "loss_ce": 0.04312410205602646, + "loss_iou": 0.357421875, + "loss_num": 0.0458984375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 342295692, + "step": 3750 + }, + { + "epoch": 15.629166666666666, + "grad_norm": 2.1199125592979478, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 342387188, + "step": 3751 + }, + { + "epoch": 15.629166666666666, + "loss": 0.03941526263952255, + "loss_ce": 0.0004900917992927134, + "loss_iou": 0.255859375, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 342387188, + "step": 3751 + }, + { + "epoch": 15.633333333333333, + "grad_norm": 1.4474490571523944, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 342478664, + "step": 3752 + }, + { + "epoch": 15.633333333333333, + "loss": 0.0719941109418869, + "loss_ce": 4.8926642193691805e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 342478664, + "step": 3752 + }, + { + "epoch": 15.6375, + "grad_norm": 1.6387608992855431, + "learning_rate": 5e-05, + "loss": 0.0272, + "num_input_tokens_seen": 342570240, + "step": 3753 + }, + { + "epoch": 15.6375, + "loss": 0.028534431010484695, + "loss_ce": 0.00044300025911070406, + "loss_iou": 0.11669921875, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 342570240, + "step": 3753 + }, + { + "epoch": 15.641666666666667, + "grad_norm": 4.2780588978724365, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 342662172, + "step": 3754 + }, + { + "epoch": 15.641666666666667, + "loss": 0.03853003680706024, + "loss_ce": 1.5978862393239979e-06, + "loss_iou": 0.162109375, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 342662172, + "step": 3754 + }, + { + "epoch": 15.645833333333334, + "grad_norm": 7.357155194854795, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 342753944, + "step": 3755 + }, + { + "epoch": 15.645833333333334, + "loss": 0.030950158834457397, + "loss_ce": 0.0001884377415990457, + "loss_iou": 0.154296875, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 342753944, + "step": 3755 + }, + { + "epoch": 15.65, + "grad_norm": 3.4080494115279896, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 342846076, + "step": 3756 + }, + { + "epoch": 15.65, + "loss": 0.069695845246315, + "loss_ce": 0.0017408326966688037, + "loss_iou": 0.25, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 342846076, + "step": 3756 + }, + { + "epoch": 15.654166666666667, + "grad_norm": 2.4482605988739152, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 342937468, + "step": 3757 + }, + { + "epoch": 15.654166666666667, + "loss": 0.04597485437989235, + "loss_ce": 7.641684351256117e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 342937468, + "step": 3757 + }, + { + "epoch": 15.658333333333333, + "grad_norm": 4.3127548720585205, + "learning_rate": 5e-05, + "loss": 0.074, + "num_input_tokens_seen": 343028756, + "step": 3758 + }, + { + "epoch": 15.658333333333333, + "loss": 0.1078638955950737, + "loss_ce": 0.001338477828539908, + "loss_iou": 0.259765625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 343028756, + "step": 3758 + }, + { + "epoch": 15.6625, + "grad_norm": 3.222255941415551, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 343120120, + "step": 3759 + }, + { + "epoch": 15.6625, + "loss": 0.06610622256994247, + "loss_ce": 0.00017299478349741548, + "loss_iou": 0.2119140625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 343120120, + "step": 3759 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 2.4420159963419183, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 343211932, + "step": 3760 + }, + { + "epoch": 15.666666666666666, + "loss": 0.05773644149303436, + "loss_ce": 0.0019045292865484953, + "loss_iou": 0.283203125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 343211932, + "step": 3760 + }, + { + "epoch": 15.670833333333333, + "grad_norm": 2.647108848460681, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 343302424, + "step": 3761 + }, + { + "epoch": 15.670833333333333, + "loss": 0.052173398435115814, + "loss_ce": 0.002406858140602708, + "loss_iou": 0.16015625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 343302424, + "step": 3761 + }, + { + "epoch": 15.675, + "grad_norm": 3.1681592427004786, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 343393708, + "step": 3762 + }, + { + "epoch": 15.675, + "loss": 0.0612005740404129, + "loss_ce": 7.386229844996706e-05, + "loss_iou": 0.166015625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 343393708, + "step": 3762 + }, + { + "epoch": 15.679166666666667, + "grad_norm": 2.8364370811250232, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 343485656, + "step": 3763 + }, + { + "epoch": 15.679166666666667, + "loss": 0.08106990158557892, + "loss_ce": 1.5209958291961811e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 343485656, + "step": 3763 + }, + { + "epoch": 15.683333333333334, + "grad_norm": 3.7959855480425033, + "learning_rate": 5e-05, + "loss": 0.0782, + "num_input_tokens_seen": 343576864, + "step": 3764 + }, + { + "epoch": 15.683333333333334, + "loss": 0.049277886748313904, + "loss_ce": 0.004951103124767542, + "loss_iou": 0.314453125, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 343576864, + "step": 3764 + }, + { + "epoch": 15.6875, + "grad_norm": 2.874665218560939, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 343668400, + "step": 3765 + }, + { + "epoch": 15.6875, + "loss": 0.0330917127430439, + "loss_ce": 8.695048018125817e-05, + "loss_iou": 0.326171875, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 343668400, + "step": 3765 + }, + { + "epoch": 15.691666666666666, + "grad_norm": 3.00878583114591, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 343760056, + "step": 3766 + }, + { + "epoch": 15.691666666666666, + "loss": 0.0372559018433094, + "loss_ce": 0.0002838582149706781, + "loss_iou": 0.1953125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 343760056, + "step": 3766 + }, + { + "epoch": 15.695833333333333, + "grad_norm": 3.6691462198495874, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 343851080, + "step": 3767 + }, + { + "epoch": 15.695833333333333, + "loss": 0.03983243927359581, + "loss_ce": 5.2777741075260565e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 343851080, + "step": 3767 + }, + { + "epoch": 15.7, + "grad_norm": 2.070024420063255, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 343942680, + "step": 3768 + }, + { + "epoch": 15.7, + "loss": 0.10269272327423096, + "loss_ce": 1.63358126883395e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 343942680, + "step": 3768 + }, + { + "epoch": 15.704166666666667, + "grad_norm": 1.5839008901424858, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 344033376, + "step": 3769 + }, + { + "epoch": 15.704166666666667, + "loss": 0.11858565360307693, + "loss_ce": 0.0005284000653773546, + "loss_iou": 0.201171875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 344033376, + "step": 3769 + }, + { + "epoch": 15.708333333333334, + "grad_norm": 1.2896545565054265, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 344124880, + "step": 3770 + }, + { + "epoch": 15.708333333333334, + "loss": 0.03080589696764946, + "loss_ce": 2.8919712349306792e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 344124880, + "step": 3770 + }, + { + "epoch": 15.7125, + "grad_norm": 2.5516228142288355, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 344214680, + "step": 3771 + }, + { + "epoch": 15.7125, + "loss": 0.0802692100405693, + "loss_ce": 1.560797682031989e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 344214680, + "step": 3771 + }, + { + "epoch": 15.716666666666667, + "grad_norm": 1.7650926491566428, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 344305992, + "step": 3772 + }, + { + "epoch": 15.716666666666667, + "loss": 0.03394448012113571, + "loss_ce": 0.0013974817702546716, + "loss_iou": 0.1640625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 344305992, + "step": 3772 + }, + { + "epoch": 15.720833333333333, + "grad_norm": 2.7475410562631817, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 344397200, + "step": 3773 + }, + { + "epoch": 15.720833333333333, + "loss": 0.03981112688779831, + "loss_ce": 0.0001535326591692865, + "loss_iou": 0.220703125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 344397200, + "step": 3773 + }, + { + "epoch": 15.725, + "grad_norm": 4.791021482348356, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 344488164, + "step": 3774 + }, + { + "epoch": 15.725, + "loss": 0.05577407032251358, + "loss_ce": 4.897027974948287e-05, + "loss_iou": 0.314453125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 344488164, + "step": 3774 + }, + { + "epoch": 15.729166666666666, + "grad_norm": 3.883300208504697, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 344579300, + "step": 3775 + }, + { + "epoch": 15.729166666666666, + "loss": 0.051646310836076736, + "loss_ce": 2.5828194338828325e-05, + "loss_iou": 0.28125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 344579300, + "step": 3775 + }, + { + "epoch": 15.733333333333333, + "grad_norm": 3.076657890558606, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 344670452, + "step": 3776 + }, + { + "epoch": 15.733333333333333, + "loss": 0.04161835089325905, + "loss_ce": 3.814951560343616e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 344670452, + "step": 3776 + }, + { + "epoch": 15.7375, + "grad_norm": 7.921467089919973, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 344761936, + "step": 3777 + }, + { + "epoch": 15.7375, + "loss": 0.04783114045858383, + "loss_ce": 0.00010927912080660462, + "loss_iou": 0.193359375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 344761936, + "step": 3777 + }, + { + "epoch": 15.741666666666667, + "grad_norm": 4.195204208081409, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 344853456, + "step": 3778 + }, + { + "epoch": 15.741666666666667, + "loss": 0.04136732965707779, + "loss_ce": 7.541826221313386e-07, + "loss_iou": 0.302734375, + "loss_num": 0.00830078125, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 344853456, + "step": 3778 + }, + { + "epoch": 15.745833333333334, + "grad_norm": 11.76621668340554, + "learning_rate": 5e-05, + "loss": 0.1202, + "num_input_tokens_seen": 344944524, + "step": 3779 + }, + { + "epoch": 15.745833333333334, + "loss": 0.189706951379776, + "loss_ce": 2.4941593437688425e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 344944524, + "step": 3779 + }, + { + "epoch": 15.75, + "grad_norm": 5.560246181302266, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 345035528, + "step": 3780 + }, + { + "epoch": 15.75, + "loss": 0.07846195995807648, + "loss_ce": 1.2656909120778437e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 345035528, + "step": 3780 + }, + { + "epoch": 15.754166666666666, + "grad_norm": 2.7483561888499244, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 345126764, + "step": 3781 + }, + { + "epoch": 15.754166666666666, + "loss": 0.04758468642830849, + "loss_ce": 0.005729829426854849, + "loss_iou": 0.1875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 345126764, + "step": 3781 + }, + { + "epoch": 15.758333333333333, + "grad_norm": 3.180942509291167, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 345218748, + "step": 3782 + }, + { + "epoch": 15.758333333333333, + "loss": 0.02534525655210018, + "loss_ce": 3.855386603390798e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 345218748, + "step": 3782 + }, + { + "epoch": 15.7625, + "grad_norm": 3.0123041833531845, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 345309368, + "step": 3783 + }, + { + "epoch": 15.7625, + "loss": 0.06461061537265778, + "loss_ce": 6.59371362417005e-05, + "loss_iou": 0.33203125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 345309368, + "step": 3783 + }, + { + "epoch": 15.766666666666667, + "grad_norm": 3.4175700842294456, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 345400572, + "step": 3784 + }, + { + "epoch": 15.766666666666667, + "loss": 0.030454548075795174, + "loss_ce": 2.852268335118424e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 345400572, + "step": 3784 + }, + { + "epoch": 15.770833333333334, + "grad_norm": 5.864726623125807, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 345491616, + "step": 3785 + }, + { + "epoch": 15.770833333333334, + "loss": 0.035654254257678986, + "loss_ce": 0.004770464263856411, + "loss_iou": 0.314453125, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 345491616, + "step": 3785 + }, + { + "epoch": 15.775, + "grad_norm": 5.818370725960535, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 345583084, + "step": 3786 + }, + { + "epoch": 15.775, + "loss": 0.04433032125234604, + "loss_ce": 0.0003087124787271023, + "loss_iou": 0.22265625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 345583084, + "step": 3786 + }, + { + "epoch": 15.779166666666667, + "grad_norm": 1.1987201829282095, + "learning_rate": 5e-05, + "loss": 0.0262, + "num_input_tokens_seen": 345674276, + "step": 3787 + }, + { + "epoch": 15.779166666666667, + "loss": 0.019078008830547333, + "loss_ce": 1.2152680938015692e-05, + "loss_iou": 0.2109375, + "loss_num": 0.003814697265625, + "loss_xval": 0.01904296875, + "num_input_tokens_seen": 345674276, + "step": 3787 + }, + { + "epoch": 15.783333333333333, + "grad_norm": 15.543687534674307, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 345765564, + "step": 3788 + }, + { + "epoch": 15.783333333333333, + "loss": 0.03491397574543953, + "loss_ce": 1.8656713791642687e-06, + "loss_iou": 0.37890625, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 345765564, + "step": 3788 + }, + { + "epoch": 15.7875, + "grad_norm": 5.564477666133809, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 345858000, + "step": 3789 + }, + { + "epoch": 15.7875, + "loss": 0.04144046828150749, + "loss_ce": 7.389086385956034e-05, + "loss_iou": 0.228515625, + "loss_num": 0.00830078125, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 345858000, + "step": 3789 + }, + { + "epoch": 15.791666666666666, + "grad_norm": 2.1167335482334555, + "learning_rate": 5e-05, + "loss": 0.0632, + "num_input_tokens_seen": 345948800, + "step": 3790 + }, + { + "epoch": 15.791666666666666, + "loss": 0.09141284227371216, + "loss_ce": 0.0029042325913906097, + "loss_iou": 0.224609375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 345948800, + "step": 3790 + }, + { + "epoch": 15.795833333333333, + "grad_norm": 4.493681166342253, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 346040064, + "step": 3791 + }, + { + "epoch": 15.795833333333333, + "loss": 0.026475880295038223, + "loss_ce": 1.8809511175277294e-06, + "loss_iou": 0.33984375, + "loss_num": 0.005279541015625, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 346040064, + "step": 3791 + }, + { + "epoch": 15.8, + "grad_norm": 3.179135791427188, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 346131284, + "step": 3792 + }, + { + "epoch": 15.8, + "loss": 0.039249323308467865, + "loss_ce": 0.0031012536492198706, + "loss_iou": 0.345703125, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 346131284, + "step": 3792 + }, + { + "epoch": 15.804166666666667, + "grad_norm": 3.056206637118679, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 346223080, + "step": 3793 + }, + { + "epoch": 15.804166666666667, + "loss": 0.06411039084196091, + "loss_ce": 6.16227844147943e-05, + "loss_iou": 0.158203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 346223080, + "step": 3793 + }, + { + "epoch": 15.808333333333334, + "grad_norm": 3.8478526137418205, + "learning_rate": 5e-05, + "loss": 0.0413, + "num_input_tokens_seen": 346314700, + "step": 3794 + }, + { + "epoch": 15.808333333333334, + "loss": 0.028593793511390686, + "loss_ce": 2.9341121262405068e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 346314700, + "step": 3794 + }, + { + "epoch": 15.8125, + "grad_norm": 3.0937844433452404, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 346406248, + "step": 3795 + }, + { + "epoch": 15.8125, + "loss": 0.07463543117046356, + "loss_ce": 0.00020305861835367978, + "loss_iou": 0.259765625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 346406248, + "step": 3795 + }, + { + "epoch": 15.816666666666666, + "grad_norm": 6.651472026105777, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 346495560, + "step": 3796 + }, + { + "epoch": 15.816666666666666, + "loss": 0.058466482907533646, + "loss_ce": 0.0013680945849046111, + "loss_iou": 0.1396484375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 346495560, + "step": 3796 + }, + { + "epoch": 15.820833333333333, + "grad_norm": 3.268823329261245, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 346585652, + "step": 3797 + }, + { + "epoch": 15.820833333333333, + "loss": 0.05235815420746803, + "loss_ce": 0.003011230146512389, + "loss_iou": 0.205078125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 346585652, + "step": 3797 + }, + { + "epoch": 15.825, + "grad_norm": 2.374070775676116, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 346677300, + "step": 3798 + }, + { + "epoch": 15.825, + "loss": 0.0423770397901535, + "loss_ce": 3.385990794413374e-06, + "loss_iou": 0.28125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 346677300, + "step": 3798 + }, + { + "epoch": 15.829166666666667, + "grad_norm": 3.8863831954622, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 346768528, + "step": 3799 + }, + { + "epoch": 15.829166666666667, + "loss": 0.0791381374001503, + "loss_ce": 0.003668165998533368, + "loss_iou": 0.375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 346768528, + "step": 3799 + }, + { + "epoch": 15.833333333333334, + "grad_norm": 2.455564703756506, + "learning_rate": 5e-05, + "loss": 0.0457, + "num_input_tokens_seen": 346860120, + "step": 3800 + }, + { + "epoch": 15.833333333333334, + "loss": 0.05979982763528824, + "loss_ce": 0.0008017204236239195, + "loss_iou": 0.333984375, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 346860120, + "step": 3800 + }, + { + "epoch": 15.8375, + "grad_norm": 3.1525991541958867, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 346951548, + "step": 3801 + }, + { + "epoch": 15.8375, + "loss": 0.06513097882270813, + "loss_ce": 0.00012853417138103396, + "loss_iou": 0.283203125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 346951548, + "step": 3801 + }, + { + "epoch": 15.841666666666667, + "grad_norm": 5.440069410822113, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 347042116, + "step": 3802 + }, + { + "epoch": 15.841666666666667, + "loss": 0.05690130591392517, + "loss_ce": 1.2823103361370158e-06, + "loss_iou": 0.2138671875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 347042116, + "step": 3802 + }, + { + "epoch": 15.845833333333333, + "grad_norm": 3.570548123212897, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 347133712, + "step": 3803 + }, + { + "epoch": 15.845833333333333, + "loss": 0.05249658226966858, + "loss_ce": 6.34684738543001e-06, + "loss_iou": 0.29296875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 347133712, + "step": 3803 + }, + { + "epoch": 15.85, + "grad_norm": 8.25724288412795, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 347224136, + "step": 3804 + }, + { + "epoch": 15.85, + "loss": 0.046803995966911316, + "loss_ce": 5.291229626891436e-06, + "loss_iou": 0.23046875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 347224136, + "step": 3804 + }, + { + "epoch": 15.854166666666666, + "grad_norm": 7.356355135686262, + "learning_rate": 5e-05, + "loss": 0.0374, + "num_input_tokens_seen": 347315452, + "step": 3805 + }, + { + "epoch": 15.854166666666666, + "loss": 0.037571605294942856, + "loss_ce": 4.463842287805164e-06, + "loss_iou": 0.15625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 347315452, + "step": 3805 + }, + { + "epoch": 15.858333333333333, + "grad_norm": 1.7147176178915682, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 347406764, + "step": 3806 + }, + { + "epoch": 15.858333333333333, + "loss": 0.06243875250220299, + "loss_ce": 4.174997593509033e-05, + "loss_iou": 0.201171875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 347406764, + "step": 3806 + }, + { + "epoch": 15.8625, + "grad_norm": 1.8615371261266904, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 347497104, + "step": 3807 + }, + { + "epoch": 15.8625, + "loss": 0.0767849013209343, + "loss_ce": 0.001971057616174221, + "loss_iou": 0.25, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 347497104, + "step": 3807 + }, + { + "epoch": 15.866666666666667, + "grad_norm": 1.4706378040439652, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 347588500, + "step": 3808 + }, + { + "epoch": 15.866666666666667, + "loss": 0.04569356143474579, + "loss_ce": 8.748088475840632e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 347588500, + "step": 3808 + }, + { + "epoch": 15.870833333333334, + "grad_norm": 3.6079065021293917, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 347680224, + "step": 3809 + }, + { + "epoch": 15.870833333333334, + "loss": 0.04604998230934143, + "loss_ce": 4.473171793506481e-05, + "loss_iou": 0.40625, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 347680224, + "step": 3809 + }, + { + "epoch": 15.875, + "grad_norm": 3.3903959022853996, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 347771716, + "step": 3810 + }, + { + "epoch": 15.875, + "loss": 0.04572838544845581, + "loss_ce": 7.408703095279634e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 347771716, + "step": 3810 + }, + { + "epoch": 15.879166666666666, + "grad_norm": 3.287637182186556, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 347863140, + "step": 3811 + }, + { + "epoch": 15.879166666666666, + "loss": 0.037694744765758514, + "loss_ce": 0.0018060706788673997, + "loss_iou": 0.248046875, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 347863140, + "step": 3811 + }, + { + "epoch": 15.883333333333333, + "grad_norm": 5.253461756076304, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 347954324, + "step": 3812 + }, + { + "epoch": 15.883333333333333, + "loss": 0.06424754858016968, + "loss_ce": 1.5671426808694378e-05, + "loss_iou": 0.0927734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 347954324, + "step": 3812 + }, + { + "epoch": 15.8875, + "grad_norm": 2.1774547294251603, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 348045424, + "step": 3813 + }, + { + "epoch": 15.8875, + "loss": 0.05383475497364998, + "loss_ce": 0.00013907899847254157, + "loss_iou": 0.2578125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 348045424, + "step": 3813 + }, + { + "epoch": 15.891666666666667, + "grad_norm": 10.51582874797255, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 348137316, + "step": 3814 + }, + { + "epoch": 15.891666666666667, + "loss": 0.09596287459135056, + "loss_ce": 1.5606256056344137e-05, + "loss_iou": 0.171875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 348137316, + "step": 3814 + }, + { + "epoch": 15.895833333333334, + "grad_norm": 11.267172703964794, + "learning_rate": 5e-05, + "loss": 0.0726, + "num_input_tokens_seen": 348228740, + "step": 3815 + }, + { + "epoch": 15.895833333333334, + "loss": 0.09679645299911499, + "loss_ce": 0.00014728127280250192, + "loss_iou": 0.26953125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 348228740, + "step": 3815 + }, + { + "epoch": 15.9, + "grad_norm": 6.034282483546973, + "learning_rate": 5e-05, + "loss": 0.063, + "num_input_tokens_seen": 348320732, + "step": 3816 + }, + { + "epoch": 15.9, + "loss": 0.05001269280910492, + "loss_ce": 0.0002690425608307123, + "loss_iou": 0.2890625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 348320732, + "step": 3816 + }, + { + "epoch": 15.904166666666667, + "grad_norm": 3.4675060023574518, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 348411824, + "step": 3817 + }, + { + "epoch": 15.904166666666667, + "loss": 0.06920676678419113, + "loss_ce": 8.158196578733623e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 348411824, + "step": 3817 + }, + { + "epoch": 15.908333333333333, + "grad_norm": 2.1176805909112275, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 348503136, + "step": 3818 + }, + { + "epoch": 15.908333333333333, + "loss": 0.07493134588003159, + "loss_ce": 0.01005860511213541, + "loss_iou": 0.203125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 348503136, + "step": 3818 + }, + { + "epoch": 15.9125, + "grad_norm": 1.6286145537936416, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 348594572, + "step": 3819 + }, + { + "epoch": 15.9125, + "loss": 0.046741899102926254, + "loss_ce": 0.0004619909741450101, + "loss_iou": 0.1904296875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 348594572, + "step": 3819 + }, + { + "epoch": 15.916666666666666, + "grad_norm": 3.4577450558540916, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 348685828, + "step": 3820 + }, + { + "epoch": 15.916666666666666, + "loss": 0.030172022059559822, + "loss_ce": 5.395931566454237e-06, + "loss_iou": 0.341796875, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 348685828, + "step": 3820 + }, + { + "epoch": 15.920833333333333, + "grad_norm": 2.268927461516382, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 348777232, + "step": 3821 + }, + { + "epoch": 15.920833333333333, + "loss": 0.04325816035270691, + "loss_ce": 0.0003046693454962224, + "loss_iou": 0.201171875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 348777232, + "step": 3821 + }, + { + "epoch": 15.925, + "grad_norm": 2.7056408960960496, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 348867292, + "step": 3822 + }, + { + "epoch": 15.925, + "loss": 0.08763208985328674, + "loss_ce": 8.628669547761092e-07, + "loss_iou": 0.1728515625, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 348867292, + "step": 3822 + }, + { + "epoch": 15.929166666666667, + "grad_norm": 2.220622709806258, + "learning_rate": 5e-05, + "loss": 0.04, + "num_input_tokens_seen": 348958972, + "step": 3823 + }, + { + "epoch": 15.929166666666667, + "loss": 0.04168154299259186, + "loss_ce": 0.006693140137940645, + "loss_iou": 0.1337890625, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 348958972, + "step": 3823 + }, + { + "epoch": 15.933333333333334, + "grad_norm": 1.3806298831490407, + "learning_rate": 5e-05, + "loss": 0.0355, + "num_input_tokens_seen": 349049560, + "step": 3824 + }, + { + "epoch": 15.933333333333334, + "loss": 0.029834497720003128, + "loss_ce": 3.5636912798509e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0059814453125, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 349049560, + "step": 3824 + }, + { + "epoch": 15.9375, + "grad_norm": 1.9240761470930519, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 349140632, + "step": 3825 + }, + { + "epoch": 15.9375, + "loss": 0.04357026517391205, + "loss_ce": 6.42470376988058e-06, + "loss_iou": 0.283203125, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 349140632, + "step": 3825 + }, + { + "epoch": 15.941666666666666, + "grad_norm": 102.84510315262223, + "learning_rate": 5e-05, + "loss": 0.0457, + "num_input_tokens_seen": 349230764, + "step": 3826 + }, + { + "epoch": 15.941666666666666, + "loss": 0.0428328663110733, + "loss_ce": 3.196366742486134e-05, + "loss_iou": 0.28515625, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 349230764, + "step": 3826 + }, + { + "epoch": 15.945833333333333, + "grad_norm": 8.467889067309262, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 349322084, + "step": 3827 + }, + { + "epoch": 15.945833333333333, + "loss": 0.07375296950340271, + "loss_ce": 0.009543980471789837, + "loss_iou": 0.283203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 349322084, + "step": 3827 + }, + { + "epoch": 15.95, + "grad_norm": 3.01453202459369, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 349413060, + "step": 3828 + }, + { + "epoch": 15.95, + "loss": 0.05052509903907776, + "loss_ce": 3.2493758226337377e-06, + "loss_iou": 0.140625, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 349413060, + "step": 3828 + }, + { + "epoch": 15.954166666666667, + "grad_norm": 3.5161494715474446, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 349504056, + "step": 3829 + }, + { + "epoch": 15.954166666666667, + "loss": 0.07083917409181595, + "loss_ce": 0.00010705763270379975, + "loss_iou": 0.283203125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 349504056, + "step": 3829 + }, + { + "epoch": 15.958333333333334, + "grad_norm": 2.800003641668997, + "learning_rate": 5e-05, + "loss": 0.1009, + "num_input_tokens_seen": 349595316, + "step": 3830 + }, + { + "epoch": 15.958333333333334, + "loss": 0.13452748954296112, + "loss_ce": 5.1784176321234554e-05, + "loss_iou": 0.283203125, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 349595316, + "step": 3830 + }, + { + "epoch": 15.9625, + "grad_norm": 8.297436090370738, + "learning_rate": 5e-05, + "loss": 0.1103, + "num_input_tokens_seen": 349686492, + "step": 3831 + }, + { + "epoch": 15.9625, + "loss": 0.0906902551651001, + "loss_ce": 0.0003277028736192733, + "loss_iou": 0.2431640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 349686492, + "step": 3831 + }, + { + "epoch": 15.966666666666667, + "grad_norm": 3.1585541135793087, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 349776952, + "step": 3832 + }, + { + "epoch": 15.966666666666667, + "loss": 0.036786098033189774, + "loss_ce": 0.0005006975261494517, + "loss_iou": 0.1982421875, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 349776952, + "step": 3832 + }, + { + "epoch": 15.970833333333333, + "grad_norm": 15.518672394577264, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 349868364, + "step": 3833 + }, + { + "epoch": 15.970833333333333, + "loss": 0.05300527438521385, + "loss_ce": 0.0009270303999073803, + "loss_iou": 0.28515625, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 349868364, + "step": 3833 + }, + { + "epoch": 15.975, + "grad_norm": 3.0035389925614115, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 349958552, + "step": 3834 + }, + { + "epoch": 15.975, + "loss": 0.039127472788095474, + "loss_ce": 9.549165406497195e-05, + "loss_iou": 0.181640625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 349958552, + "step": 3834 + }, + { + "epoch": 15.979166666666666, + "grad_norm": 2.303188031481693, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 350050256, + "step": 3835 + }, + { + "epoch": 15.979166666666666, + "loss": 0.06078094244003296, + "loss_ce": 2.0446534108486958e-05, + "loss_iou": 0.3125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 350050256, + "step": 3835 + }, + { + "epoch": 15.983333333333333, + "grad_norm": 6.314212930655639, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 350141172, + "step": 3836 + }, + { + "epoch": 15.983333333333333, + "loss": 0.07146543264389038, + "loss_ce": 5.429885641206056e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 350141172, + "step": 3836 + }, + { + "epoch": 15.9875, + "grad_norm": 2.916406234312362, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 350232684, + "step": 3837 + }, + { + "epoch": 15.9875, + "loss": 0.047817349433898926, + "loss_ce": 4.208229438518174e-05, + "loss_iou": 0.30078125, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 350232684, + "step": 3837 + }, + { + "epoch": 15.991666666666667, + "grad_norm": 12.295296027812869, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 350324372, + "step": 3838 + }, + { + "epoch": 15.991666666666667, + "loss": 0.04820753261446953, + "loss_ce": 3.5534183552954346e-05, + "loss_iou": 0.322265625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 350324372, + "step": 3838 + }, + { + "epoch": 15.995833333333334, + "grad_norm": 5.8819263365479495, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 350415192, + "step": 3839 + }, + { + "epoch": 15.995833333333334, + "loss": 0.02984732948243618, + "loss_ce": 1.139207711275958e-06, + "loss_iou": 0.232421875, + "loss_num": 0.0059814453125, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 350415192, + "step": 3839 + }, + { + "epoch": 16.0, + "grad_norm": 2.6258538181433733, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 350506624, + "step": 3840 + }, + { + "epoch": 16.0, + "loss": 0.07496326416730881, + "loss_ce": 0.0024534992408007383, + "loss_iou": 0.1669921875, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 350506624, + "step": 3840 + }, + { + "epoch": 16.004166666666666, + "grad_norm": 4.381327587546374, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 350597504, + "step": 3841 + }, + { + "epoch": 16.004166666666666, + "loss": 0.07269339263439178, + "loss_ce": 5.212401674725697e-07, + "loss_iou": 0.359375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 350597504, + "step": 3841 + }, + { + "epoch": 16.008333333333333, + "grad_norm": 4.452645426253799, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 350689396, + "step": 3842 + }, + { + "epoch": 16.008333333333333, + "loss": 0.09613090753555298, + "loss_ce": 8.168114618456457e-06, + "loss_iou": 0.296875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 350689396, + "step": 3842 + }, + { + "epoch": 16.0125, + "grad_norm": 1.5707464711745487, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 350780704, + "step": 3843 + }, + { + "epoch": 16.0125, + "loss": 0.02814393863081932, + "loss_ce": 4.488003833102994e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 350780704, + "step": 3843 + }, + { + "epoch": 16.016666666666666, + "grad_norm": 1.9733162652420486, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 350871916, + "step": 3844 + }, + { + "epoch": 16.016666666666666, + "loss": 0.055129144340753555, + "loss_ce": 4.4918466301169246e-05, + "loss_iou": 0.291015625, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 350871916, + "step": 3844 + }, + { + "epoch": 16.020833333333332, + "grad_norm": 1.9035264987053406, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 350963336, + "step": 3845 + }, + { + "epoch": 16.020833333333332, + "loss": 0.050709318369627, + "loss_ce": 4.362413164926693e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 350963336, + "step": 3845 + }, + { + "epoch": 16.025, + "grad_norm": 2.917842477642324, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 351054600, + "step": 3846 + }, + { + "epoch": 16.025, + "loss": 0.10769159346818924, + "loss_ce": 1.0315693543816451e-05, + "loss_iou": 0.279296875, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 351054600, + "step": 3846 + }, + { + "epoch": 16.029166666666665, + "grad_norm": 3.9201172308607624, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 351145784, + "step": 3847 + }, + { + "epoch": 16.029166666666665, + "loss": 0.10258331894874573, + "loss_ce": 0.001997376559302211, + "loss_iou": 0.23046875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 351145784, + "step": 3847 + }, + { + "epoch": 16.033333333333335, + "grad_norm": 3.3330847054065043, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 351237112, + "step": 3848 + }, + { + "epoch": 16.033333333333335, + "loss": 0.07070392370223999, + "loss_ce": 0.00011676585563691333, + "loss_iou": 0.28125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 351237112, + "step": 3848 + }, + { + "epoch": 16.0375, + "grad_norm": 1.0271650861426744, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 351328720, + "step": 3849 + }, + { + "epoch": 16.0375, + "loss": 0.06651593744754791, + "loss_ce": 4.8649879317963496e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 351328720, + "step": 3849 + }, + { + "epoch": 16.041666666666668, + "grad_norm": 0.8405494813029039, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 351420196, + "step": 3850 + }, + { + "epoch": 16.041666666666668, + "loss": 0.0480114221572876, + "loss_ce": 6.830670463386923e-05, + "loss_iou": 0.234375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 351420196, + "step": 3850 + }, + { + "epoch": 16.045833333333334, + "grad_norm": 1.8078136924110964, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 351511276, + "step": 3851 + }, + { + "epoch": 16.045833333333334, + "loss": 0.0456475131213665, + "loss_ce": 2.373470488237217e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 351511276, + "step": 3851 + }, + { + "epoch": 16.05, + "grad_norm": 0.620052444040268, + "learning_rate": 5e-05, + "loss": 0.0313, + "num_input_tokens_seen": 351602388, + "step": 3852 + }, + { + "epoch": 16.05, + "loss": 0.0332469642162323, + "loss_ce": 0.00036427262239158154, + "loss_iou": 0.1728515625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 351602388, + "step": 3852 + }, + { + "epoch": 16.054166666666667, + "grad_norm": 1.6012531827704792, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 351693092, + "step": 3853 + }, + { + "epoch": 16.054166666666667, + "loss": 0.061077818274497986, + "loss_ce": 0.0007598230731673539, + "loss_iou": 0.341796875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 351693092, + "step": 3853 + }, + { + "epoch": 16.058333333333334, + "grad_norm": 23.89357125292572, + "learning_rate": 5e-05, + "loss": 0.0767, + "num_input_tokens_seen": 351783416, + "step": 3854 + }, + { + "epoch": 16.058333333333334, + "loss": 0.08595338463783264, + "loss_ce": 6.319621661532437e-07, + "loss_iou": 0.365234375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 351783416, + "step": 3854 + }, + { + "epoch": 16.0625, + "grad_norm": 2.8630919901550156, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 351874700, + "step": 3855 + }, + { + "epoch": 16.0625, + "loss": 0.040271710604429245, + "loss_ce": 3.7666129628632916e-06, + "loss_iou": 0.322265625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 351874700, + "step": 3855 + }, + { + "epoch": 16.066666666666666, + "grad_norm": 4.667296196315083, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 351965908, + "step": 3856 + }, + { + "epoch": 16.066666666666666, + "loss": 0.048476576805114746, + "loss_ce": 0.00010621073306538165, + "loss_iou": 0.2470703125, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 351965908, + "step": 3856 + }, + { + "epoch": 16.070833333333333, + "grad_norm": 2.643841858219604, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 352057156, + "step": 3857 + }, + { + "epoch": 16.070833333333333, + "loss": 0.06522645801305771, + "loss_ce": 1.0394860510132276e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 352057156, + "step": 3857 + }, + { + "epoch": 16.075, + "grad_norm": 3.3895794943722675, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 352148756, + "step": 3858 + }, + { + "epoch": 16.075, + "loss": 0.09010984003543854, + "loss_ce": 6.772094639018178e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 352148756, + "step": 3858 + }, + { + "epoch": 16.079166666666666, + "grad_norm": 2.6239057736250326, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 352239384, + "step": 3859 + }, + { + "epoch": 16.079166666666666, + "loss": 0.09036745131015778, + "loss_ce": 4.904800334770698e-06, + "loss_iou": 0.236328125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 352239384, + "step": 3859 + }, + { + "epoch": 16.083333333333332, + "grad_norm": 1.8972793154833505, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 352330660, + "step": 3860 + }, + { + "epoch": 16.083333333333332, + "loss": 0.026490317657589912, + "loss_ce": 1.0600457471809932e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 352330660, + "step": 3860 + }, + { + "epoch": 16.0875, + "grad_norm": 1.3183903989842958, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 352421700, + "step": 3861 + }, + { + "epoch": 16.0875, + "loss": 0.051079727709293365, + "loss_ce": 8.559488378523383e-06, + "loss_iou": 0.2578125, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 352421700, + "step": 3861 + }, + { + "epoch": 16.091666666666665, + "grad_norm": 1.0495152774309502, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 352513560, + "step": 3862 + }, + { + "epoch": 16.091666666666665, + "loss": 0.018554434180259705, + "loss_ce": 1.8820934201357886e-05, + "loss_iou": 0.1171875, + "loss_num": 0.0037078857421875, + "loss_xval": 0.0185546875, + "num_input_tokens_seen": 352513560, + "step": 3862 + }, + { + "epoch": 16.095833333333335, + "grad_norm": 1.3600341470832789, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 352604684, + "step": 3863 + }, + { + "epoch": 16.095833333333335, + "loss": 0.060372743755578995, + "loss_ce": 8.97386962606106e-06, + "loss_iou": 0.21875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 352604684, + "step": 3863 + }, + { + "epoch": 16.1, + "grad_norm": 1.0768338932899961, + "learning_rate": 5e-05, + "loss": 0.0706, + "num_input_tokens_seen": 352695828, + "step": 3864 + }, + { + "epoch": 16.1, + "loss": 0.05559179559350014, + "loss_ce": 4.025570888188668e-06, + "loss_iou": 0.04833984375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 352695828, + "step": 3864 + }, + { + "epoch": 16.104166666666668, + "grad_norm": 2.247862443145029, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 352787496, + "step": 3865 + }, + { + "epoch": 16.104166666666668, + "loss": 0.05655861645936966, + "loss_ce": 0.00048256589798256755, + "loss_iou": 0.2255859375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 352787496, + "step": 3865 + }, + { + "epoch": 16.108333333333334, + "grad_norm": 1.283053998660903, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 352877856, + "step": 3866 + }, + { + "epoch": 16.108333333333334, + "loss": 0.041413579136133194, + "loss_ce": 1.2254739658601466e-06, + "loss_iou": 0.1728515625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 352877856, + "step": 3866 + }, + { + "epoch": 16.1125, + "grad_norm": 3.309203175656161, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 352968712, + "step": 3867 + }, + { + "epoch": 16.1125, + "loss": 0.07202804833650589, + "loss_ce": 6.760506221326068e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 352968712, + "step": 3867 + }, + { + "epoch": 16.116666666666667, + "grad_norm": 1.2593369880726677, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 353060368, + "step": 3868 + }, + { + "epoch": 16.116666666666667, + "loss": 0.0669381394982338, + "loss_ce": 0.00010082882363349199, + "loss_iou": 0.26171875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 353060368, + "step": 3868 + }, + { + "epoch": 16.120833333333334, + "grad_norm": 1.4114893449006416, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 353151512, + "step": 3869 + }, + { + "epoch": 16.120833333333334, + "loss": 0.08161616325378418, + "loss_ce": 0.00012659240746870637, + "loss_iou": 0.234375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 353151512, + "step": 3869 + }, + { + "epoch": 16.125, + "grad_norm": 1.7572843806708736, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 353242324, + "step": 3870 + }, + { + "epoch": 16.125, + "loss": 0.03830033540725708, + "loss_ce": 0.00010758535063359886, + "loss_iou": 0.3203125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 353242324, + "step": 3870 + }, + { + "epoch": 16.129166666666666, + "grad_norm": 1.8327798660955157, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 353334108, + "step": 3871 + }, + { + "epoch": 16.129166666666666, + "loss": 0.04010815545916557, + "loss_ce": 0.00042004554416052997, + "loss_iou": 0.259765625, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 353334108, + "step": 3871 + }, + { + "epoch": 16.133333333333333, + "grad_norm": 4.91119435241483, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 353424780, + "step": 3872 + }, + { + "epoch": 16.133333333333333, + "loss": 0.0698501318693161, + "loss_ce": 0.0009567025699652731, + "loss_iou": 0.349609375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 353424780, + "step": 3872 + }, + { + "epoch": 16.1375, + "grad_norm": 3.6188922276676787, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 353516128, + "step": 3873 + }, + { + "epoch": 16.1375, + "loss": 0.037483297288417816, + "loss_ce": 7.709463716309983e-06, + "loss_iou": 0.3046875, + "loss_num": 0.007476806640625, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 353516128, + "step": 3873 + }, + { + "epoch": 16.141666666666666, + "grad_norm": 2.89978017600951, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 353607724, + "step": 3874 + }, + { + "epoch": 16.141666666666666, + "loss": 0.03462275117635727, + "loss_ce": 4.6336626837728545e-05, + "loss_iou": 0.33984375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 353607724, + "step": 3874 + }, + { + "epoch": 16.145833333333332, + "grad_norm": 2.379705843982471, + "learning_rate": 5e-05, + "loss": 0.0424, + "num_input_tokens_seen": 353699508, + "step": 3875 + }, + { + "epoch": 16.145833333333332, + "loss": 0.047790978103876114, + "loss_ce": 0.0006565803778357804, + "loss_iou": 0.302734375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 353699508, + "step": 3875 + }, + { + "epoch": 16.15, + "grad_norm": 3.237416759894803, + "learning_rate": 5e-05, + "loss": 0.0666, + "num_input_tokens_seen": 353791332, + "step": 3876 + }, + { + "epoch": 16.15, + "loss": 0.06048550829291344, + "loss_ce": 0.00010648273746483028, + "loss_iou": 0.322265625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 353791332, + "step": 3876 + }, + { + "epoch": 16.154166666666665, + "grad_norm": 2.9326643294151253, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 353881960, + "step": 3877 + }, + { + "epoch": 16.154166666666665, + "loss": 0.07204936444759369, + "loss_ce": 1.2621946552826557e-05, + "loss_iou": 0.3046875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 353881960, + "step": 3877 + }, + { + "epoch": 16.158333333333335, + "grad_norm": 10.049778256959911, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 353973588, + "step": 3878 + }, + { + "epoch": 16.158333333333335, + "loss": 0.10550323128700256, + "loss_ce": 0.004329821560531855, + "loss_iou": 0.296875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 353973588, + "step": 3878 + }, + { + "epoch": 16.1625, + "grad_norm": 4.342489795079413, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 354064720, + "step": 3879 + }, + { + "epoch": 16.1625, + "loss": 0.04531225189566612, + "loss_ce": 5.468219387694262e-05, + "loss_iou": 0.380859375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 354064720, + "step": 3879 + }, + { + "epoch": 16.166666666666668, + "grad_norm": 1.6440358638616335, + "learning_rate": 5e-05, + "loss": 0.069, + "num_input_tokens_seen": 354156308, + "step": 3880 + }, + { + "epoch": 16.166666666666668, + "loss": 0.08422918617725372, + "loss_ce": 6.792954536649631e-07, + "loss_iou": 0.1875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 354156308, + "step": 3880 + }, + { + "epoch": 16.170833333333334, + "grad_norm": 1.3478561125750785, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 354247956, + "step": 3881 + }, + { + "epoch": 16.170833333333334, + "loss": 0.04582810401916504, + "loss_ce": 5.9628473536577076e-06, + "loss_iou": 0.236328125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 354247956, + "step": 3881 + }, + { + "epoch": 16.175, + "grad_norm": 2.773034842459887, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 354339412, + "step": 3882 + }, + { + "epoch": 16.175, + "loss": 0.0835675522685051, + "loss_ce": 5.619968578685075e-05, + "loss_iou": 0.22265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 354339412, + "step": 3882 + }, + { + "epoch": 16.179166666666667, + "grad_norm": 2.7796471112523973, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 354430220, + "step": 3883 + }, + { + "epoch": 16.179166666666667, + "loss": 0.043384261429309845, + "loss_ce": 3.523240366121172e-06, + "loss_iou": 0.265625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 354430220, + "step": 3883 + }, + { + "epoch": 16.183333333333334, + "grad_norm": 3.799689917817227, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 354521452, + "step": 3884 + }, + { + "epoch": 16.183333333333334, + "loss": 0.05691567808389664, + "loss_ce": 1.5653604350518435e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 354521452, + "step": 3884 + }, + { + "epoch": 16.1875, + "grad_norm": 4.917707181166018, + "learning_rate": 5e-05, + "loss": 0.0935, + "num_input_tokens_seen": 354612872, + "step": 3885 + }, + { + "epoch": 16.1875, + "loss": 0.055576518177986145, + "loss_ce": 3.4524997317930683e-05, + "loss_iou": 0.232421875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 354612872, + "step": 3885 + }, + { + "epoch": 16.191666666666666, + "grad_norm": 3.0422164792449506, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 354703848, + "step": 3886 + }, + { + "epoch": 16.191666666666666, + "loss": 0.03766234964132309, + "loss_ce": 3.6567685128829908e-06, + "loss_iou": 0.13671875, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 354703848, + "step": 3886 + }, + { + "epoch": 16.195833333333333, + "grad_norm": 2.255614957739726, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 354795188, + "step": 3887 + }, + { + "epoch": 16.195833333333333, + "loss": 0.04246654734015465, + "loss_ce": 3.1855346605880186e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 354795188, + "step": 3887 + }, + { + "epoch": 16.2, + "grad_norm": 2.71762488614364, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 354886520, + "step": 3888 + }, + { + "epoch": 16.2, + "loss": 0.10885563492774963, + "loss_ce": 6.046430644346401e-05, + "loss_iou": 0.259765625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 354886520, + "step": 3888 + }, + { + "epoch": 16.204166666666666, + "grad_norm": 2.0575852115195024, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 354978144, + "step": 3889 + }, + { + "epoch": 16.204166666666666, + "loss": 0.034292981028556824, + "loss_ce": 0.00020484643755480647, + "loss_iou": 0.2109375, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 354978144, + "step": 3889 + }, + { + "epoch": 16.208333333333332, + "grad_norm": 2.458019324936502, + "learning_rate": 5e-05, + "loss": 0.0352, + "num_input_tokens_seen": 355068772, + "step": 3890 + }, + { + "epoch": 16.208333333333332, + "loss": 0.03707907348871231, + "loss_ce": 7.844500942155719e-06, + "loss_iou": 0.1455078125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 355068772, + "step": 3890 + }, + { + "epoch": 16.2125, + "grad_norm": 3.3796932033062683, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 355160352, + "step": 3891 + }, + { + "epoch": 16.2125, + "loss": 0.0388079434633255, + "loss_ce": 4.8458564378961455e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 355160352, + "step": 3891 + }, + { + "epoch": 16.216666666666665, + "grad_norm": 3.058885505423328, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 355251504, + "step": 3892 + }, + { + "epoch": 16.216666666666665, + "loss": 0.04128335416316986, + "loss_ce": 3.885061596520245e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 355251504, + "step": 3892 + }, + { + "epoch": 16.220833333333335, + "grad_norm": 9.888017978731519, + "learning_rate": 5e-05, + "loss": 0.0796, + "num_input_tokens_seen": 355343372, + "step": 3893 + }, + { + "epoch": 16.220833333333335, + "loss": 0.05901408940553665, + "loss_ce": 5.412587051978335e-05, + "loss_iou": 0.3125, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 355343372, + "step": 3893 + }, + { + "epoch": 16.225, + "grad_norm": 3.632287783487698, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 355434980, + "step": 3894 + }, + { + "epoch": 16.225, + "loss": 0.050512395799160004, + "loss_ce": 5.807398792967433e-06, + "loss_iou": 0.1171875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 355434980, + "step": 3894 + }, + { + "epoch": 16.229166666666668, + "grad_norm": 3.131015208397186, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 355527132, + "step": 3895 + }, + { + "epoch": 16.229166666666668, + "loss": 0.04876472055912018, + "loss_ce": 0.005017775110900402, + "loss_iou": 0.234375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 355527132, + "step": 3895 + }, + { + "epoch": 16.233333333333334, + "grad_norm": 1.3546156567536622, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 355618440, + "step": 3896 + }, + { + "epoch": 16.233333333333334, + "loss": 0.05599173158407211, + "loss_ce": 3.775473669520579e-05, + "loss_iou": 0.212890625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 355618440, + "step": 3896 + }, + { + "epoch": 16.2375, + "grad_norm": 5.400964442985786, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 355710044, + "step": 3897 + }, + { + "epoch": 16.2375, + "loss": 0.08695336431264877, + "loss_ce": 0.0020076867658644915, + "loss_iou": 0.2734375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 355710044, + "step": 3897 + }, + { + "epoch": 16.241666666666667, + "grad_norm": 5.143639772623531, + "learning_rate": 5e-05, + "loss": 0.0748, + "num_input_tokens_seen": 355801108, + "step": 3898 + }, + { + "epoch": 16.241666666666667, + "loss": 0.03608565032482147, + "loss_ce": 1.3872523595637176e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 355801108, + "step": 3898 + }, + { + "epoch": 16.245833333333334, + "grad_norm": 1.987989936921633, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 355892344, + "step": 3899 + }, + { + "epoch": 16.245833333333334, + "loss": 0.04525504633784294, + "loss_ce": 1.273614452657057e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 355892344, + "step": 3899 + }, + { + "epoch": 16.25, + "grad_norm": 2.3838045785219486, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 355983472, + "step": 3900 + }, + { + "epoch": 16.25, + "loss": 0.05004560574889183, + "loss_ce": 4.4070420699426904e-06, + "loss_iou": 0.259765625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 355983472, + "step": 3900 + }, + { + "epoch": 16.254166666666666, + "grad_norm": 4.322737510300905, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 356073088, + "step": 3901 + }, + { + "epoch": 16.254166666666666, + "loss": 0.05106162279844284, + "loss_ce": 2.0969147954019718e-05, + "loss_iou": 0.26171875, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 356073088, + "step": 3901 + }, + { + "epoch": 16.258333333333333, + "grad_norm": 2.9326430406216017, + "learning_rate": 5e-05, + "loss": 0.0246, + "num_input_tokens_seen": 356164696, + "step": 3902 + }, + { + "epoch": 16.258333333333333, + "loss": 0.022506091743707657, + "loss_ce": 1.4636367268394679e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.0045166015625, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 356164696, + "step": 3902 + }, + { + "epoch": 16.2625, + "grad_norm": 2.52037964776615, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 356256476, + "step": 3903 + }, + { + "epoch": 16.2625, + "loss": 0.03397050127387047, + "loss_ce": 0.00012650688586290926, + "loss_iou": 0.0791015625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 356256476, + "step": 3903 + }, + { + "epoch": 16.266666666666666, + "grad_norm": 3.0138612484128715, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 356347732, + "step": 3904 + }, + { + "epoch": 16.266666666666666, + "loss": 0.07414688169956207, + "loss_ce": 4.428348347573774e-06, + "loss_iou": 0.267578125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 356347732, + "step": 3904 + }, + { + "epoch": 16.270833333333332, + "grad_norm": 2.551420311161361, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 356439436, + "step": 3905 + }, + { + "epoch": 16.270833333333332, + "loss": 0.07031318545341492, + "loss_ce": 0.0002448215091135353, + "loss_iou": 0.365234375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 356439436, + "step": 3905 + }, + { + "epoch": 16.275, + "grad_norm": 2.4837327366530757, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 356530704, + "step": 3906 + }, + { + "epoch": 16.275, + "loss": 0.06787487864494324, + "loss_ce": 3.7870017877139617e-06, + "loss_iou": 0.236328125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 356530704, + "step": 3906 + }, + { + "epoch": 16.279166666666665, + "grad_norm": 7.965636333564063, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 356621628, + "step": 3907 + }, + { + "epoch": 16.279166666666665, + "loss": 0.03679324686527252, + "loss_ce": 4.2454055801499635e-05, + "loss_iou": 0.19921875, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 356621628, + "step": 3907 + }, + { + "epoch": 16.283333333333335, + "grad_norm": 1.2914497291020453, + "learning_rate": 5e-05, + "loss": 0.0695, + "num_input_tokens_seen": 356712460, + "step": 3908 + }, + { + "epoch": 16.283333333333335, + "loss": 0.0689861923456192, + "loss_ce": 1.208977664646227e-06, + "loss_iou": 0.203125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 356712460, + "step": 3908 + }, + { + "epoch": 16.2875, + "grad_norm": 2.0278846333993186, + "learning_rate": 5e-05, + "loss": 0.1155, + "num_input_tokens_seen": 356801476, + "step": 3909 + }, + { + "epoch": 16.2875, + "loss": 0.15893718600273132, + "loss_ce": 1.632997964406968e-06, + "loss_iou": 0.251953125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 356801476, + "step": 3909 + }, + { + "epoch": 16.291666666666668, + "grad_norm": 1.8283743924331144, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 356892728, + "step": 3910 + }, + { + "epoch": 16.291666666666668, + "loss": 0.04230141639709473, + "loss_ce": 4.052419626532355e-06, + "loss_iou": 0.2421875, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 356892728, + "step": 3910 + }, + { + "epoch": 16.295833333333334, + "grad_norm": 3.090564522459806, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 356984724, + "step": 3911 + }, + { + "epoch": 16.295833333333334, + "loss": 0.06969290971755981, + "loss_ce": 0.0001280913274968043, + "loss_iou": 0.2138671875, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 356984724, + "step": 3911 + }, + { + "epoch": 16.3, + "grad_norm": 2.1429694540052084, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 357076264, + "step": 3912 + }, + { + "epoch": 16.3, + "loss": 0.0981968492269516, + "loss_ce": 6.758611561963335e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 357076264, + "step": 3912 + }, + { + "epoch": 16.304166666666667, + "grad_norm": 1.7474171153292801, + "learning_rate": 5e-05, + "loss": 0.0424, + "num_input_tokens_seen": 357168432, + "step": 3913 + }, + { + "epoch": 16.304166666666667, + "loss": 0.043973229825496674, + "loss_ce": 2.791850783978589e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 357168432, + "step": 3913 + }, + { + "epoch": 16.308333333333334, + "grad_norm": 1.129064597865002, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 357259436, + "step": 3914 + }, + { + "epoch": 16.308333333333334, + "loss": 0.048458926379680634, + "loss_ce": 4.638748578145169e-06, + "loss_iou": 0.154296875, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 357259436, + "step": 3914 + }, + { + "epoch": 16.3125, + "grad_norm": 3.94881844780485, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 357350308, + "step": 3915 + }, + { + "epoch": 16.3125, + "loss": 0.042931340634822845, + "loss_ce": 7.373155881396087e-07, + "loss_iou": 0.22265625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 357350308, + "step": 3915 + }, + { + "epoch": 16.316666666666666, + "grad_norm": 4.085693351868434, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 357441296, + "step": 3916 + }, + { + "epoch": 16.316666666666666, + "loss": 0.04708992689847946, + "loss_ce": 1.3049620974925347e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 357441296, + "step": 3916 + }, + { + "epoch": 16.320833333333333, + "grad_norm": 6.100082012849477, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 357532208, + "step": 3917 + }, + { + "epoch": 16.320833333333333, + "loss": 0.05945229530334473, + "loss_ce": 0.0006449182983487844, + "loss_iou": 0.32421875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 357532208, + "step": 3917 + }, + { + "epoch": 16.325, + "grad_norm": 3.268667876262669, + "learning_rate": 5e-05, + "loss": 0.0665, + "num_input_tokens_seen": 357623884, + "step": 3918 + }, + { + "epoch": 16.325, + "loss": 0.03352963924407959, + "loss_ce": 6.079977083572885e-06, + "loss_iou": 0.25390625, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 357623884, + "step": 3918 + }, + { + "epoch": 16.329166666666666, + "grad_norm": 2.6731288129200648, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 357714404, + "step": 3919 + }, + { + "epoch": 16.329166666666666, + "loss": 0.06423118710517883, + "loss_ce": 1.457569669582881e-05, + "loss_iou": 0.390625, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 357714404, + "step": 3919 + }, + { + "epoch": 16.333333333333332, + "grad_norm": 2.760440924974219, + "learning_rate": 5e-05, + "loss": 0.0833, + "num_input_tokens_seen": 357805056, + "step": 3920 + }, + { + "epoch": 16.333333333333332, + "loss": 0.0687132477760315, + "loss_ce": 2.9257778351166053e-06, + "loss_iou": 0.2431640625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 357805056, + "step": 3920 + }, + { + "epoch": 16.3375, + "grad_norm": 3.2057903850136404, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 357896480, + "step": 3921 + }, + { + "epoch": 16.3375, + "loss": 0.03337109833955765, + "loss_ce": 0.0004884064546786249, + "loss_iou": 0.341796875, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 357896480, + "step": 3921 + }, + { + "epoch": 16.341666666666665, + "grad_norm": 6.439634392685926, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 357987832, + "step": 3922 + }, + { + "epoch": 16.341666666666665, + "loss": 0.07807601243257523, + "loss_ce": 0.0001798927114577964, + "loss_iou": 0.32421875, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 357987832, + "step": 3922 + }, + { + "epoch": 16.345833333333335, + "grad_norm": 2.2851150017124175, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 358079152, + "step": 3923 + }, + { + "epoch": 16.345833333333335, + "loss": 0.0413411445915699, + "loss_ce": 0.00017293104610871524, + "loss_iou": 0.310546875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 358079152, + "step": 3923 + }, + { + "epoch": 16.35, + "grad_norm": 1.56516964735357, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 358169648, + "step": 3924 + }, + { + "epoch": 16.35, + "loss": 0.03811732679605484, + "loss_ce": 8.725569387024734e-07, + "loss_iou": 0.1767578125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 358169648, + "step": 3924 + }, + { + "epoch": 16.354166666666668, + "grad_norm": 3.006722798471429, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 358261204, + "step": 3925 + }, + { + "epoch": 16.354166666666668, + "loss": 0.0471673384308815, + "loss_ce": 1.7678248696029186e-05, + "loss_iou": 0.205078125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 358261204, + "step": 3925 + }, + { + "epoch": 16.358333333333334, + "grad_norm": 2.434268017276299, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 358352804, + "step": 3926 + }, + { + "epoch": 16.358333333333334, + "loss": 0.07035059481859207, + "loss_ce": 0.00023645992041565478, + "loss_iou": 0.08447265625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 358352804, + "step": 3926 + }, + { + "epoch": 16.3625, + "grad_norm": 3.7321751821660896, + "learning_rate": 5e-05, + "loss": 0.0963, + "num_input_tokens_seen": 358443820, + "step": 3927 + }, + { + "epoch": 16.3625, + "loss": 0.12764135003089905, + "loss_ce": 1.5843133951420896e-06, + "loss_iou": 0.2265625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 358443820, + "step": 3927 + }, + { + "epoch": 16.366666666666667, + "grad_norm": 4.455672992837086, + "learning_rate": 5e-05, + "loss": 0.0455, + "num_input_tokens_seen": 358535460, + "step": 3928 + }, + { + "epoch": 16.366666666666667, + "loss": 0.04032261669635773, + "loss_ce": 0.00014622484741266817, + "loss_iou": 0.2314453125, + "loss_num": 0.00799560546875, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 358535460, + "step": 3928 + }, + { + "epoch": 16.370833333333334, + "grad_norm": 2.084142284188999, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 358626564, + "step": 3929 + }, + { + "epoch": 16.370833333333334, + "loss": 0.04891330376267433, + "loss_ce": 0.0002988032065331936, + "loss_iou": 0.220703125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 358626564, + "step": 3929 + }, + { + "epoch": 16.375, + "grad_norm": 3.5570218841733103, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 358718064, + "step": 3930 + }, + { + "epoch": 16.375, + "loss": 0.05540819093585014, + "loss_ce": 3.5241994282841915e-06, + "loss_iou": 0.21875, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 358718064, + "step": 3930 + }, + { + "epoch": 16.379166666666666, + "grad_norm": 2.7408513904624936, + "learning_rate": 5e-05, + "loss": 0.0917, + "num_input_tokens_seen": 358809840, + "step": 3931 + }, + { + "epoch": 16.379166666666666, + "loss": 0.11305944621562958, + "loss_ce": 5.2851781219942495e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 358809840, + "step": 3931 + }, + { + "epoch": 16.383333333333333, + "grad_norm": 2.653064682663934, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 358901604, + "step": 3932 + }, + { + "epoch": 16.383333333333333, + "loss": 0.04911474883556366, + "loss_ce": 0.00022558898490387946, + "loss_iou": 0.283203125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 358901604, + "step": 3932 + }, + { + "epoch": 16.3875, + "grad_norm": 3.1667283570943643, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 358992476, + "step": 3933 + }, + { + "epoch": 16.3875, + "loss": 0.06207574903964996, + "loss_ce": 2.996779585373588e-06, + "loss_iou": 0.38671875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 358992476, + "step": 3933 + }, + { + "epoch": 16.391666666666666, + "grad_norm": 3.507941496011046, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 359083832, + "step": 3934 + }, + { + "epoch": 16.391666666666666, + "loss": 0.056043900549411774, + "loss_ce": 1.3627225598611403e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01116943359375, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 359083832, + "step": 3934 + }, + { + "epoch": 16.395833333333332, + "grad_norm": 3.6830244934152105, + "learning_rate": 5e-05, + "loss": 0.0789, + "num_input_tokens_seen": 359175132, + "step": 3935 + }, + { + "epoch": 16.395833333333332, + "loss": 0.060316603630781174, + "loss_ce": 0.003614944638684392, + "loss_iou": 0.341796875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 359175132, + "step": 3935 + }, + { + "epoch": 16.4, + "grad_norm": 2.6887028100035666, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 359266296, + "step": 3936 + }, + { + "epoch": 16.4, + "loss": 0.036618150770664215, + "loss_ce": 4.2830437450902537e-05, + "loss_iou": 0.21484375, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 359266296, + "step": 3936 + }, + { + "epoch": 16.404166666666665, + "grad_norm": 1.836769364658006, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 359358420, + "step": 3937 + }, + { + "epoch": 16.404166666666665, + "loss": 0.047061532735824585, + "loss_ce": 1.8682949303183705e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 359358420, + "step": 3937 + }, + { + "epoch": 16.408333333333335, + "grad_norm": 2.861514110892917, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 359449700, + "step": 3938 + }, + { + "epoch": 16.408333333333335, + "loss": 0.06305009871721268, + "loss_ce": 7.820833616278833e-07, + "loss_iou": 0.35546875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 359449700, + "step": 3938 + }, + { + "epoch": 16.4125, + "grad_norm": 2.783997785575247, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 359541136, + "step": 3939 + }, + { + "epoch": 16.4125, + "loss": 0.05213546007871628, + "loss_ce": 0.0002708357642404735, + "loss_iou": 0.3671875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 359541136, + "step": 3939 + }, + { + "epoch": 16.416666666666668, + "grad_norm": 3.2877178753912886, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 359632852, + "step": 3940 + }, + { + "epoch": 16.416666666666668, + "loss": 0.03417900949716568, + "loss_ce": 0.00018242768419440836, + "loss_iou": 0.310546875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 359632852, + "step": 3940 + }, + { + "epoch": 16.420833333333334, + "grad_norm": 2.5094663365036936, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 359724656, + "step": 3941 + }, + { + "epoch": 16.420833333333334, + "loss": 0.05305403470993042, + "loss_ce": 2.9741953767370433e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 359724656, + "step": 3941 + }, + { + "epoch": 16.425, + "grad_norm": 1.9912816973667706, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 359816076, + "step": 3942 + }, + { + "epoch": 16.425, + "loss": 0.04908981919288635, + "loss_ce": 7.859131437726319e-05, + "loss_iou": 0.26953125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 359816076, + "step": 3942 + }, + { + "epoch": 16.429166666666667, + "grad_norm": 3.071130093533867, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 359907692, + "step": 3943 + }, + { + "epoch": 16.429166666666667, + "loss": 0.0289724413305521, + "loss_ce": 1.126076949731214e-05, + "loss_iou": 0.283203125, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 359907692, + "step": 3943 + }, + { + "epoch": 16.433333333333334, + "grad_norm": 4.327886328416566, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 359999264, + "step": 3944 + }, + { + "epoch": 16.433333333333334, + "loss": 0.046083178371191025, + "loss_ce": 1.689563214313239e-05, + "loss_iou": 0.177734375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 359999264, + "step": 3944 + }, + { + "epoch": 16.4375, + "grad_norm": 4.330135050282263, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 360090256, + "step": 3945 + }, + { + "epoch": 16.4375, + "loss": 0.10656769573688507, + "loss_ce": 3.0872138268023264e-07, + "loss_iou": 0.2734375, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 360090256, + "step": 3945 + }, + { + "epoch": 16.441666666666666, + "grad_norm": 3.3669976976099814, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 360181368, + "step": 3946 + }, + { + "epoch": 16.441666666666666, + "loss": 0.042137518525123596, + "loss_ce": 8.001441528904252e-06, + "loss_iou": 0.32421875, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 360181368, + "step": 3946 + }, + { + "epoch": 16.445833333333333, + "grad_norm": 2.954780209526445, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 360272876, + "step": 3947 + }, + { + "epoch": 16.445833333333333, + "loss": 0.05872957780957222, + "loss_ce": 0.0009140259935520589, + "loss_iou": 0.2470703125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 360272876, + "step": 3947 + }, + { + "epoch": 16.45, + "grad_norm": 1.6089914984018734, + "learning_rate": 5e-05, + "loss": 0.0306, + "num_input_tokens_seen": 360364988, + "step": 3948 + }, + { + "epoch": 16.45, + "loss": 0.03769281879067421, + "loss_ce": 1.8869384803110734e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 360364988, + "step": 3948 + }, + { + "epoch": 16.454166666666666, + "grad_norm": 0.7518614230479753, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 360456460, + "step": 3949 + }, + { + "epoch": 16.454166666666666, + "loss": 0.08800274133682251, + "loss_ce": 5.303246325638611e-06, + "loss_iou": 0.3203125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 360456460, + "step": 3949 + }, + { + "epoch": 16.458333333333332, + "grad_norm": 1.3710043821038695, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 360547960, + "step": 3950 + }, + { + "epoch": 16.458333333333332, + "loss": 0.07577581703662872, + "loss_ce": 0.00010747826308943331, + "loss_iou": 0.177734375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 360547960, + "step": 3950 + }, + { + "epoch": 16.4625, + "grad_norm": 1.8866717426357293, + "learning_rate": 5e-05, + "loss": 0.0429, + "num_input_tokens_seen": 360639164, + "step": 3951 + }, + { + "epoch": 16.4625, + "loss": 0.05358341708779335, + "loss_ce": 2.1791793187730946e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 360639164, + "step": 3951 + }, + { + "epoch": 16.466666666666665, + "grad_norm": 2.5429951388449017, + "learning_rate": 5e-05, + "loss": 0.0313, + "num_input_tokens_seen": 360730344, + "step": 3952 + }, + { + "epoch": 16.466666666666665, + "loss": 0.028706632554531097, + "loss_ce": 4.849062406719895e-06, + "loss_iou": 0.283203125, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 360730344, + "step": 3952 + }, + { + "epoch": 16.470833333333335, + "grad_norm": 3.1225498238347664, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 360821232, + "step": 3953 + }, + { + "epoch": 16.470833333333335, + "loss": 0.05040149390697479, + "loss_ce": 1.710073433969228e-06, + "loss_iou": 0.353515625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 360821232, + "step": 3953 + }, + { + "epoch": 16.475, + "grad_norm": 2.91013005176825, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 360912412, + "step": 3954 + }, + { + "epoch": 16.475, + "loss": 0.03898601979017258, + "loss_ce": 3.0330040317494422e-05, + "loss_iou": 0.283203125, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 360912412, + "step": 3954 + }, + { + "epoch": 16.479166666666668, + "grad_norm": 6.010620113229064, + "learning_rate": 5e-05, + "loss": 0.04, + "num_input_tokens_seen": 361004048, + "step": 3955 + }, + { + "epoch": 16.479166666666668, + "loss": 0.024341393262147903, + "loss_ce": 0.0021398558747023344, + "loss_iou": 0.2041015625, + "loss_num": 0.00445556640625, + "loss_xval": 0.022216796875, + "num_input_tokens_seen": 361004048, + "step": 3955 + }, + { + "epoch": 16.483333333333334, + "grad_norm": 2.832025449318552, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 361095264, + "step": 3956 + }, + { + "epoch": 16.483333333333334, + "loss": 0.03328515589237213, + "loss_ce": 1.3365360246098135e-05, + "loss_iou": 0.296875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 361095264, + "step": 3956 + }, + { + "epoch": 16.4875, + "grad_norm": 3.1603499478759023, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 361186276, + "step": 3957 + }, + { + "epoch": 16.4875, + "loss": 0.03434790298342705, + "loss_ce": 7.6663403888233e-05, + "loss_iou": 0.23828125, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 361186276, + "step": 3957 + }, + { + "epoch": 16.491666666666667, + "grad_norm": 2.6357384326333406, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 361277428, + "step": 3958 + }, + { + "epoch": 16.491666666666667, + "loss": 0.06537545472383499, + "loss_ce": 0.000357751821866259, + "loss_iou": 0.2421875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 361277428, + "step": 3958 + }, + { + "epoch": 16.495833333333334, + "grad_norm": 2.5433925195431177, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 361368524, + "step": 3959 + }, + { + "epoch": 16.495833333333334, + "loss": 0.07380108535289764, + "loss_ce": 7.061405631247908e-05, + "loss_iou": 0.404296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 361368524, + "step": 3959 + }, + { + "epoch": 16.5, + "grad_norm": 2.6606692648061943, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 361459344, + "step": 3960 + }, + { + "epoch": 16.5, + "loss": 0.033251769840717316, + "loss_ce": 9.442002919968218e-05, + "loss_iou": 0.2890625, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 361459344, + "step": 3960 + }, + { + "epoch": 16.504166666666666, + "grad_norm": 3.4631409239854007, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 361550156, + "step": 3961 + }, + { + "epoch": 16.504166666666666, + "loss": 0.061251938343048096, + "loss_ce": 3.1600191050529247e-06, + "loss_iou": 0.263671875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 361550156, + "step": 3961 + }, + { + "epoch": 16.508333333333333, + "grad_norm": 2.229761925013082, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 361641820, + "step": 3962 + }, + { + "epoch": 16.508333333333333, + "loss": 0.049893561750650406, + "loss_ce": 5.835596675751731e-05, + "loss_iou": 0.26953125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 361641820, + "step": 3962 + }, + { + "epoch": 16.5125, + "grad_norm": 3.5200645848253407, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 361733668, + "step": 3963 + }, + { + "epoch": 16.5125, + "loss": 0.0387338325381279, + "loss_ce": 2.2287305910140276e-05, + "loss_iou": 0.28125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 361733668, + "step": 3963 + }, + { + "epoch": 16.516666666666666, + "grad_norm": 3.639270354142469, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 361824876, + "step": 3964 + }, + { + "epoch": 16.516666666666666, + "loss": 0.05050446093082428, + "loss_ce": 6.652936281170696e-05, + "loss_iou": 0.20703125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 361824876, + "step": 3964 + }, + { + "epoch": 16.520833333333332, + "grad_norm": 3.1210787100609223, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 361916172, + "step": 3965 + }, + { + "epoch": 16.520833333333332, + "loss": 0.030680567026138306, + "loss_ce": 4.0917599108070135e-05, + "loss_iou": 0.33203125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 361916172, + "step": 3965 + }, + { + "epoch": 16.525, + "grad_norm": 2.742357402552261, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 362007716, + "step": 3966 + }, + { + "epoch": 16.525, + "loss": 0.029899753630161285, + "loss_ce": 2.3044289264362305e-05, + "loss_iou": 0.240234375, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 362007716, + "step": 3966 + }, + { + "epoch": 16.529166666666665, + "grad_norm": 3.4393570074792135, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 362098880, + "step": 3967 + }, + { + "epoch": 16.529166666666665, + "loss": 0.05629254877567291, + "loss_ce": 1.8134795027435757e-05, + "loss_iou": 0.099609375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 362098880, + "step": 3967 + }, + { + "epoch": 16.533333333333335, + "grad_norm": 2.2122674663551476, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 362190176, + "step": 3968 + }, + { + "epoch": 16.533333333333335, + "loss": 0.08881276845932007, + "loss_ce": 0.0005101521383039653, + "loss_iou": 0.349609375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 362190176, + "step": 3968 + }, + { + "epoch": 16.5375, + "grad_norm": 1.754024790338209, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 362281400, + "step": 3969 + }, + { + "epoch": 16.5375, + "loss": 0.030114684253931046, + "loss_ce": 9.093943845073227e-06, + "loss_iou": 0.130859375, + "loss_num": 0.006011962890625, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 362281400, + "step": 3969 + }, + { + "epoch": 16.541666666666668, + "grad_norm": 1.4125522828745518, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 362372464, + "step": 3970 + }, + { + "epoch": 16.541666666666668, + "loss": 0.02799748256802559, + "loss_ce": 5.100919952383265e-05, + "loss_iou": 0.2734375, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 362372464, + "step": 3970 + }, + { + "epoch": 16.545833333333334, + "grad_norm": 1.9110897683022166, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 362463244, + "step": 3971 + }, + { + "epoch": 16.545833333333334, + "loss": 0.0439089760184288, + "loss_ce": 2.4702756491024047e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 362463244, + "step": 3971 + }, + { + "epoch": 16.55, + "grad_norm": 2.557862939779694, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 362555168, + "step": 3972 + }, + { + "epoch": 16.55, + "loss": 0.061159897595644, + "loss_ce": 2.6716365937318187e-06, + "loss_iou": 0.3203125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 362555168, + "step": 3972 + }, + { + "epoch": 16.554166666666667, + "grad_norm": 8.832306564244698, + "learning_rate": 5e-05, + "loss": 0.0432, + "num_input_tokens_seen": 362645688, + "step": 3973 + }, + { + "epoch": 16.554166666666667, + "loss": 0.048586659133434296, + "loss_ce": 2.6755474209494423e-06, + "loss_iou": 0.314453125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 362645688, + "step": 3973 + }, + { + "epoch": 16.558333333333334, + "grad_norm": 2.3787485737412917, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 362736740, + "step": 3974 + }, + { + "epoch": 16.558333333333334, + "loss": 0.0625823587179184, + "loss_ce": 2.1322119209798984e-05, + "loss_iou": 0.26171875, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 362736740, + "step": 3974 + }, + { + "epoch": 16.5625, + "grad_norm": 3.0644700523015955, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 362828236, + "step": 3975 + }, + { + "epoch": 16.5625, + "loss": 0.03315906971693039, + "loss_ce": 0.0006654754397459328, + "loss_iou": 0.12451171875, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 362828236, + "step": 3975 + }, + { + "epoch": 16.566666666666666, + "grad_norm": 2.9552589031927377, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 362919436, + "step": 3976 + }, + { + "epoch": 16.566666666666666, + "loss": 0.0296634454280138, + "loss_ce": 3.5889854643755825e-07, + "loss_iou": 0.3125, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 362919436, + "step": 3976 + }, + { + "epoch": 16.570833333333333, + "grad_norm": 4.567618578224438, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 363010696, + "step": 3977 + }, + { + "epoch": 16.570833333333333, + "loss": 0.052156493067741394, + "loss_ce": 1.7207483324455097e-05, + "loss_iou": 0.31640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 363010696, + "step": 3977 + }, + { + "epoch": 16.575, + "grad_norm": 2.7837989380544443, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 363101464, + "step": 3978 + }, + { + "epoch": 16.575, + "loss": 0.030507449060678482, + "loss_ce": 0.00011957007518503815, + "loss_iou": 0.1796875, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 363101464, + "step": 3978 + }, + { + "epoch": 16.579166666666666, + "grad_norm": 2.3226219034299707, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 363193092, + "step": 3979 + }, + { + "epoch": 16.579166666666666, + "loss": 0.052284494042396545, + "loss_ce": 7.883674697950482e-06, + "loss_iou": 0.1376953125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 363193092, + "step": 3979 + }, + { + "epoch": 16.583333333333332, + "grad_norm": 1.8494584473716926, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 363284432, + "step": 3980 + }, + { + "epoch": 16.583333333333332, + "loss": 0.03750983625650406, + "loss_ce": 1.8991537217516452e-05, + "loss_iou": 0.349609375, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 363284432, + "step": 3980 + }, + { + "epoch": 16.5875, + "grad_norm": 1.3009296462354598, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 363375496, + "step": 3981 + }, + { + "epoch": 16.5875, + "loss": 0.04523888975381851, + "loss_ce": 0.0003170108830090612, + "loss_iou": 0.2421875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 363375496, + "step": 3981 + }, + { + "epoch": 16.591666666666665, + "grad_norm": 1.4056978587534819, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 363466500, + "step": 3982 + }, + { + "epoch": 16.591666666666665, + "loss": 0.05627135932445526, + "loss_ce": 2.746665086306166e-05, + "loss_iou": 0.271484375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 363466500, + "step": 3982 + }, + { + "epoch": 16.595833333333335, + "grad_norm": 2.3504507881273815, + "learning_rate": 5e-05, + "loss": 0.0302, + "num_input_tokens_seen": 363558340, + "step": 3983 + }, + { + "epoch": 16.595833333333335, + "loss": 0.021505560725927353, + "loss_ce": 9.747860167408362e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0042724609375, + "loss_xval": 0.0213623046875, + "num_input_tokens_seen": 363558340, + "step": 3983 + }, + { + "epoch": 16.6, + "grad_norm": 2.4013451408399433, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 363649460, + "step": 3984 + }, + { + "epoch": 16.6, + "loss": 0.06826449185609818, + "loss_ce": 0.00030947118648327887, + "loss_iou": 0.1669921875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 363649460, + "step": 3984 + }, + { + "epoch": 16.604166666666668, + "grad_norm": 2.578801995737697, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 363740344, + "step": 3985 + }, + { + "epoch": 16.604166666666668, + "loss": 0.04665427654981613, + "loss_ce": 6.156325980555266e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 363740344, + "step": 3985 + }, + { + "epoch": 16.608333333333334, + "grad_norm": 3.2172138617040766, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 363831740, + "step": 3986 + }, + { + "epoch": 16.608333333333334, + "loss": 0.03587819263339043, + "loss_ce": 4.778653419634793e-06, + "loss_iou": 0.30859375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 363831740, + "step": 3986 + }, + { + "epoch": 16.6125, + "grad_norm": 3.212871970724973, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 363922936, + "step": 3987 + }, + { + "epoch": 16.6125, + "loss": 0.11252367496490479, + "loss_ce": 5.368115580495214e-06, + "loss_iou": 0.34375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 363922936, + "step": 3987 + }, + { + "epoch": 16.616666666666667, + "grad_norm": 2.447288223052677, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 364014936, + "step": 3988 + }, + { + "epoch": 16.616666666666667, + "loss": 0.03526037186384201, + "loss_ce": 1.2567723388201557e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 364014936, + "step": 3988 + }, + { + "epoch": 16.620833333333334, + "grad_norm": 2.315663400316989, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 364105832, + "step": 3989 + }, + { + "epoch": 16.620833333333334, + "loss": 0.03379864618182182, + "loss_ce": 4.292664925742429e-07, + "loss_iou": 0.2734375, + "loss_num": 0.00677490234375, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 364105832, + "step": 3989 + }, + { + "epoch": 16.625, + "grad_norm": 2.3383386216465563, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 364197140, + "step": 3990 + }, + { + "epoch": 16.625, + "loss": 0.03636720031499863, + "loss_ce": 5.127894110046327e-05, + "loss_iou": 0.265625, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 364197140, + "step": 3990 + }, + { + "epoch": 16.629166666666666, + "grad_norm": 3.0280427062356243, + "learning_rate": 5e-05, + "loss": 0.1054, + "num_input_tokens_seen": 364288356, + "step": 3991 + }, + { + "epoch": 16.629166666666666, + "loss": 0.0834091454744339, + "loss_ce": 4.607763003150467e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 364288356, + "step": 3991 + }, + { + "epoch": 16.633333333333333, + "grad_norm": 2.460740338451805, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 364378880, + "step": 3992 + }, + { + "epoch": 16.633333333333333, + "loss": 0.039358705282211304, + "loss_ce": 6.287586074904539e-06, + "loss_iou": 0.263671875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 364378880, + "step": 3992 + }, + { + "epoch": 16.6375, + "grad_norm": 4.056336631532033, + "learning_rate": 5e-05, + "loss": 0.1004, + "num_input_tokens_seen": 364469260, + "step": 3993 + }, + { + "epoch": 16.6375, + "loss": 0.15124809741973877, + "loss_ce": 3.350642873556353e-05, + "loss_iou": 0.234375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 364469260, + "step": 3993 + }, + { + "epoch": 16.641666666666666, + "grad_norm": 5.771803115622093, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 364560676, + "step": 3994 + }, + { + "epoch": 16.641666666666666, + "loss": 0.03899519890546799, + "loss_ce": 0.00010054669110104442, + "loss_iou": 0.310546875, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 364560676, + "step": 3994 + }, + { + "epoch": 16.645833333333332, + "grad_norm": 2.505074864393875, + "learning_rate": 5e-05, + "loss": 0.0943, + "num_input_tokens_seen": 364652532, + "step": 3995 + }, + { + "epoch": 16.645833333333332, + "loss": 0.14425472915172577, + "loss_ce": 7.443044160027057e-05, + "loss_iou": 0.267578125, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 364652532, + "step": 3995 + }, + { + "epoch": 16.65, + "grad_norm": 3.750496925366113, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 364743872, + "step": 3996 + }, + { + "epoch": 16.65, + "loss": 0.03405202552676201, + "loss_ce": 0.00011647972860373557, + "loss_iou": 0.255859375, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 364743872, + "step": 3996 + }, + { + "epoch": 16.654166666666665, + "grad_norm": 2.780715096761446, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 364835340, + "step": 3997 + }, + { + "epoch": 16.654166666666665, + "loss": 0.037465497851371765, + "loss_ce": 5.172362762095872e-06, + "loss_iou": 0.251953125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 364835340, + "step": 3997 + }, + { + "epoch": 16.658333333333335, + "grad_norm": 4.023870708348408, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 364926784, + "step": 3998 + }, + { + "epoch": 16.658333333333335, + "loss": 0.06420918554067612, + "loss_ce": 4.597852239385247e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 364926784, + "step": 3998 + }, + { + "epoch": 16.6625, + "grad_norm": 2.9223755657733244, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 365018136, + "step": 3999 + }, + { + "epoch": 16.6625, + "loss": 0.04452334716916084, + "loss_ce": 2.8717840905301273e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 365018136, + "step": 3999 + }, + { + "epoch": 16.666666666666668, + "grad_norm": 1.9494567562697138, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 365109720, + "step": 4000 + }, + { + "epoch": 16.666666666666668, + "eval_seeclick_CIoU": 0.2187977135181427, + "eval_seeclick_GIoU": 0.19433742761611938, + "eval_seeclick_IoU": 0.32613541185855865, + "eval_seeclick_MAE_all": 0.10025185346603394, + "eval_seeclick_MAE_h": 0.0734252966940403, + "eval_seeclick_MAE_w": 0.21040697395801544, + "eval_seeclick_MAE_x_boxes": 0.23823681473731995, + "eval_seeclick_MAE_y_boxes": 0.07987504452466965, + "eval_seeclick_NUM_probability": 0.9999950528144836, + "eval_seeclick_inside_bbox": 0.5397727340459824, + "eval_seeclick_loss": 0.6068128347396851, + "eval_seeclick_loss_ce": 0.15084724873304367, + "eval_seeclick_loss_iou": 0.4635009765625, + "eval_seeclick_loss_num": 0.0870208740234375, + "eval_seeclick_loss_xval": 0.43511962890625, + "eval_seeclick_runtime": 79.9986, + "eval_seeclick_samples_per_second": 0.538, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 365109720, + "step": 4000 + }, + { + "epoch": 16.666666666666668, + "eval_icons_CIoU": 0.25196781009435654, + "eval_icons_GIoU": 0.24798469990491867, + "eval_icons_IoU": 0.3420267254114151, + "eval_icons_MAE_all": 0.07624227181077003, + "eval_icons_MAE_h": 0.17375393956899643, + "eval_icons_MAE_w": 0.1023324653506279, + "eval_icons_MAE_x_boxes": 0.10294432565569878, + "eval_icons_MAE_y_boxes": 0.17453518509864807, + "eval_icons_NUM_probability": 0.9999966323375702, + "eval_icons_inside_bbox": 0.5190972238779068, + "eval_icons_loss": 0.37557896971702576, + "eval_icons_loss_ce": 0.00021423189900815487, + "eval_icons_loss_iou": 0.19970703125, + "eval_icons_loss_num": 0.0756378173828125, + "eval_icons_loss_xval": 0.3778076171875, + "eval_icons_runtime": 88.2075, + "eval_icons_samples_per_second": 0.567, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 365109720, + "step": 4000 + }, + { + "epoch": 16.666666666666668, + "eval_screenspot_CIoU": 0.3996092975139618, + "eval_screenspot_GIoU": 0.3912068208058675, + "eval_screenspot_IoU": 0.47129369775454205, + "eval_screenspot_MAE_all": 0.09338084111611049, + "eval_screenspot_MAE_h": 0.08267445862293243, + "eval_screenspot_MAE_w": 0.20502433677514395, + "eval_screenspot_MAE_x_boxes": 0.1848097344239553, + "eval_screenspot_MAE_y_boxes": 0.07364016274611156, + "eval_screenspot_NUM_probability": 0.9999982317288717, + "eval_screenspot_inside_bbox": 0.7041666706403097, + "eval_screenspot_loss": 0.4725082814693451, + "eval_screenspot_loss_ce": 0.0009411601656192184, + "eval_screenspot_loss_iou": 0.3634440104166667, + "eval_screenspot_loss_num": 0.095428466796875, + "eval_screenspot_loss_xval": 0.47698974609375, + "eval_screenspot_runtime": 161.4524, + "eval_screenspot_samples_per_second": 0.551, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 365109720, + "step": 4000 + }, + { + "epoch": 16.666666666666668, + "eval_compot_CIoU": 0.5026089549064636, + "eval_compot_GIoU": 0.5021179020404816, + "eval_compot_IoU": 0.5728136301040649, + "eval_compot_MAE_all": 0.052255457267165184, + "eval_compot_MAE_h": 0.06820886395871639, + "eval_compot_MAE_w": 0.1275174878537655, + "eval_compot_MAE_x_boxes": 0.12862426042556763, + "eval_compot_MAE_y_boxes": 0.06744185462594032, + "eval_compot_NUM_probability": 0.9999986290931702, + "eval_compot_inside_bbox": 0.7673611044883728, + "eval_compot_loss": 0.30062806606292725, + "eval_compot_loss_ce": 0.04959471523761749, + "eval_compot_loss_iou": 0.33392333984375, + "eval_compot_loss_num": 0.04592132568359375, + "eval_compot_loss_xval": 0.2295684814453125, + "eval_compot_runtime": 92.9128, + "eval_compot_samples_per_second": 0.538, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 365109720, + "step": 4000 + }, + { + "epoch": 16.666666666666668, + "loss": 0.278637170791626, + "loss_ce": 0.04554390907287598, + "loss_iou": 0.361328125, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 365109720, + "step": 4000 + }, + { + "epoch": 16.670833333333334, + "grad_norm": 1.6410736715136567, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 365200588, + "step": 4001 + }, + { + "epoch": 16.670833333333334, + "loss": 0.04317476600408554, + "loss_ce": 7.651606210856698e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 365200588, + "step": 4001 + }, + { + "epoch": 16.675, + "grad_norm": 2.832863444144271, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 365292064, + "step": 4002 + }, + { + "epoch": 16.675, + "loss": 0.03555441275238991, + "loss_ce": 7.772813114570454e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 365292064, + "step": 4002 + }, + { + "epoch": 16.679166666666667, + "grad_norm": 6.63601226201065, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 365383416, + "step": 4003 + }, + { + "epoch": 16.679166666666667, + "loss": 0.036875009536743164, + "loss_ce": 9.77577565208776e-06, + "loss_iou": 0.251953125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 365383416, + "step": 4003 + }, + { + "epoch": 16.683333333333334, + "grad_norm": 2.3054296641080487, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 365474616, + "step": 4004 + }, + { + "epoch": 16.683333333333334, + "loss": 0.03364076465368271, + "loss_ce": 1.0395049685030244e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 365474616, + "step": 4004 + }, + { + "epoch": 16.6875, + "grad_norm": 2.754968795338958, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 365566040, + "step": 4005 + }, + { + "epoch": 16.6875, + "loss": 0.037810347974300385, + "loss_ce": 0.001326585072092712, + "loss_iou": 0.26953125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 365566040, + "step": 4005 + }, + { + "epoch": 16.691666666666666, + "grad_norm": 2.7741713692211545, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 365657660, + "step": 4006 + }, + { + "epoch": 16.691666666666666, + "loss": 0.06708613783121109, + "loss_ce": 0.00026027217973023653, + "loss_iou": 0.26953125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 365657660, + "step": 4006 + }, + { + "epoch": 16.695833333333333, + "grad_norm": 2.4848960988166726, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 365748648, + "step": 4007 + }, + { + "epoch": 16.695833333333333, + "loss": 0.03710198774933815, + "loss_ce": 3.076103166677058e-05, + "loss_iou": 0.23046875, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 365748648, + "step": 4007 + }, + { + "epoch": 16.7, + "grad_norm": 2.6255179088156115, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 365839664, + "step": 4008 + }, + { + "epoch": 16.7, + "loss": 0.041383929550647736, + "loss_ce": 2.094629053317476e-06, + "loss_iou": 0.259765625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 365839664, + "step": 4008 + }, + { + "epoch": 16.704166666666666, + "grad_norm": 1.7964645414736218, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 365931420, + "step": 4009 + }, + { + "epoch": 16.704166666666666, + "loss": 0.04012474790215492, + "loss_ce": 0.00010094503522850573, + "loss_iou": 0.314453125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 365931420, + "step": 4009 + }, + { + "epoch": 16.708333333333332, + "grad_norm": 5.893781572467153, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 366022664, + "step": 4010 + }, + { + "epoch": 16.708333333333332, + "loss": 0.04142048954963684, + "loss_ce": 0.00019124093523714691, + "loss_iou": 0.27734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 366022664, + "step": 4010 + }, + { + "epoch": 16.7125, + "grad_norm": 4.09881481698516, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 366113676, + "step": 4011 + }, + { + "epoch": 16.7125, + "loss": 0.029480930417776108, + "loss_ce": 9.494428923062515e-07, + "loss_iou": 0.205078125, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 366113676, + "step": 4011 + }, + { + "epoch": 16.716666666666665, + "grad_norm": 2.49927976290558, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 366204924, + "step": 4012 + }, + { + "epoch": 16.716666666666665, + "loss": 0.06279443204402924, + "loss_ce": 0.00014947263116482645, + "loss_iou": 0.283203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 366204924, + "step": 4012 + }, + { + "epoch": 16.720833333333335, + "grad_norm": 5.643743771940805, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 366295484, + "step": 4013 + }, + { + "epoch": 16.720833333333335, + "loss": 0.05839596316218376, + "loss_ce": 1.5833831639611162e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 366295484, + "step": 4013 + }, + { + "epoch": 16.725, + "grad_norm": 3.155206503441892, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 366386272, + "step": 4014 + }, + { + "epoch": 16.725, + "loss": 0.04876965284347534, + "loss_ce": 2.559177573857596e-06, + "loss_iou": 0.193359375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 366386272, + "step": 4014 + }, + { + "epoch": 16.729166666666668, + "grad_norm": 2.0339484177133733, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 366477516, + "step": 4015 + }, + { + "epoch": 16.729166666666668, + "loss": 0.02809450402855873, + "loss_ce": 3.0740268357476452e-06, + "loss_iou": 0.1796875, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 366477516, + "step": 4015 + }, + { + "epoch": 16.733333333333334, + "grad_norm": 3.1178931117337405, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 366568904, + "step": 4016 + }, + { + "epoch": 16.733333333333334, + "loss": 0.03765561431646347, + "loss_ce": 0.0005538675468415022, + "loss_iou": 0.359375, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 366568904, + "step": 4016 + }, + { + "epoch": 16.7375, + "grad_norm": 3.7011023537882366, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 366660568, + "step": 4017 + }, + { + "epoch": 16.7375, + "loss": 0.06564977020025253, + "loss_ce": 0.00011327103129588068, + "loss_iou": 0.373046875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 366660568, + "step": 4017 + }, + { + "epoch": 16.741666666666667, + "grad_norm": 2.707010113087898, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 366752124, + "step": 4018 + }, + { + "epoch": 16.741666666666667, + "loss": 0.04419136792421341, + "loss_ce": 1.9155565951223252e-06, + "loss_iou": 0.28515625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 366752124, + "step": 4018 + }, + { + "epoch": 16.745833333333334, + "grad_norm": 2.8384005053193624, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 366843888, + "step": 4019 + }, + { + "epoch": 16.745833333333334, + "loss": 0.03002469427883625, + "loss_ce": 1.0656568520062137e-05, + "loss_iou": 0.244140625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 366843888, + "step": 4019 + }, + { + "epoch": 16.75, + "grad_norm": 3.1653530277293815, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 366935140, + "step": 4020 + }, + { + "epoch": 16.75, + "loss": 0.09292182326316833, + "loss_ce": 0.0009265905246138573, + "loss_iou": 0.34765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 366935140, + "step": 4020 + }, + { + "epoch": 16.754166666666666, + "grad_norm": 2.122395995444454, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 367026432, + "step": 4021 + }, + { + "epoch": 16.754166666666666, + "loss": 0.030421065166592598, + "loss_ce": 1.0298483175574802e-05, + "loss_iou": 0.19140625, + "loss_num": 0.006103515625, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 367026432, + "step": 4021 + }, + { + "epoch": 16.758333333333333, + "grad_norm": 2.562085608406176, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 367118212, + "step": 4022 + }, + { + "epoch": 16.758333333333333, + "loss": 0.047422319650650024, + "loss_ce": 0.00013533404853660613, + "loss_iou": 0.26953125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 367118212, + "step": 4022 + }, + { + "epoch": 16.7625, + "grad_norm": 2.655294975293356, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 367209388, + "step": 4023 + }, + { + "epoch": 16.7625, + "loss": 0.04857932776212692, + "loss_ce": 1.060061822499847e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 367209388, + "step": 4023 + }, + { + "epoch": 16.766666666666666, + "grad_norm": 2.447192267899224, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 367300760, + "step": 4024 + }, + { + "epoch": 16.766666666666666, + "loss": 0.072332464158535, + "loss_ce": 0.0010510298889130354, + "loss_iou": 0.177734375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 367300760, + "step": 4024 + }, + { + "epoch": 16.770833333333332, + "grad_norm": 2.851437977074312, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 367392496, + "step": 4025 + }, + { + "epoch": 16.770833333333332, + "loss": 0.027581773698329926, + "loss_ce": 2.4399269022978842e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 367392496, + "step": 4025 + }, + { + "epoch": 16.775, + "grad_norm": 2.6368920937836626, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 367484288, + "step": 4026 + }, + { + "epoch": 16.775, + "loss": 0.06597106158733368, + "loss_ce": 3.784064028877765e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 367484288, + "step": 4026 + }, + { + "epoch": 16.779166666666665, + "grad_norm": 2.794030801023625, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 367575732, + "step": 4027 + }, + { + "epoch": 16.779166666666665, + "loss": 0.09035658836364746, + "loss_ce": 0.00037551255081780255, + "loss_iou": 0.12353515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 367575732, + "step": 4027 + }, + { + "epoch": 16.783333333333335, + "grad_norm": 3.200663608710046, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 367667884, + "step": 4028 + }, + { + "epoch": 16.783333333333335, + "loss": 0.052529964596033096, + "loss_ce": 9.211295946442988e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 367667884, + "step": 4028 + }, + { + "epoch": 16.7875, + "grad_norm": 3.2428786244901433, + "learning_rate": 5e-05, + "loss": 0.0382, + "num_input_tokens_seen": 367758828, + "step": 4029 + }, + { + "epoch": 16.7875, + "loss": 0.03407984972000122, + "loss_ce": 5.274821523926221e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 367758828, + "step": 4029 + }, + { + "epoch": 16.791666666666668, + "grad_norm": 5.145981390784509, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 367850296, + "step": 4030 + }, + { + "epoch": 16.791666666666668, + "loss": 0.05094943568110466, + "loss_ce": 1.559699376230128e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 367850296, + "step": 4030 + }, + { + "epoch": 16.795833333333334, + "grad_norm": 1.1260859347377563, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 367941600, + "step": 4031 + }, + { + "epoch": 16.795833333333334, + "loss": 0.024731360375881195, + "loss_ce": 1.2122669431846589e-05, + "loss_iou": 0.267578125, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 367941600, + "step": 4031 + }, + { + "epoch": 16.8, + "grad_norm": 1.794489654910749, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 368033408, + "step": 4032 + }, + { + "epoch": 16.8, + "loss": 0.05247056856751442, + "loss_ce": 0.00022447184892371297, + "loss_iou": 0.24609375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 368033408, + "step": 4032 + }, + { + "epoch": 16.804166666666667, + "grad_norm": 6.159764645003933, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 368125576, + "step": 4033 + }, + { + "epoch": 16.804166666666667, + "loss": 0.060446955263614655, + "loss_ce": 6.894314083183417e-06, + "loss_iou": 0.259765625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 368125576, + "step": 4033 + }, + { + "epoch": 16.808333333333334, + "grad_norm": 2.4625028230047485, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 368216032, + "step": 4034 + }, + { + "epoch": 16.808333333333334, + "loss": 0.12289955466985703, + "loss_ce": 0.0005393251776695251, + "loss_iou": 0.2255859375, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 368216032, + "step": 4034 + }, + { + "epoch": 16.8125, + "grad_norm": 3.44767132408497, + "learning_rate": 5e-05, + "loss": 0.0358, + "num_input_tokens_seen": 368307628, + "step": 4035 + }, + { + "epoch": 16.8125, + "loss": 0.03818739950656891, + "loss_ce": 9.91188244370278e-06, + "loss_iou": 0.375, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 368307628, + "step": 4035 + }, + { + "epoch": 16.816666666666666, + "grad_norm": 2.5776685347298294, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 368399232, + "step": 4036 + }, + { + "epoch": 16.816666666666666, + "loss": 0.053744807839393616, + "loss_ce": 0.00014068148448131979, + "loss_iou": 0.39453125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 368399232, + "step": 4036 + }, + { + "epoch": 16.820833333333333, + "grad_norm": 2.9819800608908413, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 368490508, + "step": 4037 + }, + { + "epoch": 16.820833333333333, + "loss": 0.05096079409122467, + "loss_ce": 5.747316754423082e-05, + "loss_iou": 0.2421875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 368490508, + "step": 4037 + }, + { + "epoch": 16.825, + "grad_norm": 2.4913500292685553, + "learning_rate": 5e-05, + "loss": 0.0931, + "num_input_tokens_seen": 368581764, + "step": 4038 + }, + { + "epoch": 16.825, + "loss": 0.09946852922439575, + "loss_ce": 4.116043783142231e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 368581764, + "step": 4038 + }, + { + "epoch": 16.829166666666666, + "grad_norm": 3.0700934805413937, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 368672488, + "step": 4039 + }, + { + "epoch": 16.829166666666666, + "loss": 0.03233366832137108, + "loss_ce": 2.9432834480758174e-07, + "loss_iou": 0.2578125, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 368672488, + "step": 4039 + }, + { + "epoch": 16.833333333333332, + "grad_norm": 2.8659320252554616, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 368763464, + "step": 4040 + }, + { + "epoch": 16.833333333333332, + "loss": 0.05825977399945259, + "loss_ce": 1.718512294246466e-06, + "loss_iou": 0.234375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 368763464, + "step": 4040 + }, + { + "epoch": 16.8375, + "grad_norm": 2.717146392782448, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 368854860, + "step": 4041 + }, + { + "epoch": 16.8375, + "loss": 0.08421778678894043, + "loss_ce": 0.0014998923288658261, + "loss_iou": 0.26171875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 368854860, + "step": 4041 + }, + { + "epoch": 16.841666666666665, + "grad_norm": 1.9585966729625848, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 368946416, + "step": 4042 + }, + { + "epoch": 16.841666666666665, + "loss": 0.05129852890968323, + "loss_ce": 0.00014343684597406536, + "loss_iou": 0.216796875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 368946416, + "step": 4042 + }, + { + "epoch": 16.845833333333335, + "grad_norm": 2.453415700006763, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 369037700, + "step": 4043 + }, + { + "epoch": 16.845833333333335, + "loss": 0.053867191076278687, + "loss_ce": 3.6648129935201723e-06, + "loss_iou": 0.3515625, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 369037700, + "step": 4043 + }, + { + "epoch": 16.85, + "grad_norm": 2.545531334753407, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 369128312, + "step": 4044 + }, + { + "epoch": 16.85, + "loss": 0.053280819207429886, + "loss_ce": 2.7642910936265253e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 369128312, + "step": 4044 + }, + { + "epoch": 16.854166666666668, + "grad_norm": 2.8526151681009715, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 369219084, + "step": 4045 + }, + { + "epoch": 16.854166666666668, + "loss": 0.0599316768348217, + "loss_ce": 2.56677685683826e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 369219084, + "step": 4045 + }, + { + "epoch": 16.858333333333334, + "grad_norm": 2.734789273628204, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 369310688, + "step": 4046 + }, + { + "epoch": 16.858333333333334, + "loss": 0.04425501078367233, + "loss_ce": 3.504356573102996e-05, + "loss_iou": 0.33203125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 369310688, + "step": 4046 + }, + { + "epoch": 16.8625, + "grad_norm": 3.226174012215865, + "learning_rate": 5e-05, + "loss": 0.0339, + "num_input_tokens_seen": 369401544, + "step": 4047 + }, + { + "epoch": 16.8625, + "loss": 0.03497467562556267, + "loss_ce": 1.5316002190957079e-06, + "loss_iou": 0.298828125, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 369401544, + "step": 4047 + }, + { + "epoch": 16.866666666666667, + "grad_norm": 4.093369892681781, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 369493136, + "step": 4048 + }, + { + "epoch": 16.866666666666667, + "loss": 0.07229539752006531, + "loss_ce": 0.0024711769074201584, + "loss_iou": 0.33984375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 369493136, + "step": 4048 + }, + { + "epoch": 16.870833333333334, + "grad_norm": 15.300191803326936, + "learning_rate": 5e-05, + "loss": 0.0975, + "num_input_tokens_seen": 369584212, + "step": 4049 + }, + { + "epoch": 16.870833333333334, + "loss": 0.14228758215904236, + "loss_ce": 1.4646837371401489e-05, + "loss_iou": 0.34375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 369584212, + "step": 4049 + }, + { + "epoch": 16.875, + "grad_norm": 1.9861558181634973, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 369675404, + "step": 4050 + }, + { + "epoch": 16.875, + "loss": 0.07799716293811798, + "loss_ce": 9.495933227299247e-06, + "loss_iou": 0.341796875, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 369675404, + "step": 4050 + }, + { + "epoch": 16.879166666666666, + "grad_norm": 7.091214839519732, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 369766796, + "step": 4051 + }, + { + "epoch": 16.879166666666666, + "loss": 0.043971382081508636, + "loss_ce": 0.0003846481558866799, + "loss_iou": 0.32421875, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 369766796, + "step": 4051 + }, + { + "epoch": 16.883333333333333, + "grad_norm": 2.0299282941576875, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 369858008, + "step": 4052 + }, + { + "epoch": 16.883333333333333, + "loss": 0.041609566658735275, + "loss_ce": 0.000990670290775597, + "loss_iou": 0.2314453125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 369858008, + "step": 4052 + }, + { + "epoch": 16.8875, + "grad_norm": 7.830960151468681, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 369948996, + "step": 4053 + }, + { + "epoch": 16.8875, + "loss": 0.0572042316198349, + "loss_ce": 6.6593884184840135e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 369948996, + "step": 4053 + }, + { + "epoch": 16.891666666666666, + "grad_norm": 5.273891058134392, + "learning_rate": 5e-05, + "loss": 0.0913, + "num_input_tokens_seen": 370040188, + "step": 4054 + }, + { + "epoch": 16.891666666666666, + "loss": 0.05034564435482025, + "loss_ce": 6.896343620610423e-06, + "loss_iou": 0.205078125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 370040188, + "step": 4054 + }, + { + "epoch": 16.895833333333332, + "grad_norm": 3.4729443952903285, + "learning_rate": 5e-05, + "loss": 0.1036, + "num_input_tokens_seen": 370130656, + "step": 4055 + }, + { + "epoch": 16.895833333333332, + "loss": 0.06628492474555969, + "loss_ce": 7.389370466626133e-07, + "loss_iou": 0.37109375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 370130656, + "step": 4055 + }, + { + "epoch": 16.9, + "grad_norm": 2.3412868639267717, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 370222484, + "step": 4056 + }, + { + "epoch": 16.9, + "loss": 0.08415798842906952, + "loss_ce": 0.00017361801292281598, + "loss_iou": 0.265625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 370222484, + "step": 4056 + }, + { + "epoch": 16.904166666666665, + "grad_norm": 2.3881985763644384, + "learning_rate": 5e-05, + "loss": 0.1178, + "num_input_tokens_seen": 370313636, + "step": 4057 + }, + { + "epoch": 16.904166666666665, + "loss": 0.12934906780719757, + "loss_ce": 0.0005191150703467429, + "loss_iou": 0.1796875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 370313636, + "step": 4057 + }, + { + "epoch": 16.908333333333335, + "grad_norm": 3.0055102224007664, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 370405208, + "step": 4058 + }, + { + "epoch": 16.908333333333335, + "loss": 0.04517017677426338, + "loss_ce": 4.16390503232833e-06, + "loss_iou": 0.298828125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 370405208, + "step": 4058 + }, + { + "epoch": 16.9125, + "grad_norm": 2.365789937828196, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 370496724, + "step": 4059 + }, + { + "epoch": 16.9125, + "loss": 0.053006406873464584, + "loss_ce": 0.010983701795339584, + "loss_iou": 0.265625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 370496724, + "step": 4059 + }, + { + "epoch": 16.916666666666668, + "grad_norm": 3.0113908440107897, + "learning_rate": 5e-05, + "loss": 0.125, + "num_input_tokens_seen": 370588216, + "step": 4060 + }, + { + "epoch": 16.916666666666668, + "loss": 0.2107238471508026, + "loss_ce": 0.00015255525067914277, + "loss_iou": 0.2080078125, + "loss_num": 0.0419921875, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 370588216, + "step": 4060 + }, + { + "epoch": 16.920833333333334, + "grad_norm": 2.0049932346862103, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 370679492, + "step": 4061 + }, + { + "epoch": 16.920833333333334, + "loss": 0.03363718464970589, + "loss_ce": 8.787655679043382e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 370679492, + "step": 4061 + }, + { + "epoch": 16.925, + "grad_norm": 3.3662195481642363, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 370770924, + "step": 4062 + }, + { + "epoch": 16.925, + "loss": 0.05911504104733467, + "loss_ce": 2.4945893528638408e-06, + "loss_iou": 0.1708984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 370770924, + "step": 4062 + }, + { + "epoch": 16.929166666666667, + "grad_norm": 2.6258405561734888, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 370862820, + "step": 4063 + }, + { + "epoch": 16.929166666666667, + "loss": 0.06796564161777496, + "loss_ce": 2.9951866054034326e-06, + "loss_iou": 0.19140625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 370862820, + "step": 4063 + }, + { + "epoch": 16.933333333333334, + "grad_norm": 4.403878392486105, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 370953876, + "step": 4064 + }, + { + "epoch": 16.933333333333334, + "loss": 0.03829577565193176, + "loss_ce": 3.847268999379594e-06, + "loss_iou": 0.154296875, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 370953876, + "step": 4064 + }, + { + "epoch": 16.9375, + "grad_norm": 1.9362293574628262, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 371045536, + "step": 4065 + }, + { + "epoch": 16.9375, + "loss": 0.03752783685922623, + "loss_ce": 2.660123755049426e-06, + "loss_iou": 0.2451171875, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 371045536, + "step": 4065 + }, + { + "epoch": 16.941666666666666, + "grad_norm": 2.959179694047592, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 371136800, + "step": 4066 + }, + { + "epoch": 16.941666666666666, + "loss": 0.06771986931562424, + "loss_ce": 0.00012343730486463755, + "loss_iou": 0.23828125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 371136800, + "step": 4066 + }, + { + "epoch": 16.945833333333333, + "grad_norm": 2.6218360895053427, + "learning_rate": 5e-05, + "loss": 0.1005, + "num_input_tokens_seen": 371228512, + "step": 4067 + }, + { + "epoch": 16.945833333333333, + "loss": 0.12994059920310974, + "loss_ce": 2.7274709282210097e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 371228512, + "step": 4067 + }, + { + "epoch": 16.95, + "grad_norm": 3.357540956809214, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 371320428, + "step": 4068 + }, + { + "epoch": 16.95, + "loss": 0.03651657700538635, + "loss_ce": 0.00015488412464037538, + "loss_iou": 0.1787109375, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 371320428, + "step": 4068 + }, + { + "epoch": 16.954166666666666, + "grad_norm": 2.794785336945426, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 371410556, + "step": 4069 + }, + { + "epoch": 16.954166666666666, + "loss": 0.08668608218431473, + "loss_ce": 9.055524969880935e-07, + "loss_iou": 0.32421875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 371410556, + "step": 4069 + }, + { + "epoch": 16.958333333333332, + "grad_norm": 3.6340431629838528, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 371502528, + "step": 4070 + }, + { + "epoch": 16.958333333333332, + "loss": 0.03125636279582977, + "loss_ce": 0.0002886476868297905, + "loss_iou": 0.314453125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 371502528, + "step": 4070 + }, + { + "epoch": 16.9625, + "grad_norm": 3.603111563405317, + "learning_rate": 5e-05, + "loss": 0.0723, + "num_input_tokens_seen": 371593780, + "step": 4071 + }, + { + "epoch": 16.9625, + "loss": 0.0696285292506218, + "loss_ce": 1.7936108633875847e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 371593780, + "step": 4071 + }, + { + "epoch": 16.966666666666665, + "grad_norm": 3.9638524560359367, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 371685136, + "step": 4072 + }, + { + "epoch": 16.966666666666665, + "loss": 0.07412572205066681, + "loss_ce": 0.0007156896172091365, + "loss_iou": 0.26953125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 371685136, + "step": 4072 + }, + { + "epoch": 16.970833333333335, + "grad_norm": 2.6761592787839263, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 371776312, + "step": 4073 + }, + { + "epoch": 16.970833333333335, + "loss": 0.032429054379463196, + "loss_ce": 1.9384531697141938e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0064697265625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 371776312, + "step": 4073 + }, + { + "epoch": 16.975, + "grad_norm": 10.428568635001916, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 371867092, + "step": 4074 + }, + { + "epoch": 16.975, + "loss": 0.045186370611190796, + "loss_ce": 5.093787876830902e-06, + "loss_iou": 0.240234375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 371867092, + "step": 4074 + }, + { + "epoch": 16.979166666666668, + "grad_norm": 16.999770390677803, + "learning_rate": 5e-05, + "loss": 0.0867, + "num_input_tokens_seen": 371957968, + "step": 4075 + }, + { + "epoch": 16.979166666666668, + "loss": 0.14276185631752014, + "loss_ce": 6.23431390067708e-07, + "loss_iou": 0.314453125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 371957968, + "step": 4075 + }, + { + "epoch": 16.983333333333334, + "grad_norm": 2.6052418985851724, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 372048676, + "step": 4076 + }, + { + "epoch": 16.983333333333334, + "loss": 0.04159224405884743, + "loss_ce": 2.7303876777295955e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 372048676, + "step": 4076 + }, + { + "epoch": 16.9875, + "grad_norm": 3.1679506494094456, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 372140012, + "step": 4077 + }, + { + "epoch": 16.9875, + "loss": 0.028769517317414284, + "loss_ce": 0.0001440290652681142, + "loss_iou": 0.30859375, + "loss_num": 0.0057373046875, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 372140012, + "step": 4077 + }, + { + "epoch": 16.991666666666667, + "grad_norm": 4.048569369498823, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 372231528, + "step": 4078 + }, + { + "epoch": 16.991666666666667, + "loss": 0.04449920356273651, + "loss_ce": 1.98340458155144e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 372231528, + "step": 4078 + }, + { + "epoch": 16.995833333333334, + "grad_norm": 36.34557991333924, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 372322872, + "step": 4079 + }, + { + "epoch": 16.995833333333334, + "loss": 0.07955171167850494, + "loss_ce": 6.867582851555198e-05, + "loss_iou": 0.36328125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 372322872, + "step": 4079 + }, + { + "epoch": 17.0, + "grad_norm": 4.769963548994012, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 372414160, + "step": 4080 + }, + { + "epoch": 17.0, + "loss": 0.051101259887218475, + "loss_ce": 4.5351465814746916e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 372414160, + "step": 4080 + }, + { + "epoch": 17.004166666666666, + "grad_norm": 2.662306389973745, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 372505456, + "step": 4081 + }, + { + "epoch": 17.004166666666666, + "loss": 0.10184650123119354, + "loss_ce": 9.338312338513788e-06, + "loss_iou": 0.259765625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 372505456, + "step": 4081 + }, + { + "epoch": 17.008333333333333, + "grad_norm": 2.9907076141286497, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 372596884, + "step": 4082 + }, + { + "epoch": 17.008333333333333, + "loss": 0.03885851427912712, + "loss_ce": 2.4898748961277306e-05, + "loss_iou": 0.2734375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 372596884, + "step": 4082 + }, + { + "epoch": 17.0125, + "grad_norm": 3.301548128088256, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 372687932, + "step": 4083 + }, + { + "epoch": 17.0125, + "loss": 0.05405256524682045, + "loss_ce": 1.3564389519160613e-05, + "loss_iou": 0.15234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 372687932, + "step": 4083 + }, + { + "epoch": 17.016666666666666, + "grad_norm": 3.7730795965773454, + "learning_rate": 5e-05, + "loss": 0.07, + "num_input_tokens_seen": 372778696, + "step": 4084 + }, + { + "epoch": 17.016666666666666, + "loss": 0.08227451145648956, + "loss_ce": 2.9634154998348095e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 372778696, + "step": 4084 + }, + { + "epoch": 17.020833333333332, + "grad_norm": 2.2226393759552336, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 372870048, + "step": 4085 + }, + { + "epoch": 17.020833333333332, + "loss": 0.04948572814464569, + "loss_ce": 1.4736596085640485e-06, + "loss_iou": 0.41015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 372870048, + "step": 4085 + }, + { + "epoch": 17.025, + "grad_norm": 3.994770241494494, + "learning_rate": 5e-05, + "loss": 0.0907, + "num_input_tokens_seen": 372961008, + "step": 4086 + }, + { + "epoch": 17.025, + "loss": 0.10503510385751724, + "loss_ce": 1.6490666894242167e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 372961008, + "step": 4086 + }, + { + "epoch": 17.029166666666665, + "grad_norm": 1.298938361841086, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 373052104, + "step": 4087 + }, + { + "epoch": 17.029166666666665, + "loss": 0.03302188217639923, + "loss_ce": 1.8632302953847102e-06, + "loss_iou": 0.134765625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 373052104, + "step": 4087 + }, + { + "epoch": 17.033333333333335, + "grad_norm": 1.9625396497308418, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 373143348, + "step": 4088 + }, + { + "epoch": 17.033333333333335, + "loss": 0.11454355716705322, + "loss_ce": 3.4617858091223752e-06, + "loss_iou": 0.302734375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 373143348, + "step": 4088 + }, + { + "epoch": 17.0375, + "grad_norm": 3.986045902684438, + "learning_rate": 5e-05, + "loss": 0.0487, + "num_input_tokens_seen": 373234840, + "step": 4089 + }, + { + "epoch": 17.0375, + "loss": 0.054869748651981354, + "loss_ce": 6.017951454850845e-05, + "loss_iou": 0.16796875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 373234840, + "step": 4089 + }, + { + "epoch": 17.041666666666668, + "grad_norm": 2.2693008135127943, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 373326132, + "step": 4090 + }, + { + "epoch": 17.041666666666668, + "loss": 0.04873867332935333, + "loss_ce": 0.0002080958365695551, + "loss_iou": 0.337890625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 373326132, + "step": 4090 + }, + { + "epoch": 17.045833333333334, + "grad_norm": 1.701540327520391, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 373417112, + "step": 4091 + }, + { + "epoch": 17.045833333333334, + "loss": 0.03578822314739227, + "loss_ce": 2.1621737687382847e-05, + "loss_iou": 0.279296875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 373417112, + "step": 4091 + }, + { + "epoch": 17.05, + "grad_norm": 4.281085206791119, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 373508804, + "step": 4092 + }, + { + "epoch": 17.05, + "loss": 0.04737226292490959, + "loss_ce": 1.3522824247047538e-06, + "loss_iou": 0.25, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 373508804, + "step": 4092 + }, + { + "epoch": 17.054166666666667, + "grad_norm": 2.945342924581056, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 373600388, + "step": 4093 + }, + { + "epoch": 17.054166666666667, + "loss": 0.04209952801465988, + "loss_ce": 0.00018363283015787601, + "loss_iou": 0.302734375, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 373600388, + "step": 4093 + }, + { + "epoch": 17.058333333333334, + "grad_norm": 2.70778543173169, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 373692000, + "step": 4094 + }, + { + "epoch": 17.058333333333334, + "loss": 0.04033127427101135, + "loss_ce": 3.28097194142174e-05, + "loss_iou": 0.35546875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 373692000, + "step": 4094 + }, + { + "epoch": 17.0625, + "grad_norm": 3.236834280345472, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 373783412, + "step": 4095 + }, + { + "epoch": 17.0625, + "loss": 0.03508774936199188, + "loss_ce": 3.0683379009133205e-05, + "loss_iou": 0.279296875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 373783412, + "step": 4095 + }, + { + "epoch": 17.066666666666666, + "grad_norm": 3.560904092381228, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 373875084, + "step": 4096 + }, + { + "epoch": 17.066666666666666, + "loss": 0.02976866066455841, + "loss_ce": 9.031443187268451e-05, + "loss_iou": 0.328125, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 373875084, + "step": 4096 + }, + { + "epoch": 17.070833333333333, + "grad_norm": 2.7391476161511314, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 373966272, + "step": 4097 + }, + { + "epoch": 17.070833333333333, + "loss": 0.0847180038690567, + "loss_ce": 1.6464753571199253e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 373966272, + "step": 4097 + }, + { + "epoch": 17.075, + "grad_norm": 1.4101527221877117, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 374057068, + "step": 4098 + }, + { + "epoch": 17.075, + "loss": 0.03125577047467232, + "loss_ce": 5.771456471848069e-06, + "loss_iou": 0.166015625, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 374057068, + "step": 4098 + }, + { + "epoch": 17.079166666666666, + "grad_norm": 2.191619742883779, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 374148516, + "step": 4099 + }, + { + "epoch": 17.079166666666666, + "loss": 0.09695076942443848, + "loss_ce": 2.693852729862556e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 374148516, + "step": 4099 + }, + { + "epoch": 17.083333333333332, + "grad_norm": 3.593796620958026, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 374239580, + "step": 4100 + }, + { + "epoch": 17.083333333333332, + "loss": 0.07341619580984116, + "loss_ce": 6.16206853010226e-06, + "loss_iou": 0.302734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 374239580, + "step": 4100 + }, + { + "epoch": 17.0875, + "grad_norm": 3.377762992988838, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 374330488, + "step": 4101 + }, + { + "epoch": 17.0875, + "loss": 0.04819894954562187, + "loss_ce": 0.00013376145216170698, + "loss_iou": 0.291015625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 374330488, + "step": 4101 + }, + { + "epoch": 17.091666666666665, + "grad_norm": 3.5510953998518926, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 374421696, + "step": 4102 + }, + { + "epoch": 17.091666666666665, + "loss": 0.05382417142391205, + "loss_ce": 6.4256664700224064e-06, + "loss_iou": 0.37890625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 374421696, + "step": 4102 + }, + { + "epoch": 17.095833333333335, + "grad_norm": 2.3490478729597593, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 374513196, + "step": 4103 + }, + { + "epoch": 17.095833333333335, + "loss": 0.03897378221154213, + "loss_ce": 2.8363986075419234e-06, + "loss_iou": 0.30859375, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 374513196, + "step": 4103 + }, + { + "epoch": 17.1, + "grad_norm": 2.139081143620778, + "learning_rate": 5e-05, + "loss": 0.0371, + "num_input_tokens_seen": 374604392, + "step": 4104 + }, + { + "epoch": 17.1, + "loss": 0.04249031841754913, + "loss_ce": 9.846298780757934e-06, + "loss_iou": 0.322265625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 374604392, + "step": 4104 + }, + { + "epoch": 17.104166666666668, + "grad_norm": 3.1062600113588323, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 374696044, + "step": 4105 + }, + { + "epoch": 17.104166666666668, + "loss": 0.06857717782258987, + "loss_ce": 4.18104900745675e-06, + "loss_iou": 0.047607421875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 374696044, + "step": 4105 + }, + { + "epoch": 17.108333333333334, + "grad_norm": 3.907289254950295, + "learning_rate": 5e-05, + "loss": 0.0317, + "num_input_tokens_seen": 374787464, + "step": 4106 + }, + { + "epoch": 17.108333333333334, + "loss": 0.035776399075984955, + "loss_ce": 2.167586899304297e-06, + "loss_iou": 0.2890625, + "loss_num": 0.00714111328125, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 374787464, + "step": 4106 + }, + { + "epoch": 17.1125, + "grad_norm": 1.5061928543804801, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 374878312, + "step": 4107 + }, + { + "epoch": 17.1125, + "loss": 0.029756616801023483, + "loss_ce": 1.978759428311605e-06, + "loss_iou": 0.220703125, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 374878312, + "step": 4107 + }, + { + "epoch": 17.116666666666667, + "grad_norm": 1.4717582325581207, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 374969656, + "step": 4108 + }, + { + "epoch": 17.116666666666667, + "loss": 0.08082762360572815, + "loss_ce": 9.448397577216383e-06, + "loss_iou": 0.15625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 374969656, + "step": 4108 + }, + { + "epoch": 17.120833333333334, + "grad_norm": 1.2700583951215059, + "learning_rate": 5e-05, + "loss": 0.0367, + "num_input_tokens_seen": 375061020, + "step": 4109 + }, + { + "epoch": 17.120833333333334, + "loss": 0.033961232751607895, + "loss_ce": 8.932576065490139e-07, + "loss_iou": 0.2236328125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 375061020, + "step": 4109 + }, + { + "epoch": 17.125, + "grad_norm": 0.9037241210993366, + "learning_rate": 5e-05, + "loss": 0.0322, + "num_input_tokens_seen": 375152520, + "step": 4110 + }, + { + "epoch": 17.125, + "loss": 0.024991333484649658, + "loss_ce": 0.00114184629637748, + "loss_iou": 0.337890625, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 375152520, + "step": 4110 + }, + { + "epoch": 17.129166666666666, + "grad_norm": 1.5900244598953917, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 375243896, + "step": 4111 + }, + { + "epoch": 17.129166666666666, + "loss": 0.034204646944999695, + "loss_ce": 2.495873559382744e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 375243896, + "step": 4111 + }, + { + "epoch": 17.133333333333333, + "grad_norm": 2.2261162090891533, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 375335536, + "step": 4112 + }, + { + "epoch": 17.133333333333333, + "loss": 0.03536083921790123, + "loss_ce": 6.22510469838744e-06, + "loss_iou": 0.2578125, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 375335536, + "step": 4112 + }, + { + "epoch": 17.1375, + "grad_norm": 1.4003842951662424, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 375426984, + "step": 4113 + }, + { + "epoch": 17.1375, + "loss": 0.05160084366798401, + "loss_ce": 1.0880850823014043e-05, + "loss_iou": 0.08935546875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 375426984, + "step": 4113 + }, + { + "epoch": 17.141666666666666, + "grad_norm": 1.288984151087767, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 375517832, + "step": 4114 + }, + { + "epoch": 17.141666666666666, + "loss": 0.04785723611712456, + "loss_ce": 5.67401957596303e-06, + "loss_iou": 0.275390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 375517832, + "step": 4114 + }, + { + "epoch": 17.145833333333332, + "grad_norm": 1.4372619479754545, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 375608728, + "step": 4115 + }, + { + "epoch": 17.145833333333332, + "loss": 0.052819229662418365, + "loss_ce": 8.561160939279944e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 375608728, + "step": 4115 + }, + { + "epoch": 17.15, + "grad_norm": 2.6959178219886644, + "learning_rate": 5e-05, + "loss": 0.061, + "num_input_tokens_seen": 375700688, + "step": 4116 + }, + { + "epoch": 17.15, + "loss": 0.04783296585083008, + "loss_ce": 4.293494384910446e-06, + "loss_iou": 0.255859375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 375700688, + "step": 4116 + }, + { + "epoch": 17.154166666666665, + "grad_norm": 8.95136108189134, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 375792212, + "step": 4117 + }, + { + "epoch": 17.154166666666665, + "loss": 0.07952168583869934, + "loss_ce": 0.000160721450811252, + "loss_iou": 0.251953125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 375792212, + "step": 4117 + }, + { + "epoch": 17.158333333333335, + "grad_norm": 4.873580710701036, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 375883520, + "step": 4118 + }, + { + "epoch": 17.158333333333335, + "loss": 0.09023542702198029, + "loss_ce": 1.0212111192231532e-05, + "loss_iou": 0.3203125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 375883520, + "step": 4118 + }, + { + "epoch": 17.1625, + "grad_norm": 2.642787145601659, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 375975216, + "step": 4119 + }, + { + "epoch": 17.1625, + "loss": 0.026552706956863403, + "loss_ce": 2.530133497202769e-05, + "loss_iou": 0.267578125, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 375975216, + "step": 4119 + }, + { + "epoch": 17.166666666666668, + "grad_norm": 2.403293357052846, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 376066804, + "step": 4120 + }, + { + "epoch": 17.166666666666668, + "loss": 0.05794864892959595, + "loss_ce": 1.102811802411452e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 376066804, + "step": 4120 + }, + { + "epoch": 17.170833333333334, + "grad_norm": 2.187880455047866, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 376158144, + "step": 4121 + }, + { + "epoch": 17.170833333333334, + "loss": 0.042102497071027756, + "loss_ce": 3.4964855331054423e-06, + "loss_iou": 0.267578125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 376158144, + "step": 4121 + }, + { + "epoch": 17.175, + "grad_norm": 3.2858897350605276, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 376249596, + "step": 4122 + }, + { + "epoch": 17.175, + "loss": 0.058754559606313705, + "loss_ce": 8.451620669802651e-05, + "loss_iou": 0.23046875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 376249596, + "step": 4122 + }, + { + "epoch": 17.179166666666667, + "grad_norm": 2.077742822143257, + "learning_rate": 5e-05, + "loss": 0.044, + "num_input_tokens_seen": 376340460, + "step": 4123 + }, + { + "epoch": 17.179166666666667, + "loss": 0.039612989872694016, + "loss_ce": 1.1738272860384313e-06, + "loss_iou": 0.255859375, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 376340460, + "step": 4123 + }, + { + "epoch": 17.183333333333334, + "grad_norm": 2.5078670252009854, + "learning_rate": 5e-05, + "loss": 0.0858, + "num_input_tokens_seen": 376431304, + "step": 4124 + }, + { + "epoch": 17.183333333333334, + "loss": 0.07966112345457077, + "loss_ce": 1.7874119293992408e-05, + "loss_iou": 0.248046875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 376431304, + "step": 4124 + }, + { + "epoch": 17.1875, + "grad_norm": 2.5328257559712233, + "learning_rate": 5e-05, + "loss": 0.0267, + "num_input_tokens_seen": 376522512, + "step": 4125 + }, + { + "epoch": 17.1875, + "loss": 0.022409534081816673, + "loss_ce": 0.0002537722757551819, + "loss_iou": 0.20703125, + "loss_num": 0.004425048828125, + "loss_xval": 0.022216796875, + "num_input_tokens_seen": 376522512, + "step": 4125 + }, + { + "epoch": 17.191666666666666, + "grad_norm": 4.78932799513451, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 376613316, + "step": 4126 + }, + { + "epoch": 17.191666666666666, + "loss": 0.07543014734983444, + "loss_ce": 0.002737276954576373, + "loss_iou": 0.1982421875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 376613316, + "step": 4126 + }, + { + "epoch": 17.195833333333333, + "grad_norm": 3.4560754545560988, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 376704784, + "step": 4127 + }, + { + "epoch": 17.195833333333333, + "loss": 0.053305864334106445, + "loss_ce": 1.4543708857672755e-05, + "loss_iou": 0.21875, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 376704784, + "step": 4127 + }, + { + "epoch": 17.2, + "grad_norm": 2.979212702062069, + "learning_rate": 5e-05, + "loss": 0.0893, + "num_input_tokens_seen": 376796540, + "step": 4128 + }, + { + "epoch": 17.2, + "loss": 0.1086147353053093, + "loss_ce": 2.6740895009425003e-06, + "loss_iou": 0.296875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 376796540, + "step": 4128 + }, + { + "epoch": 17.204166666666666, + "grad_norm": 2.9795527868387097, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 376887624, + "step": 4129 + }, + { + "epoch": 17.204166666666666, + "loss": 0.031053537502884865, + "loss_ce": 1.901970676954079e-06, + "loss_iou": 0.2578125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 376887624, + "step": 4129 + }, + { + "epoch": 17.208333333333332, + "grad_norm": 3.1958465536868865, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 376978240, + "step": 4130 + }, + { + "epoch": 17.208333333333332, + "loss": 0.040278829634189606, + "loss_ce": 3.377207394805737e-05, + "loss_iou": 0.244140625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 376978240, + "step": 4130 + }, + { + "epoch": 17.2125, + "grad_norm": 2.7260891925276276, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 377069312, + "step": 4131 + }, + { + "epoch": 17.2125, + "loss": 0.04666922241449356, + "loss_ce": 7.84874373493949e-06, + "loss_iou": 0.259765625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 377069312, + "step": 4131 + }, + { + "epoch": 17.216666666666665, + "grad_norm": 2.651070129394983, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 377161164, + "step": 4132 + }, + { + "epoch": 17.216666666666665, + "loss": 0.07825395464897156, + "loss_ce": 6.8832137003482785e-06, + "loss_iou": 0.4140625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 377161164, + "step": 4132 + }, + { + "epoch": 17.220833333333335, + "grad_norm": 2.2683718350970232, + "learning_rate": 5e-05, + "loss": 0.1661, + "num_input_tokens_seen": 377251592, + "step": 4133 + }, + { + "epoch": 17.220833333333335, + "loss": 0.1615155190229416, + "loss_ce": 1.2367737554086489e-06, + "loss_iou": 0.10791015625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 377251592, + "step": 4133 + }, + { + "epoch": 17.225, + "grad_norm": 2.6366418603858346, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 377342520, + "step": 4134 + }, + { + "epoch": 17.225, + "loss": 0.1185772716999054, + "loss_ce": 1.2214568414492533e-06, + "loss_iou": 0.216796875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 377342520, + "step": 4134 + }, + { + "epoch": 17.229166666666668, + "grad_norm": 3.5264559338621893, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 377434136, + "step": 4135 + }, + { + "epoch": 17.229166666666668, + "loss": 0.054434407502412796, + "loss_ce": 6.306642717390787e-06, + "loss_iou": 0.21484375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 377434136, + "step": 4135 + }, + { + "epoch": 17.233333333333334, + "grad_norm": 3.1055052262870766, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 377525448, + "step": 4136 + }, + { + "epoch": 17.233333333333334, + "loss": 0.043866340070962906, + "loss_ce": 0.00011939093383261934, + "loss_iou": 0.1708984375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 377525448, + "step": 4136 + }, + { + "epoch": 17.2375, + "grad_norm": 2.208501603893322, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 377616740, + "step": 4137 + }, + { + "epoch": 17.2375, + "loss": 0.06535966694355011, + "loss_ce": 6.272749942581868e-06, + "loss_iou": 0.255859375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 377616740, + "step": 4137 + }, + { + "epoch": 17.241666666666667, + "grad_norm": 2.0908215843581397, + "learning_rate": 5e-05, + "loss": 0.0515, + "num_input_tokens_seen": 377707964, + "step": 4138 + }, + { + "epoch": 17.241666666666667, + "loss": 0.04869557544589043, + "loss_ce": 4.780476956511848e-06, + "loss_iou": 0.212890625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 377707964, + "step": 4138 + }, + { + "epoch": 17.245833333333334, + "grad_norm": 3.4463473343026565, + "learning_rate": 5e-05, + "loss": 0.0763, + "num_input_tokens_seen": 377799740, + "step": 4139 + }, + { + "epoch": 17.245833333333334, + "loss": 0.1159019023180008, + "loss_ce": 0.0008964104927144945, + "loss_iou": 0.2578125, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 377799740, + "step": 4139 + }, + { + "epoch": 17.25, + "grad_norm": 4.382896012197364, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 377891368, + "step": 4140 + }, + { + "epoch": 17.25, + "loss": 0.04679463803768158, + "loss_ce": 0.00010274462692905217, + "loss_iou": 0.2138671875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 377891368, + "step": 4140 + }, + { + "epoch": 17.254166666666666, + "grad_norm": 3.0559161879412344, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 377982636, + "step": 4141 + }, + { + "epoch": 17.254166666666666, + "loss": 0.06926178932189941, + "loss_ce": 2.143523033737438e-06, + "loss_iou": 0.203125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 377982636, + "step": 4141 + }, + { + "epoch": 17.258333333333333, + "grad_norm": 1.0516071756116985, + "learning_rate": 5e-05, + "loss": 0.0891, + "num_input_tokens_seen": 378073500, + "step": 4142 + }, + { + "epoch": 17.258333333333333, + "loss": 0.07638757675886154, + "loss_ce": 9.710823178465944e-06, + "loss_iou": 0.1484375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 378073500, + "step": 4142 + }, + { + "epoch": 17.2625, + "grad_norm": 1.5393423311904353, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 378164848, + "step": 4143 + }, + { + "epoch": 17.2625, + "loss": 0.05576720088720322, + "loss_ce": 1.1585127140278928e-05, + "loss_iou": 0.126953125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 378164848, + "step": 4143 + }, + { + "epoch": 17.266666666666666, + "grad_norm": 2.2986887079216682, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 378256496, + "step": 4144 + }, + { + "epoch": 17.266666666666666, + "loss": 0.05642838403582573, + "loss_ce": 3.1898129236651585e-05, + "loss_iou": 0.212890625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 378256496, + "step": 4144 + }, + { + "epoch": 17.270833333333332, + "grad_norm": 3.637160341144884, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 378348148, + "step": 4145 + }, + { + "epoch": 17.270833333333332, + "loss": 0.06469674408435822, + "loss_ce": 0.00013680808478966355, + "loss_iou": 0.337890625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 378348148, + "step": 4145 + }, + { + "epoch": 17.275, + "grad_norm": 2.17254788913164, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 378439328, + "step": 4146 + }, + { + "epoch": 17.275, + "loss": 0.03240314871072769, + "loss_ce": 8.741089004615787e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0064697265625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 378439328, + "step": 4146 + }, + { + "epoch": 17.279166666666665, + "grad_norm": 3.348143426755929, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 378530776, + "step": 4147 + }, + { + "epoch": 17.279166666666665, + "loss": 0.0521889366209507, + "loss_ce": 3.8771340769017115e-06, + "loss_iou": 0.275390625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 378530776, + "step": 4147 + }, + { + "epoch": 17.283333333333335, + "grad_norm": 4.117528658266027, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 378621812, + "step": 4148 + }, + { + "epoch": 17.283333333333335, + "loss": 0.09836510568857193, + "loss_ce": 6.947469046281185e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 378621812, + "step": 4148 + }, + { + "epoch": 17.2875, + "grad_norm": 2.0040948660561937, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 378713528, + "step": 4149 + }, + { + "epoch": 17.2875, + "loss": 0.047562677413225174, + "loss_ce": 1.030675889523991e-06, + "loss_iou": 0.267578125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 378713528, + "step": 4149 + }, + { + "epoch": 17.291666666666668, + "grad_norm": 2.9819458574398814, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 378804916, + "step": 4150 + }, + { + "epoch": 17.291666666666668, + "loss": 0.03186263144016266, + "loss_ce": 2.277978410347714e-06, + "loss_iou": 0.0849609375, + "loss_num": 0.006378173828125, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 378804916, + "step": 4150 + }, + { + "epoch": 17.295833333333334, + "grad_norm": 2.409193723532535, + "learning_rate": 5e-05, + "loss": 0.0403, + "num_input_tokens_seen": 378896124, + "step": 4151 + }, + { + "epoch": 17.295833333333334, + "loss": 0.04503517970442772, + "loss_ce": 0.00014382405788637698, + "loss_iou": 0.171875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 378896124, + "step": 4151 + }, + { + "epoch": 17.3, + "grad_norm": 1.5030586417918494, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 378987300, + "step": 4152 + }, + { + "epoch": 17.3, + "loss": 0.12397777289152145, + "loss_ce": 4.588945739669725e-05, + "loss_iou": 0.154296875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 378987300, + "step": 4152 + }, + { + "epoch": 17.304166666666667, + "grad_norm": 4.31789317113666, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 379078612, + "step": 4153 + }, + { + "epoch": 17.304166666666667, + "loss": 0.03499021381139755, + "loss_ce": 1.8140335669158958e-06, + "loss_iou": 0.201171875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 379078612, + "step": 4153 + }, + { + "epoch": 17.308333333333334, + "grad_norm": 2.107743511654392, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 379169924, + "step": 4154 + }, + { + "epoch": 17.308333333333334, + "loss": 0.049952924251556396, + "loss_ce": 1.0907536307058763e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 379169924, + "step": 4154 + }, + { + "epoch": 17.3125, + "grad_norm": 2.507618094018235, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 379261204, + "step": 4155 + }, + { + "epoch": 17.3125, + "loss": 0.047419168055057526, + "loss_ce": 4.0629114664625376e-05, + "loss_iou": 0.357421875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 379261204, + "step": 4155 + }, + { + "epoch": 17.316666666666666, + "grad_norm": 2.961298842268306, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 379352556, + "step": 4156 + }, + { + "epoch": 17.316666666666666, + "loss": 0.039200618863105774, + "loss_ce": 7.923441671664477e-07, + "loss_iou": 0.2392578125, + "loss_num": 0.0078125, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 379352556, + "step": 4156 + }, + { + "epoch": 17.320833333333333, + "grad_norm": 3.599819057412839, + "learning_rate": 5e-05, + "loss": 0.0293, + "num_input_tokens_seen": 379444232, + "step": 4157 + }, + { + "epoch": 17.320833333333333, + "loss": 0.028475811704993248, + "loss_ce": 0.0016508603002876043, + "loss_iou": 0.28125, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 379444232, + "step": 4157 + }, + { + "epoch": 17.325, + "grad_norm": 4.796508671220804, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 379535720, + "step": 4158 + }, + { + "epoch": 17.325, + "loss": 0.06499719619750977, + "loss_ce": 1.0012766324507538e-05, + "loss_iou": 0.36328125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 379535720, + "step": 4158 + }, + { + "epoch": 17.329166666666666, + "grad_norm": 3.0562988991039353, + "learning_rate": 5e-05, + "loss": 0.0862, + "num_input_tokens_seen": 379627084, + "step": 4159 + }, + { + "epoch": 17.329166666666666, + "loss": 0.1044941172003746, + "loss_ce": 1.9318108570587356e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 379627084, + "step": 4159 + }, + { + "epoch": 17.333333333333332, + "grad_norm": 2.30199537755403, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 379718140, + "step": 4160 + }, + { + "epoch": 17.333333333333332, + "loss": 0.040326207876205444, + "loss_ce": 1.2487752428569365e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 379718140, + "step": 4160 + }, + { + "epoch": 17.3375, + "grad_norm": 17.81326134141207, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 379809208, + "step": 4161 + }, + { + "epoch": 17.3375, + "loss": 0.05243492126464844, + "loss_ce": 5.7217112043872476e-06, + "loss_iou": 0.29296875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 379809208, + "step": 4161 + }, + { + "epoch": 17.341666666666665, + "grad_norm": 1.422884101591852, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 379901088, + "step": 4162 + }, + { + "epoch": 17.341666666666665, + "loss": 0.023801235482096672, + "loss_ce": 8.907601295504719e-05, + "loss_iou": 0.11279296875, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 379901088, + "step": 4162 + }, + { + "epoch": 17.345833333333335, + "grad_norm": 2.9900858693580643, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 379992896, + "step": 4163 + }, + { + "epoch": 17.345833333333335, + "loss": 0.028920790180563927, + "loss_ce": 5.385740678320872e-06, + "loss_iou": 0.291015625, + "loss_num": 0.005767822265625, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 379992896, + "step": 4163 + }, + { + "epoch": 17.35, + "grad_norm": 5.004765317149916, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 380083900, + "step": 4164 + }, + { + "epoch": 17.35, + "loss": 0.09242373704910278, + "loss_ce": 1.2471190302676405e-06, + "loss_iou": 0.24609375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 380083900, + "step": 4164 + }, + { + "epoch": 17.354166666666668, + "grad_norm": 3.5720222777192694, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 380175476, + "step": 4165 + }, + { + "epoch": 17.354166666666668, + "loss": 0.0531640350818634, + "loss_ce": 2.41236944020784e-06, + "loss_iou": 0.35546875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 380175476, + "step": 4165 + }, + { + "epoch": 17.358333333333334, + "grad_norm": 2.918456205575103, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 380266748, + "step": 4166 + }, + { + "epoch": 17.358333333333334, + "loss": 0.04650936275720596, + "loss_ce": 5.731297960664961e-07, + "loss_iou": 0.251953125, + "loss_num": 0.00933837890625, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 380266748, + "step": 4166 + }, + { + "epoch": 17.3625, + "grad_norm": 4.120291909874971, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 380358840, + "step": 4167 + }, + { + "epoch": 17.3625, + "loss": 0.06932120025157928, + "loss_ce": 0.00012258999049663544, + "loss_iou": 0.1708984375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 380358840, + "step": 4167 + }, + { + "epoch": 17.366666666666667, + "grad_norm": 17.50068370015219, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 380450272, + "step": 4168 + }, + { + "epoch": 17.366666666666667, + "loss": 0.06793436408042908, + "loss_ce": 1.748909562593326e-05, + "loss_iou": 0.294921875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 380450272, + "step": 4168 + }, + { + "epoch": 17.370833333333334, + "grad_norm": 2.6697212933765195, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 380541972, + "step": 4169 + }, + { + "epoch": 17.370833333333334, + "loss": 0.05074727535247803, + "loss_ce": 1.1801877917605452e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 380541972, + "step": 4169 + }, + { + "epoch": 17.375, + "grad_norm": 1.3486808798727778, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 380633324, + "step": 4170 + }, + { + "epoch": 17.375, + "loss": 0.06902758777141571, + "loss_ce": 7.31236141291447e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 380633324, + "step": 4170 + }, + { + "epoch": 17.379166666666666, + "grad_norm": 5.02642605399187, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 380724316, + "step": 4171 + }, + { + "epoch": 17.379166666666666, + "loss": 0.028513111174106598, + "loss_ce": 2.4952072635642253e-05, + "loss_iou": 0.173828125, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 380724316, + "step": 4171 + }, + { + "epoch": 17.383333333333333, + "grad_norm": 5.776909080785385, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 380815592, + "step": 4172 + }, + { + "epoch": 17.383333333333333, + "loss": 0.030104611068964005, + "loss_ce": 1.4279077731771395e-05, + "loss_iou": 0.244140625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 380815592, + "step": 4172 + }, + { + "epoch": 17.3875, + "grad_norm": 3.3291427531728686, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 380906692, + "step": 4173 + }, + { + "epoch": 17.3875, + "loss": 0.026032838970422745, + "loss_ce": 1.660524503677152e-05, + "loss_iou": 0.21875, + "loss_num": 0.00518798828125, + "loss_xval": 0.0260009765625, + "num_input_tokens_seen": 380906692, + "step": 4173 + }, + { + "epoch": 17.391666666666666, + "grad_norm": 2.04136096032851, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 380998264, + "step": 4174 + }, + { + "epoch": 17.391666666666666, + "loss": 0.054833292961120605, + "loss_ce": 0.0003136429295409471, + "loss_iou": 0.125, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 380998264, + "step": 4174 + }, + { + "epoch": 17.395833333333332, + "grad_norm": 5.595745041098005, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 381089624, + "step": 4175 + }, + { + "epoch": 17.395833333333332, + "loss": 0.07274062186479568, + "loss_ce": 3.2486663258168846e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 381089624, + "step": 4175 + }, + { + "epoch": 17.4, + "grad_norm": 2.462672174230439, + "learning_rate": 5e-05, + "loss": 0.1027, + "num_input_tokens_seen": 381181008, + "step": 4176 + }, + { + "epoch": 17.4, + "loss": 0.1237187534570694, + "loss_ce": 4.994043365513789e-07, + "loss_iou": 0.1640625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 381181008, + "step": 4176 + }, + { + "epoch": 17.404166666666665, + "grad_norm": 2.048999090525931, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 381272552, + "step": 4177 + }, + { + "epoch": 17.404166666666665, + "loss": 0.061331361532211304, + "loss_ce": 6.28693032922456e-06, + "loss_iou": 0.189453125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 381272552, + "step": 4177 + }, + { + "epoch": 17.408333333333335, + "grad_norm": 1.6223244040540357, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 381362932, + "step": 4178 + }, + { + "epoch": 17.408333333333335, + "loss": 0.045600827783346176, + "loss_ce": 7.567994089185959e-06, + "loss_iou": 0.1416015625, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 381362932, + "step": 4178 + }, + { + "epoch": 17.4125, + "grad_norm": 2.025302773676103, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 381454308, + "step": 4179 + }, + { + "epoch": 17.4125, + "loss": 0.07269463688135147, + "loss_ce": 1.7684169506537728e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 381454308, + "step": 4179 + }, + { + "epoch": 17.416666666666668, + "grad_norm": 2.5372328217531015, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 381545752, + "step": 4180 + }, + { + "epoch": 17.416666666666668, + "loss": 0.04797760024666786, + "loss_ce": 1.922258707054425e-05, + "loss_iou": 0.296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 381545752, + "step": 4180 + }, + { + "epoch": 17.420833333333334, + "grad_norm": 5.241445928314043, + "learning_rate": 5e-05, + "loss": 0.0457, + "num_input_tokens_seen": 381637208, + "step": 4181 + }, + { + "epoch": 17.420833333333334, + "loss": 0.0530615970492363, + "loss_ce": 6.7899372879765e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 381637208, + "step": 4181 + }, + { + "epoch": 17.425, + "grad_norm": 3.2854474328181857, + "learning_rate": 5e-05, + "loss": 0.0825, + "num_input_tokens_seen": 381728292, + "step": 4182 + }, + { + "epoch": 17.425, + "loss": 0.08642810583114624, + "loss_ce": 2.322962927792105e-06, + "loss_iou": 0.298828125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 381728292, + "step": 4182 + }, + { + "epoch": 17.429166666666667, + "grad_norm": 1.9952205771229876, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 381820024, + "step": 4183 + }, + { + "epoch": 17.429166666666667, + "loss": 0.03883390873670578, + "loss_ce": 1.55478592205327e-05, + "loss_iou": 0.3125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 381820024, + "step": 4183 + }, + { + "epoch": 17.433333333333334, + "grad_norm": 2.067975560498226, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 381911112, + "step": 4184 + }, + { + "epoch": 17.433333333333334, + "loss": 0.035965446382761, + "loss_ce": 4.811201961274492e-07, + "loss_iou": 0.1982421875, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 381911112, + "step": 4184 + }, + { + "epoch": 17.4375, + "grad_norm": 2.074459227907449, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 382002684, + "step": 4185 + }, + { + "epoch": 17.4375, + "loss": 0.05124114826321602, + "loss_ce": 7.843073399271816e-05, + "loss_iou": 0.244140625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 382002684, + "step": 4185 + }, + { + "epoch": 17.441666666666666, + "grad_norm": 2.6020157273330744, + "learning_rate": 5e-05, + "loss": 0.042, + "num_input_tokens_seen": 382094208, + "step": 4186 + }, + { + "epoch": 17.441666666666666, + "loss": 0.048831477761268616, + "loss_ce": 3.352569137859973e-06, + "loss_iou": 0.30859375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 382094208, + "step": 4186 + }, + { + "epoch": 17.445833333333333, + "grad_norm": 4.115432540513496, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 382185420, + "step": 4187 + }, + { + "epoch": 17.445833333333333, + "loss": 0.07571595907211304, + "loss_ce": 0.00012392218923196197, + "loss_iou": 0.20703125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 382185420, + "step": 4187 + }, + { + "epoch": 17.45, + "grad_norm": 2.021583164885262, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 382276752, + "step": 4188 + }, + { + "epoch": 17.45, + "loss": 0.07489342987537384, + "loss_ce": 7.1080962698033545e-06, + "loss_iou": 0.294921875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 382276752, + "step": 4188 + }, + { + "epoch": 17.454166666666666, + "grad_norm": 4.566095149534234, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 382368236, + "step": 4189 + }, + { + "epoch": 17.454166666666666, + "loss": 0.05665755644440651, + "loss_ce": 1.6720227904443163e-06, + "loss_iou": 0.251953125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 382368236, + "step": 4189 + }, + { + "epoch": 17.458333333333332, + "grad_norm": 2.8811610544544695, + "learning_rate": 5e-05, + "loss": 0.0736, + "num_input_tokens_seen": 382460640, + "step": 4190 + }, + { + "epoch": 17.458333333333332, + "loss": 0.041686300188302994, + "loss_ce": 3.743516936083324e-05, + "loss_iou": 0.294921875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 382460640, + "step": 4190 + }, + { + "epoch": 17.4625, + "grad_norm": 2.83894264255325, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 382552036, + "step": 4191 + }, + { + "epoch": 17.4625, + "loss": 0.051210999488830566, + "loss_ce": 2.5061237920454005e-06, + "loss_iou": 0.251953125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 382552036, + "step": 4191 + }, + { + "epoch": 17.466666666666665, + "grad_norm": 2.913447387352696, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 382643988, + "step": 4192 + }, + { + "epoch": 17.466666666666665, + "loss": 0.04521823674440384, + "loss_ce": 5.2219100325601175e-05, + "loss_iou": 0.28515625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 382643988, + "step": 4192 + }, + { + "epoch": 17.470833333333335, + "grad_norm": 2.455147735870591, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 382735388, + "step": 4193 + }, + { + "epoch": 17.470833333333335, + "loss": 0.024509306997060776, + "loss_ce": 3.691537131089717e-06, + "loss_iou": 0.1328125, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 382735388, + "step": 4193 + }, + { + "epoch": 17.475, + "grad_norm": 3.309451299202754, + "learning_rate": 5e-05, + "loss": 0.084, + "num_input_tokens_seen": 382826768, + "step": 4194 + }, + { + "epoch": 17.475, + "loss": 0.0785035565495491, + "loss_ce": 4.716331659437856e-06, + "loss_iou": 0.1826171875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 382826768, + "step": 4194 + }, + { + "epoch": 17.479166666666668, + "grad_norm": 3.4075041239144768, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 382917520, + "step": 4195 + }, + { + "epoch": 17.479166666666668, + "loss": 0.09112516045570374, + "loss_ce": 1.4928999917174224e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 382917520, + "step": 4195 + }, + { + "epoch": 17.483333333333334, + "grad_norm": 1.562039716782248, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 383009252, + "step": 4196 + }, + { + "epoch": 17.483333333333334, + "loss": 0.04242390766739845, + "loss_ce": 4.4736902964359615e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 383009252, + "step": 4196 + }, + { + "epoch": 17.4875, + "grad_norm": 4.952842041521385, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 383100828, + "step": 4197 + }, + { + "epoch": 17.4875, + "loss": 0.15947763621807098, + "loss_ce": 8.03215880296193e-06, + "loss_iou": 0.302734375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 383100828, + "step": 4197 + }, + { + "epoch": 17.491666666666667, + "grad_norm": 2.777819714122239, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 383192324, + "step": 4198 + }, + { + "epoch": 17.491666666666667, + "loss": 0.0738849937915802, + "loss_ce": 1.9351950868440326e-06, + "loss_iou": 0.2421875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 383192324, + "step": 4198 + }, + { + "epoch": 17.495833333333334, + "grad_norm": 3.2626916345833705, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 383284020, + "step": 4199 + }, + { + "epoch": 17.495833333333334, + "loss": 0.03140312433242798, + "loss_ce": 5.335076593837584e-07, + "loss_iou": 0.240234375, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 383284020, + "step": 4199 + }, + { + "epoch": 17.5, + "grad_norm": 2.6749036434240514, + "learning_rate": 5e-05, + "loss": 0.0327, + "num_input_tokens_seen": 383376196, + "step": 4200 + }, + { + "epoch": 17.5, + "loss": 0.03776795417070389, + "loss_ce": 0.00018555522547103465, + "loss_iou": 0.3984375, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 383376196, + "step": 4200 + }, + { + "epoch": 17.504166666666666, + "grad_norm": 5.499155934500834, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 383467144, + "step": 4201 + }, + { + "epoch": 17.504166666666666, + "loss": 0.06288354843854904, + "loss_ce": 2.0762161057064077e-06, + "loss_iou": 0.34765625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 383467144, + "step": 4201 + }, + { + "epoch": 17.508333333333333, + "grad_norm": 2.013655166783423, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 383557920, + "step": 4202 + }, + { + "epoch": 17.508333333333333, + "loss": 0.08707220107316971, + "loss_ce": 5.555620191444177e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 383557920, + "step": 4202 + }, + { + "epoch": 17.5125, + "grad_norm": 2.3711594009871253, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 383649372, + "step": 4203 + }, + { + "epoch": 17.5125, + "loss": 0.03189709410071373, + "loss_ce": 0.00018933152023237199, + "loss_iou": 0.25, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 383649372, + "step": 4203 + }, + { + "epoch": 17.516666666666666, + "grad_norm": 1.6791362866056327, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 383741344, + "step": 4204 + }, + { + "epoch": 17.516666666666666, + "loss": 0.0628369152545929, + "loss_ce": 1.2169030014774762e-06, + "loss_iou": 0.30078125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 383741344, + "step": 4204 + }, + { + "epoch": 17.520833333333332, + "grad_norm": 1.8910707791570547, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 383832172, + "step": 4205 + }, + { + "epoch": 17.520833333333332, + "loss": 0.03927692770957947, + "loss_ce": 8.04272076493362e-07, + "loss_iou": 0.2236328125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 383832172, + "step": 4205 + }, + { + "epoch": 17.525, + "grad_norm": 2.6070877254757354, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 383923912, + "step": 4206 + }, + { + "epoch": 17.525, + "loss": 0.03360820189118385, + "loss_ce": 2.3605247406521812e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 383923912, + "step": 4206 + }, + { + "epoch": 17.529166666666665, + "grad_norm": 2.1064854829699424, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 384015528, + "step": 4207 + }, + { + "epoch": 17.529166666666665, + "loss": 0.05154259502887726, + "loss_ce": 0.0019362723687663674, + "loss_iou": 0.224609375, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 384015528, + "step": 4207 + }, + { + "epoch": 17.533333333333335, + "grad_norm": 2.3138079290066504, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 384106768, + "step": 4208 + }, + { + "epoch": 17.533333333333335, + "loss": 0.029217317700386047, + "loss_ce": 2.7251966457697563e-05, + "loss_iou": 0.31640625, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 384106768, + "step": 4208 + }, + { + "epoch": 17.5375, + "grad_norm": 2.4430234781066864, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 384197860, + "step": 4209 + }, + { + "epoch": 17.5375, + "loss": 0.0784795880317688, + "loss_ce": 0.000995452981442213, + "loss_iou": 0.2216796875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 384197860, + "step": 4209 + }, + { + "epoch": 17.541666666666668, + "grad_norm": 2.737309471067965, + "learning_rate": 5e-05, + "loss": 0.1394, + "num_input_tokens_seen": 384289052, + "step": 4210 + }, + { + "epoch": 17.541666666666668, + "loss": 0.13933785259723663, + "loss_ce": 2.511477578082122e-05, + "loss_iou": 0.34375, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 384289052, + "step": 4210 + }, + { + "epoch": 17.545833333333334, + "grad_norm": 2.7699618866317466, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 384380580, + "step": 4211 + }, + { + "epoch": 17.545833333333334, + "loss": 0.06901293992996216, + "loss_ce": 1.2692656127910595e-05, + "loss_iou": 0.3125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 384380580, + "step": 4211 + }, + { + "epoch": 17.55, + "grad_norm": 2.752521438874284, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 384471860, + "step": 4212 + }, + { + "epoch": 17.55, + "loss": 0.033134959638118744, + "loss_ce": 8.129944035317749e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 384471860, + "step": 4212 + }, + { + "epoch": 17.554166666666667, + "grad_norm": 2.5678958327972503, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 384562960, + "step": 4213 + }, + { + "epoch": 17.554166666666667, + "loss": 0.038819510489702225, + "loss_ce": 1.1500437722133938e-06, + "loss_iou": 0.23046875, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 384562960, + "step": 4213 + }, + { + "epoch": 17.558333333333334, + "grad_norm": 1.7329617000610125, + "learning_rate": 5e-05, + "loss": 0.0272, + "num_input_tokens_seen": 384654876, + "step": 4214 + }, + { + "epoch": 17.558333333333334, + "loss": 0.032617341727018356, + "loss_ce": 5.494547622220125e-06, + "loss_iou": 0.26171875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 384654876, + "step": 4214 + }, + { + "epoch": 17.5625, + "grad_norm": 7.2811736273901175, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 384746264, + "step": 4215 + }, + { + "epoch": 17.5625, + "loss": 0.08529709279537201, + "loss_ce": 0.00012253341265022755, + "loss_iou": 0.267578125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 384746264, + "step": 4215 + }, + { + "epoch": 17.566666666666666, + "grad_norm": 1.4273657568497262, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 384837028, + "step": 4216 + }, + { + "epoch": 17.566666666666666, + "loss": 0.05279720202088356, + "loss_ce": 1.7941896430784254e-06, + "loss_iou": 0.162109375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 384837028, + "step": 4216 + }, + { + "epoch": 17.570833333333333, + "grad_norm": 2.6049713329445083, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 384928004, + "step": 4217 + }, + { + "epoch": 17.570833333333333, + "loss": 0.07219231128692627, + "loss_ce": 0.00038445499376393855, + "loss_iou": 0.376953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 384928004, + "step": 4217 + }, + { + "epoch": 17.575, + "grad_norm": 1.6813061374465061, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 385019184, + "step": 4218 + }, + { + "epoch": 17.575, + "loss": 0.041826337575912476, + "loss_ce": 1.9954518393205944e-06, + "loss_iou": 0.314453125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 385019184, + "step": 4218 + }, + { + "epoch": 17.579166666666666, + "grad_norm": 2.3886752163061593, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 385110276, + "step": 4219 + }, + { + "epoch": 17.579166666666666, + "loss": 0.025012066587805748, + "loss_ce": 2.911604269684176e-06, + "loss_iou": 0.16015625, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 385110276, + "step": 4219 + }, + { + "epoch": 17.583333333333332, + "grad_norm": 2.7330086944365672, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 385201992, + "step": 4220 + }, + { + "epoch": 17.583333333333332, + "loss": 0.08029569685459137, + "loss_ce": 1.9216951841372065e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 385201992, + "step": 4220 + }, + { + "epoch": 17.5875, + "grad_norm": 2.422918394086421, + "learning_rate": 5e-05, + "loss": 0.0432, + "num_input_tokens_seen": 385293352, + "step": 4221 + }, + { + "epoch": 17.5875, + "loss": 0.04279577359557152, + "loss_ce": 2.4986568405438447e-06, + "loss_iou": 0.1572265625, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 385293352, + "step": 4221 + }, + { + "epoch": 17.591666666666665, + "grad_norm": 2.777699149945361, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 385384168, + "step": 4222 + }, + { + "epoch": 17.591666666666665, + "loss": 0.03143524378538132, + "loss_ce": 1.7396700059180148e-05, + "loss_iou": 0.193359375, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 385384168, + "step": 4222 + }, + { + "epoch": 17.595833333333335, + "grad_norm": 2.69353567615644, + "learning_rate": 5e-05, + "loss": 0.0677, + "num_input_tokens_seen": 385475292, + "step": 4223 + }, + { + "epoch": 17.595833333333335, + "loss": 0.04598440229892731, + "loss_ce": 9.66764309850987e-06, + "loss_iou": 0.326171875, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 385475292, + "step": 4223 + }, + { + "epoch": 17.6, + "grad_norm": 4.163966587950836, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 385567056, + "step": 4224 + }, + { + "epoch": 17.6, + "loss": 0.04680160805583, + "loss_ce": 1.8161270418204367e-05, + "loss_iou": 0.296875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 385567056, + "step": 4224 + }, + { + "epoch": 17.604166666666668, + "grad_norm": 2.2482295399288286, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 385658644, + "step": 4225 + }, + { + "epoch": 17.604166666666668, + "loss": 0.03979950025677681, + "loss_ce": 4.5782053348375484e-06, + "loss_iou": 0.15234375, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 385658644, + "step": 4225 + }, + { + "epoch": 17.608333333333334, + "grad_norm": 2.7217679813302937, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 385749844, + "step": 4226 + }, + { + "epoch": 17.608333333333334, + "loss": 0.05861446261405945, + "loss_ce": 2.0709698219434358e-05, + "loss_iou": 0.296875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 385749844, + "step": 4226 + }, + { + "epoch": 17.6125, + "grad_norm": 3.3383609570266652, + "learning_rate": 5e-05, + "loss": 0.0653, + "num_input_tokens_seen": 385840696, + "step": 4227 + }, + { + "epoch": 17.6125, + "loss": 0.08750728517770767, + "loss_ce": 0.001165428082458675, + "loss_iou": 0.201171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 385840696, + "step": 4227 + }, + { + "epoch": 17.616666666666667, + "grad_norm": 2.738495218076334, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 385932628, + "step": 4228 + }, + { + "epoch": 17.616666666666667, + "loss": 0.030733108520507812, + "loss_ce": 1.9056218434343464e-06, + "loss_iou": 0.375, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 385932628, + "step": 4228 + }, + { + "epoch": 17.620833333333334, + "grad_norm": 3.0741839980590737, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 386023320, + "step": 4229 + }, + { + "epoch": 17.620833333333334, + "loss": 0.06695705652236938, + "loss_ce": 1.493851868872298e-06, + "loss_iou": 0.259765625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 386023320, + "step": 4229 + }, + { + "epoch": 17.625, + "grad_norm": 2.208632856814461, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 386115504, + "step": 4230 + }, + { + "epoch": 17.625, + "loss": 0.04812372103333473, + "loss_ce": 0.00011957136302953586, + "loss_iou": 0.279296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 386115504, + "step": 4230 + }, + { + "epoch": 17.629166666666666, + "grad_norm": 3.453935918291197, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 386206904, + "step": 4231 + }, + { + "epoch": 17.629166666666666, + "loss": 0.04082659259438515, + "loss_ce": 9.333534762845375e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 386206904, + "step": 4231 + }, + { + "epoch": 17.633333333333333, + "grad_norm": 4.551641943329968, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 386298048, + "step": 4232 + }, + { + "epoch": 17.633333333333333, + "loss": 0.04810848459601402, + "loss_ce": 4.329779767431319e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 386298048, + "step": 4232 + }, + { + "epoch": 17.6375, + "grad_norm": 2.5608155879021144, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 386389596, + "step": 4233 + }, + { + "epoch": 17.6375, + "loss": 0.03954213857650757, + "loss_ce": 6.616890914301621e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 386389596, + "step": 4233 + }, + { + "epoch": 17.641666666666666, + "grad_norm": 3.5352760234306144, + "learning_rate": 5e-05, + "loss": 0.0595, + "num_input_tokens_seen": 386481540, + "step": 4234 + }, + { + "epoch": 17.641666666666666, + "loss": 0.053279146552085876, + "loss_ce": 0.00034640979720279574, + "loss_iou": 0.1806640625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 386481540, + "step": 4234 + }, + { + "epoch": 17.645833333333332, + "grad_norm": 2.4756241576337223, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 386573248, + "step": 4235 + }, + { + "epoch": 17.645833333333332, + "loss": 0.03585919737815857, + "loss_ce": 1.630270890018437e-05, + "loss_iou": 0.333984375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 386573248, + "step": 4235 + }, + { + "epoch": 17.65, + "grad_norm": 2.016025969596603, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 386664364, + "step": 4236 + }, + { + "epoch": 17.65, + "loss": 0.03518152981996536, + "loss_ce": 2.393299155301065e-06, + "loss_iou": 0.236328125, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 386664364, + "step": 4236 + }, + { + "epoch": 17.654166666666665, + "grad_norm": 3.3414144838506767, + "learning_rate": 5e-05, + "loss": 0.0814, + "num_input_tokens_seen": 386754752, + "step": 4237 + }, + { + "epoch": 17.654166666666665, + "loss": 0.04250407963991165, + "loss_ce": 0.00035930349258705974, + "loss_iou": 0.203125, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 386754752, + "step": 4237 + }, + { + "epoch": 17.658333333333335, + "grad_norm": 2.1200143531779467, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 386846024, + "step": 4238 + }, + { + "epoch": 17.658333333333335, + "loss": 0.04939752444624901, + "loss_ce": 0.0008593180100433528, + "loss_iou": 0.25390625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 386846024, + "step": 4238 + }, + { + "epoch": 17.6625, + "grad_norm": 1.5079927276364646, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 386937504, + "step": 4239 + }, + { + "epoch": 17.6625, + "loss": 0.03806938976049423, + "loss_ce": 6.342200322251301e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 386937504, + "step": 4239 + }, + { + "epoch": 17.666666666666668, + "grad_norm": 2.0841912017626467, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 387028972, + "step": 4240 + }, + { + "epoch": 17.666666666666668, + "loss": 0.05647444352507591, + "loss_ce": 1.666220214247005e-06, + "loss_iou": 0.189453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 387028972, + "step": 4240 + }, + { + "epoch": 17.670833333333334, + "grad_norm": 4.928727740122159, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 387120320, + "step": 4241 + }, + { + "epoch": 17.670833333333334, + "loss": 0.03015184961259365, + "loss_ce": 4.818359684577445e-07, + "loss_iou": 0.29296875, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 387120320, + "step": 4241 + }, + { + "epoch": 17.675, + "grad_norm": 3.8113956592274336, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 387211544, + "step": 4242 + }, + { + "epoch": 17.675, + "loss": 0.048737533390522, + "loss_ce": 9.625723578210454e-07, + "loss_iou": 0.1572265625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 387211544, + "step": 4242 + }, + { + "epoch": 17.679166666666667, + "grad_norm": 5.128999783647298, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 387301464, + "step": 4243 + }, + { + "epoch": 17.679166666666667, + "loss": 0.055925507098436356, + "loss_ce": 6.308028969215229e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 387301464, + "step": 4243 + }, + { + "epoch": 17.683333333333334, + "grad_norm": 2.89438733334762, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 387393004, + "step": 4244 + }, + { + "epoch": 17.683333333333334, + "loss": 0.05619873106479645, + "loss_ce": 6.100194696045946e-07, + "loss_iou": 0.21875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 387393004, + "step": 4244 + }, + { + "epoch": 17.6875, + "grad_norm": 2.610717063238736, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 387484404, + "step": 4245 + }, + { + "epoch": 17.6875, + "loss": 0.03247163072228432, + "loss_ce": 9.265061748919834e-07, + "loss_iou": 0.298828125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 387484404, + "step": 4245 + }, + { + "epoch": 17.691666666666666, + "grad_norm": 2.263550389067424, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 387575592, + "step": 4246 + }, + { + "epoch": 17.691666666666666, + "loss": 0.039128877222537994, + "loss_ce": 0.00020370917627587914, + "loss_iou": 0.16015625, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 387575592, + "step": 4246 + }, + { + "epoch": 17.695833333333333, + "grad_norm": 7.143087195639301, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 387667004, + "step": 4247 + }, + { + "epoch": 17.695833333333333, + "loss": 0.060753967612981796, + "loss_ce": 1.0965741239488125e-06, + "loss_iou": 0.390625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 387667004, + "step": 4247 + }, + { + "epoch": 17.7, + "grad_norm": 5.350068515423604, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 387757924, + "step": 4248 + }, + { + "epoch": 17.7, + "loss": 0.07637852430343628, + "loss_ce": 6.557953611263656e-07, + "loss_iou": 0.1875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 387757924, + "step": 4248 + }, + { + "epoch": 17.704166666666666, + "grad_norm": 1.8448489004454245, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 387849144, + "step": 4249 + }, + { + "epoch": 17.704166666666666, + "loss": 0.07519252598285675, + "loss_ce": 4.8400288505945355e-06, + "loss_iou": 0.189453125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 387849144, + "step": 4249 + }, + { + "epoch": 17.708333333333332, + "grad_norm": 1.2075256402713481, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 387940940, + "step": 4250 + }, + { + "epoch": 17.708333333333332, + "eval_seeclick_CIoU": 0.25203998386859894, + "eval_seeclick_GIoU": 0.23426888138055801, + "eval_seeclick_IoU": 0.3567681908607483, + "eval_seeclick_MAE_all": 0.10411766916513443, + "eval_seeclick_MAE_h": 0.07500777393579483, + "eval_seeclick_MAE_w": 0.235479474067688, + "eval_seeclick_MAE_x_boxes": 0.2311866730451584, + "eval_seeclick_MAE_y_boxes": 0.08258943632245064, + "eval_seeclick_NUM_probability": 0.9999988377094269, + "eval_seeclick_inside_bbox": 0.5397727340459824, + "eval_seeclick_loss": 0.5964598655700684, + "eval_seeclick_loss_ce": 0.14800745993852615, + "eval_seeclick_loss_iou": 0.458740234375, + "eval_seeclick_loss_num": 0.085205078125, + "eval_seeclick_loss_xval": 0.42608642578125, + "eval_seeclick_runtime": 80.6498, + "eval_seeclick_samples_per_second": 0.533, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 387940940, + "step": 4250 + }, + { + "epoch": 17.708333333333332, + "eval_icons_CIoU": 0.29239118099212646, + "eval_icons_GIoU": 0.2768501341342926, + "eval_icons_IoU": 0.38413749635219574, + "eval_icons_MAE_all": 0.07174773141741753, + "eval_icons_MAE_h": 0.1544894203543663, + "eval_icons_MAE_w": 0.10443814098834991, + "eval_icons_MAE_x_boxes": 0.10570014268159866, + "eval_icons_MAE_y_boxes": 0.1550183743238449, + "eval_icons_NUM_probability": 0.9999992549419403, + "eval_icons_inside_bbox": 0.53125, + "eval_icons_loss": 0.35228726267814636, + "eval_icons_loss_ce": 2.2205644199857488e-05, + "eval_icons_loss_iou": 0.24761962890625, + "eval_icons_loss_num": 0.0735015869140625, + "eval_icons_loss_xval": 0.3673095703125, + "eval_icons_runtime": 94.4201, + "eval_icons_samples_per_second": 0.53, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 387940940, + "step": 4250 + }, + { + "epoch": 17.708333333333332, + "eval_screenspot_CIoU": 0.3815338710943858, + "eval_screenspot_GIoU": 0.371933509906133, + "eval_screenspot_IoU": 0.45208731293678284, + "eval_screenspot_MAE_all": 0.09845635046561559, + "eval_screenspot_MAE_h": 0.08088805278142293, + "eval_screenspot_MAE_w": 0.194745272397995, + "eval_screenspot_MAE_x_boxes": 0.18775259951750436, + "eval_screenspot_MAE_y_boxes": 0.07606856028238933, + "eval_screenspot_NUM_probability": 0.9999962250391642, + "eval_screenspot_inside_bbox": 0.6833333373069763, + "eval_screenspot_loss": 0.4975851774215698, + "eval_screenspot_loss_ce": 0.002237203670508355, + "eval_screenspot_loss_iou": 0.40283203125, + "eval_screenspot_loss_num": 0.0996551513671875, + "eval_screenspot_loss_xval": 0.498291015625, + "eval_screenspot_runtime": 159.3398, + "eval_screenspot_samples_per_second": 0.559, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 387940940, + "step": 4250 + }, + { + "epoch": 17.708333333333332, + "eval_compot_CIoU": 0.45285023748874664, + "eval_compot_GIoU": 0.4470098465681076, + "eval_compot_IoU": 0.5330235660076141, + "eval_compot_MAE_all": 0.0571780689060688, + "eval_compot_MAE_h": 0.06364855542778969, + "eval_compot_MAE_w": 0.1531415358185768, + "eval_compot_MAE_x_boxes": 0.15447616577148438, + "eval_compot_MAE_y_boxes": 0.06288901343941689, + "eval_compot_NUM_probability": 0.9999961256980896, + "eval_compot_inside_bbox": 0.7638888955116272, + "eval_compot_loss": 0.35075217485427856, + "eval_compot_loss_ce": 0.07240623980760574, + "eval_compot_loss_iou": 0.302978515625, + "eval_compot_loss_num": 0.050525665283203125, + "eval_compot_loss_xval": 0.2526092529296875, + "eval_compot_runtime": 94.7307, + "eval_compot_samples_per_second": 0.528, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 387940940, + "step": 4250 + }, + { + "epoch": 17.708333333333332, + "loss": 0.31521183252334595, + "loss_ce": 0.06936222314834595, + "loss_iou": 0.2890625, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 387940940, + "step": 4250 + }, + { + "epoch": 17.7125, + "grad_norm": 1.4582218440071648, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 388032684, + "step": 4251 + }, + { + "epoch": 17.7125, + "loss": 0.0526927188038826, + "loss_ce": 0.002224275842308998, + "loss_iou": 0.1806640625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 388032684, + "step": 4251 + }, + { + "epoch": 17.716666666666665, + "grad_norm": 9.773943287946695, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 388124348, + "step": 4252 + }, + { + "epoch": 17.716666666666665, + "loss": 0.030305448919534683, + "loss_ce": 9.122079063672572e-06, + "loss_iou": 0.275390625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 388124348, + "step": 4252 + }, + { + "epoch": 17.720833333333335, + "grad_norm": 4.423771502817672, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 388215432, + "step": 4253 + }, + { + "epoch": 17.720833333333335, + "loss": 0.10943731665611267, + "loss_ce": 0.0024503269232809544, + "loss_iou": 0.2119140625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 388215432, + "step": 4253 + }, + { + "epoch": 17.725, + "grad_norm": 1.3108049913583504, + "learning_rate": 5e-05, + "loss": 0.0321, + "num_input_tokens_seen": 388307252, + "step": 4254 + }, + { + "epoch": 17.725, + "loss": 0.034032803028821945, + "loss_ce": 5.704933755623642e-06, + "loss_iou": 0.0810546875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 388307252, + "step": 4254 + }, + { + "epoch": 17.729166666666668, + "grad_norm": 1.6607325795474122, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 388398568, + "step": 4255 + }, + { + "epoch": 17.729166666666668, + "loss": 0.035407889634370804, + "loss_ce": 9.14240226848051e-05, + "loss_iou": 0.2890625, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 388398568, + "step": 4255 + }, + { + "epoch": 17.733333333333334, + "grad_norm": 1.8768786210329038, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 388489580, + "step": 4256 + }, + { + "epoch": 17.733333333333334, + "loss": 0.04233090206980705, + "loss_ce": 3.0214434900699416e-06, + "loss_iou": 0.27734375, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 388489580, + "step": 4256 + }, + { + "epoch": 17.7375, + "grad_norm": 5.100596257151382, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 388580464, + "step": 4257 + }, + { + "epoch": 17.7375, + "loss": 0.06953492760658264, + "loss_ce": 6.269452228480077e-07, + "loss_iou": 0.2255859375, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 388580464, + "step": 4257 + }, + { + "epoch": 17.741666666666667, + "grad_norm": 4.620224294231164, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 388671656, + "step": 4258 + }, + { + "epoch": 17.741666666666667, + "loss": 0.0440262109041214, + "loss_ce": 4.604961759469006e-06, + "loss_iou": 0.298828125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 388671656, + "step": 4258 + }, + { + "epoch": 17.745833333333334, + "grad_norm": 3.1621588907824587, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 388763544, + "step": 4259 + }, + { + "epoch": 17.745833333333334, + "loss": 0.03645715117454529, + "loss_ce": 0.00011834719771286473, + "loss_iou": 0.2099609375, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 388763544, + "step": 4259 + }, + { + "epoch": 17.75, + "grad_norm": 3.0997334239058745, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 388854532, + "step": 4260 + }, + { + "epoch": 17.75, + "loss": 0.06612833589315414, + "loss_ce": 0.002865397371351719, + "loss_iou": 0.330078125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 388854532, + "step": 4260 + }, + { + "epoch": 17.754166666666666, + "grad_norm": 3.878376093167518, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 388945092, + "step": 4261 + }, + { + "epoch": 17.754166666666666, + "loss": 0.05230996012687683, + "loss_ce": 2.8329745873634238e-06, + "loss_iou": 0.283203125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 388945092, + "step": 4261 + }, + { + "epoch": 17.758333333333333, + "grad_norm": 5.19090268810429, + "learning_rate": 5e-05, + "loss": 0.0371, + "num_input_tokens_seen": 389036112, + "step": 4262 + }, + { + "epoch": 17.758333333333333, + "loss": 0.03373678773641586, + "loss_ce": 7.233080850710394e-06, + "loss_iou": 0.296875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 389036112, + "step": 4262 + }, + { + "epoch": 17.7625, + "grad_norm": 3.236849772082388, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 389127232, + "step": 4263 + }, + { + "epoch": 17.7625, + "loss": 0.0690011978149414, + "loss_ce": 9.548220987198874e-07, + "loss_iou": 0.427734375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 389127232, + "step": 4263 + }, + { + "epoch": 17.766666666666666, + "grad_norm": 2.869496523940095, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 389218456, + "step": 4264 + }, + { + "epoch": 17.766666666666666, + "loss": 0.060494571924209595, + "loss_ce": 0.00031390992808155715, + "loss_iou": 0.1767578125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 389218456, + "step": 4264 + }, + { + "epoch": 17.770833333333332, + "grad_norm": 2.7422667525481383, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 389310060, + "step": 4265 + }, + { + "epoch": 17.770833333333332, + "loss": 0.03156965970993042, + "loss_ce": 6.855726951471297e-06, + "loss_iou": 0.27734375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 389310060, + "step": 4265 + }, + { + "epoch": 17.775, + "grad_norm": 2.249769790802501, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 389401876, + "step": 4266 + }, + { + "epoch": 17.775, + "loss": 0.08179079741239548, + "loss_ce": 6.47258129902184e-05, + "loss_iou": 0.34375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 389401876, + "step": 4266 + }, + { + "epoch": 17.779166666666665, + "grad_norm": 2.2773374954746934, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 389493344, + "step": 4267 + }, + { + "epoch": 17.779166666666665, + "loss": 0.04999423027038574, + "loss_ce": 6.435068826249335e-06, + "loss_iou": 0.20703125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 389493344, + "step": 4267 + }, + { + "epoch": 17.783333333333335, + "grad_norm": 3.208639100920676, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 389584708, + "step": 4268 + }, + { + "epoch": 17.783333333333335, + "loss": 0.04798169434070587, + "loss_ce": 8.06142998044379e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 389584708, + "step": 4268 + }, + { + "epoch": 17.7875, + "grad_norm": 3.1089691432350524, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 389676300, + "step": 4269 + }, + { + "epoch": 17.7875, + "loss": 0.03627746179699898, + "loss_ce": 6.835483509348705e-05, + "loss_iou": 0.248046875, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 389676300, + "step": 4269 + }, + { + "epoch": 17.791666666666668, + "grad_norm": 2.6802187206716517, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 389767748, + "step": 4270 + }, + { + "epoch": 17.791666666666668, + "loss": 0.037262558937072754, + "loss_ce": 5.946749297436327e-07, + "loss_iou": 0.28125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 389767748, + "step": 4270 + }, + { + "epoch": 17.795833333333334, + "grad_norm": 3.279966907675864, + "learning_rate": 5e-05, + "loss": 0.025, + "num_input_tokens_seen": 389859076, + "step": 4271 + }, + { + "epoch": 17.795833333333334, + "loss": 0.023572321981191635, + "loss_ce": 5.122803031554213e-06, + "loss_iou": 0.27734375, + "loss_num": 0.00469970703125, + "loss_xval": 0.0235595703125, + "num_input_tokens_seen": 389859076, + "step": 4271 + }, + { + "epoch": 17.8, + "grad_norm": 2.912014259002682, + "learning_rate": 5e-05, + "loss": 0.0958, + "num_input_tokens_seen": 389950156, + "step": 4272 + }, + { + "epoch": 17.8, + "loss": 0.1355554312467575, + "loss_ce": 1.1604141946008895e-05, + "loss_iou": 0.158203125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 389950156, + "step": 4272 + }, + { + "epoch": 17.804166666666667, + "grad_norm": 2.8477409993278586, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 390041020, + "step": 4273 + }, + { + "epoch": 17.804166666666667, + "loss": 0.03160158917307854, + "loss_ce": 6.368102276610443e-07, + "loss_iou": 0.2373046875, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 390041020, + "step": 4273 + }, + { + "epoch": 17.808333333333334, + "grad_norm": 3.1060506067007077, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 390132496, + "step": 4274 + }, + { + "epoch": 17.808333333333334, + "loss": 0.09680266678333282, + "loss_ce": 9.127247153628559e-07, + "loss_iou": 0.314453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 390132496, + "step": 4274 + }, + { + "epoch": 17.8125, + "grad_norm": 2.5458659203048075, + "learning_rate": 5e-05, + "loss": 0.0792, + "num_input_tokens_seen": 390223444, + "step": 4275 + }, + { + "epoch": 17.8125, + "loss": 0.0719880759716034, + "loss_ce": 2.7624866561382078e-05, + "loss_iou": 0.294921875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 390223444, + "step": 4275 + }, + { + "epoch": 17.816666666666666, + "grad_norm": 2.5042217476993076, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 390315716, + "step": 4276 + }, + { + "epoch": 17.816666666666666, + "loss": 0.04687733203172684, + "loss_ce": 9.960051102098078e-06, + "loss_iou": 0.216796875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 390315716, + "step": 4276 + }, + { + "epoch": 17.820833333333333, + "grad_norm": 2.199339368642041, + "learning_rate": 5e-05, + "loss": 0.0738, + "num_input_tokens_seen": 390406628, + "step": 4277 + }, + { + "epoch": 17.820833333333333, + "loss": 0.07241851091384888, + "loss_ce": 1.5560059182462282e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 390406628, + "step": 4277 + }, + { + "epoch": 17.825, + "grad_norm": 2.845952078746692, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 390497156, + "step": 4278 + }, + { + "epoch": 17.825, + "loss": 0.04003407433629036, + "loss_ce": 0.00020863440295215696, + "loss_iou": 0.361328125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 390497156, + "step": 4278 + }, + { + "epoch": 17.829166666666666, + "grad_norm": 3.380502408546681, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 390588864, + "step": 4279 + }, + { + "epoch": 17.829166666666666, + "loss": 0.055008068680763245, + "loss_ce": 3.064938937313855e-05, + "loss_iou": 0.3203125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 390588864, + "step": 4279 + }, + { + "epoch": 17.833333333333332, + "grad_norm": 3.216941064128063, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 390679656, + "step": 4280 + }, + { + "epoch": 17.833333333333332, + "loss": 0.046098776161670685, + "loss_ce": 1.9742674339795485e-06, + "loss_iou": 0.2451171875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 390679656, + "step": 4280 + }, + { + "epoch": 17.8375, + "grad_norm": 2.2422021450978504, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 390771036, + "step": 4281 + }, + { + "epoch": 17.8375, + "loss": 0.05915020406246185, + "loss_ce": 8.343104855157435e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 390771036, + "step": 4281 + }, + { + "epoch": 17.841666666666665, + "grad_norm": 2.3078711383220663, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 390862020, + "step": 4282 + }, + { + "epoch": 17.841666666666665, + "loss": 0.060885027050971985, + "loss_ce": 2.455707999615697e-06, + "loss_iou": 0.28515625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 390862020, + "step": 4282 + }, + { + "epoch": 17.845833333333335, + "grad_norm": 3.894351865296879, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 390953620, + "step": 4283 + }, + { + "epoch": 17.845833333333335, + "loss": 0.04284176975488663, + "loss_ce": 1.0351399396313354e-05, + "loss_iou": 0.28125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 390953620, + "step": 4283 + }, + { + "epoch": 17.85, + "grad_norm": 3.5549017376036667, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 391044964, + "step": 4284 + }, + { + "epoch": 17.85, + "loss": 0.0379575677216053, + "loss_ce": 5.473750934470445e-05, + "loss_iou": 0.333984375, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 391044964, + "step": 4284 + }, + { + "epoch": 17.854166666666668, + "grad_norm": 3.868964183207432, + "learning_rate": 5e-05, + "loss": 0.1196, + "num_input_tokens_seen": 391136172, + "step": 4285 + }, + { + "epoch": 17.854166666666668, + "loss": 0.1410410851240158, + "loss_ce": 4.089220965397544e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0283203125, + "loss_xval": 0.140625, + "num_input_tokens_seen": 391136172, + "step": 4285 + }, + { + "epoch": 17.858333333333334, + "grad_norm": 3.646749375191905, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 391227456, + "step": 4286 + }, + { + "epoch": 17.858333333333334, + "loss": 0.09271462261676788, + "loss_ce": 2.2152019027998904e-06, + "loss_iou": 0.388671875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 391227456, + "step": 4286 + }, + { + "epoch": 17.8625, + "grad_norm": 2.4422908842745312, + "learning_rate": 5e-05, + "loss": 0.0948, + "num_input_tokens_seen": 391318812, + "step": 4287 + }, + { + "epoch": 17.8625, + "loss": 0.051978304982185364, + "loss_ce": 2.9758124583167955e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 391318812, + "step": 4287 + }, + { + "epoch": 17.866666666666667, + "grad_norm": 7.856508421494934, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 391410136, + "step": 4288 + }, + { + "epoch": 17.866666666666667, + "loss": 0.08443892002105713, + "loss_ce": 0.0003019559953827411, + "loss_iou": 0.375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 391410136, + "step": 4288 + }, + { + "epoch": 17.870833333333334, + "grad_norm": 3.2314063400427, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 391501260, + "step": 4289 + }, + { + "epoch": 17.870833333333334, + "loss": 0.0704357773065567, + "loss_ce": 8.843479008646682e-06, + "loss_iou": 0.32421875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 391501260, + "step": 4289 + }, + { + "epoch": 17.875, + "grad_norm": 3.040312817410731, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 391592232, + "step": 4290 + }, + { + "epoch": 17.875, + "loss": 0.053047988563776016, + "loss_ce": 8.084026035248826e-07, + "loss_iou": 0.287109375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 391592232, + "step": 4290 + }, + { + "epoch": 17.879166666666666, + "grad_norm": 4.770148941741689, + "learning_rate": 5e-05, + "loss": 0.0382, + "num_input_tokens_seen": 391683592, + "step": 4291 + }, + { + "epoch": 17.879166666666666, + "loss": 0.03159454092383385, + "loss_ce": 1.075425825547427e-05, + "loss_iou": 0.203125, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 391683592, + "step": 4291 + }, + { + "epoch": 17.883333333333333, + "grad_norm": 2.5823819704017885, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 391774764, + "step": 4292 + }, + { + "epoch": 17.883333333333333, + "loss": 0.03879944980144501, + "loss_ce": 3.982367161370348e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 391774764, + "step": 4292 + }, + { + "epoch": 17.8875, + "grad_norm": 1.699316394700182, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 391866520, + "step": 4293 + }, + { + "epoch": 17.8875, + "loss": 0.04242832958698273, + "loss_ce": 6.99307129252702e-05, + "loss_iou": 0.17578125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 391866520, + "step": 4293 + }, + { + "epoch": 17.891666666666666, + "grad_norm": 2.1108131520166973, + "learning_rate": 5e-05, + "loss": 0.0367, + "num_input_tokens_seen": 391957344, + "step": 4294 + }, + { + "epoch": 17.891666666666666, + "loss": 0.04325953498482704, + "loss_ce": 8.688614343554946e-07, + "loss_iou": 0.294921875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 391957344, + "step": 4294 + }, + { + "epoch": 17.895833333333332, + "grad_norm": 2.206906552531031, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 392048664, + "step": 4295 + }, + { + "epoch": 17.895833333333332, + "loss": 0.058287523686885834, + "loss_ce": 7.524287502747029e-05, + "loss_iou": 0.2734375, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 392048664, + "step": 4295 + }, + { + "epoch": 17.9, + "grad_norm": 2.146888954383353, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 392140128, + "step": 4296 + }, + { + "epoch": 17.9, + "loss": 0.0633581280708313, + "loss_ce": 3.6382100461196387e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 392140128, + "step": 4296 + }, + { + "epoch": 17.904166666666665, + "grad_norm": 2.5035802799334212, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 392231976, + "step": 4297 + }, + { + "epoch": 17.904166666666665, + "loss": 0.06069495528936386, + "loss_ce": 3.36383527610451e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 392231976, + "step": 4297 + }, + { + "epoch": 17.908333333333335, + "grad_norm": 2.688228632179773, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 392323720, + "step": 4298 + }, + { + "epoch": 17.908333333333335, + "loss": 0.0538039356470108, + "loss_ce": 1.4436795936489943e-06, + "loss_iou": 0.189453125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 392323720, + "step": 4298 + }, + { + "epoch": 17.9125, + "grad_norm": 2.4486532513400023, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 392415156, + "step": 4299 + }, + { + "epoch": 17.9125, + "loss": 0.03337834030389786, + "loss_ce": 6.840488640591502e-05, + "loss_iou": 0.19921875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 392415156, + "step": 4299 + }, + { + "epoch": 17.916666666666668, + "grad_norm": 3.0446213179202104, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 392506352, + "step": 4300 + }, + { + "epoch": 17.916666666666668, + "loss": 0.0623907670378685, + "loss_ce": 2.8097461836296134e-05, + "loss_iou": 0.201171875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 392506352, + "step": 4300 + }, + { + "epoch": 17.920833333333334, + "grad_norm": 12.97766674779127, + "learning_rate": 5e-05, + "loss": 0.081, + "num_input_tokens_seen": 392597360, + "step": 4301 + }, + { + "epoch": 17.920833333333334, + "loss": 0.10218116641044617, + "loss_ce": 8.312406862387434e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 392597360, + "step": 4301 + }, + { + "epoch": 17.925, + "grad_norm": 4.701325634383643, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 392688784, + "step": 4302 + }, + { + "epoch": 17.925, + "loss": 0.06068724766373634, + "loss_ce": 3.047289055757574e-06, + "loss_iou": 0.294921875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 392688784, + "step": 4302 + }, + { + "epoch": 17.929166666666667, + "grad_norm": 3.9597801825447636, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 392780144, + "step": 4303 + }, + { + "epoch": 17.929166666666667, + "loss": 0.04145951569080353, + "loss_ce": 1.387702468491625e-06, + "loss_iou": 0.298828125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 392780144, + "step": 4303 + }, + { + "epoch": 17.933333333333334, + "grad_norm": 16.030772287954104, + "learning_rate": 5e-05, + "loss": 0.0752, + "num_input_tokens_seen": 392871692, + "step": 4304 + }, + { + "epoch": 17.933333333333334, + "loss": 0.08494758605957031, + "loss_ce": 0.0015430464409291744, + "loss_iou": 0.337890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 392871692, + "step": 4304 + }, + { + "epoch": 17.9375, + "grad_norm": 2.044012042465525, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 392963336, + "step": 4305 + }, + { + "epoch": 17.9375, + "loss": 0.07563067972660065, + "loss_ce": 0.0002598950522951782, + "loss_iou": 0.11083984375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 392963336, + "step": 4305 + }, + { + "epoch": 17.941666666666666, + "grad_norm": 2.4403896730304293, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 393054780, + "step": 4306 + }, + { + "epoch": 17.941666666666666, + "loss": 0.05522942170500755, + "loss_ce": 0.0011827910784631968, + "loss_iou": 0.18359375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 393054780, + "step": 4306 + }, + { + "epoch": 17.945833333333333, + "grad_norm": 1.957076259446295, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 393145220, + "step": 4307 + }, + { + "epoch": 17.945833333333333, + "loss": 0.051417890936136246, + "loss_ce": 0.00016362001770175993, + "loss_iou": 0.189453125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 393145220, + "step": 4307 + }, + { + "epoch": 17.95, + "grad_norm": 2.9056474378085673, + "learning_rate": 5e-05, + "loss": 0.098, + "num_input_tokens_seen": 393236968, + "step": 4308 + }, + { + "epoch": 17.95, + "loss": 0.13753195106983185, + "loss_ce": 4.49333720098366e-06, + "loss_iou": 0.25390625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 393236968, + "step": 4308 + }, + { + "epoch": 17.954166666666666, + "grad_norm": 2.9390624829772265, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 393328088, + "step": 4309 + }, + { + "epoch": 17.954166666666666, + "loss": 0.05638705566525459, + "loss_ce": 0.0005322614451870322, + "loss_iou": 0.27734375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 393328088, + "step": 4309 + }, + { + "epoch": 17.958333333333332, + "grad_norm": 2.3808902559453875, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 393419056, + "step": 4310 + }, + { + "epoch": 17.958333333333332, + "loss": 0.05448612570762634, + "loss_ce": 2.7511428925208747e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 393419056, + "step": 4310 + }, + { + "epoch": 17.9625, + "grad_norm": 11.42592444582553, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 393509936, + "step": 4311 + }, + { + "epoch": 17.9625, + "loss": 0.03451695665717125, + "loss_ce": 1.5761654594825814e-06, + "loss_iou": 0.2431640625, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 393509936, + "step": 4311 + }, + { + "epoch": 17.966666666666665, + "grad_norm": 2.026598228509948, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 393601652, + "step": 4312 + }, + { + "epoch": 17.966666666666665, + "loss": 0.026221077889204025, + "loss_ce": 6.477197530330159e-06, + "loss_iou": 0.1796875, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 393601652, + "step": 4312 + }, + { + "epoch": 17.970833333333335, + "grad_norm": 2.545206358777459, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 393692832, + "step": 4313 + }, + { + "epoch": 17.970833333333335, + "loss": 0.03803935647010803, + "loss_ce": 0.00018992825062014163, + "loss_iou": 0.2470703125, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 393692832, + "step": 4313 + }, + { + "epoch": 17.975, + "grad_norm": 2.5300715189162353, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 393784424, + "step": 4314 + }, + { + "epoch": 17.975, + "loss": 0.0279478058218956, + "loss_ce": 1.3326807675184682e-06, + "loss_iou": 0.263671875, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 393784424, + "step": 4314 + }, + { + "epoch": 17.979166666666668, + "grad_norm": 3.1352700422238295, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 393875060, + "step": 4315 + }, + { + "epoch": 17.979166666666668, + "loss": 0.0344746857881546, + "loss_ce": 5.083212727186037e-06, + "loss_iou": 0.1748046875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 393875060, + "step": 4315 + }, + { + "epoch": 17.983333333333334, + "grad_norm": 2.1763173304718, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 393966176, + "step": 4316 + }, + { + "epoch": 17.983333333333334, + "loss": 0.03647957369685173, + "loss_ce": 3.4403892641421407e-06, + "loss_iou": 0.2080078125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 393966176, + "step": 4316 + }, + { + "epoch": 17.9875, + "grad_norm": 2.6116494285117655, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 394057324, + "step": 4317 + }, + { + "epoch": 17.9875, + "loss": 0.04278308153152466, + "loss_ce": 2.7956442863796838e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 394057324, + "step": 4317 + }, + { + "epoch": 17.991666666666667, + "grad_norm": 3.24737325938247, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 394148996, + "step": 4318 + }, + { + "epoch": 17.991666666666667, + "loss": 0.037456609308719635, + "loss_ce": 0.00020227550703566521, + "loss_iou": 0.16015625, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 394148996, + "step": 4318 + }, + { + "epoch": 17.995833333333334, + "grad_norm": 2.2539145328902404, + "learning_rate": 5e-05, + "loss": 0.0969, + "num_input_tokens_seen": 394240284, + "step": 4319 + }, + { + "epoch": 17.995833333333334, + "loss": 0.13667461276054382, + "loss_ce": 1.6307496935041854e-06, + "loss_iou": 0.2265625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 394240284, + "step": 4319 + }, + { + "epoch": 18.0, + "grad_norm": 2.718931662003016, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 394331148, + "step": 4320 + }, + { + "epoch": 18.0, + "loss": 0.06142626702785492, + "loss_ce": 2.0113529899390414e-06, + "loss_iou": 0.2490234375, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 394331148, + "step": 4320 + }, + { + "epoch": 18.004166666666666, + "grad_norm": 3.195365105851593, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 394420940, + "step": 4321 + }, + { + "epoch": 18.004166666666666, + "loss": 0.0539390966296196, + "loss_ce": 1.4537547031068243e-05, + "loss_iou": 0.251953125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 394420940, + "step": 4321 + }, + { + "epoch": 18.008333333333333, + "grad_norm": 3.298762019359425, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 394511684, + "step": 4322 + }, + { + "epoch": 18.008333333333333, + "loss": 0.10038083791732788, + "loss_ce": 8.525988960172981e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 394511684, + "step": 4322 + }, + { + "epoch": 18.0125, + "grad_norm": 3.905792510038796, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 394603636, + "step": 4323 + }, + { + "epoch": 18.0125, + "loss": 0.046825211495161057, + "loss_ce": 1.124912978411885e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 394603636, + "step": 4323 + }, + { + "epoch": 18.016666666666666, + "grad_norm": 1.8703517037834887, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 394694048, + "step": 4324 + }, + { + "epoch": 18.016666666666666, + "loss": 0.027627240866422653, + "loss_ce": 1.203496367452317e-06, + "loss_iou": 0.203125, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 394694048, + "step": 4324 + }, + { + "epoch": 18.020833333333332, + "grad_norm": 1.514189991927366, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 394785636, + "step": 4325 + }, + { + "epoch": 18.020833333333332, + "loss": 0.04702939838171005, + "loss_ce": 1.8090759112965316e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 394785636, + "step": 4325 + }, + { + "epoch": 18.025, + "grad_norm": 1.0784396605015283, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 394876396, + "step": 4326 + }, + { + "epoch": 18.025, + "loss": 0.04121756553649902, + "loss_ce": 3.5790712900052313e-06, + "loss_iou": 0.322265625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 394876396, + "step": 4326 + }, + { + "epoch": 18.029166666666665, + "grad_norm": 0.862503992716872, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 394966044, + "step": 4327 + }, + { + "epoch": 18.029166666666665, + "loss": 0.048645853996276855, + "loss_ce": 8.345961077793618e-07, + "loss_iou": 0.2353515625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 394966044, + "step": 4327 + }, + { + "epoch": 18.033333333333335, + "grad_norm": 1.8267676354610045, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 395057856, + "step": 4328 + }, + { + "epoch": 18.033333333333335, + "loss": 0.026156704872846603, + "loss_ce": 3.141112074445118e-06, + "loss_iou": 0.275390625, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 395057856, + "step": 4328 + }, + { + "epoch": 18.0375, + "grad_norm": 1.1381372255017146, + "learning_rate": 5e-05, + "loss": 0.0339, + "num_input_tokens_seen": 395149312, + "step": 4329 + }, + { + "epoch": 18.0375, + "loss": 0.03376764431595802, + "loss_ce": 3.8090514863142744e-05, + "loss_iou": 0.296875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 395149312, + "step": 4329 + }, + { + "epoch": 18.041666666666668, + "grad_norm": 1.785275220468087, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 395240220, + "step": 4330 + }, + { + "epoch": 18.041666666666668, + "loss": 0.07155978679656982, + "loss_ce": 1.8952950995299034e-05, + "loss_iou": 0.234375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 395240220, + "step": 4330 + }, + { + "epoch": 18.045833333333334, + "grad_norm": 1.9882529377175069, + "learning_rate": 5e-05, + "loss": 0.0445, + "num_input_tokens_seen": 395331656, + "step": 4331 + }, + { + "epoch": 18.045833333333334, + "loss": 0.042426083236932755, + "loss_ce": 6.651344392594183e-06, + "loss_iou": 0.3125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 395331656, + "step": 4331 + }, + { + "epoch": 18.05, + "grad_norm": 2.344335366295014, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 395423220, + "step": 4332 + }, + { + "epoch": 18.05, + "loss": 0.04951345548033714, + "loss_ce": 0.009115813300013542, + "loss_iou": 0.0478515625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 395423220, + "step": 4332 + }, + { + "epoch": 18.054166666666667, + "grad_norm": 2.4558276168301756, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 395514576, + "step": 4333 + }, + { + "epoch": 18.054166666666667, + "loss": 0.03360137343406677, + "loss_ce": 1.5182785091383266e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 395514576, + "step": 4333 + }, + { + "epoch": 18.058333333333334, + "grad_norm": 2.0992927584266283, + "learning_rate": 5e-05, + "loss": 0.0657, + "num_input_tokens_seen": 395606148, + "step": 4334 + }, + { + "epoch": 18.058333333333334, + "loss": 0.047148894518613815, + "loss_ce": 1.44934074342018e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 395606148, + "step": 4334 + }, + { + "epoch": 18.0625, + "grad_norm": 1.6144222585513672, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 395697324, + "step": 4335 + }, + { + "epoch": 18.0625, + "loss": 0.03702807053923607, + "loss_ce": 2.617456175357802e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 395697324, + "step": 4335 + }, + { + "epoch": 18.066666666666666, + "grad_norm": 2.6503863866823036, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 395789076, + "step": 4336 + }, + { + "epoch": 18.066666666666666, + "loss": 0.04408771172165871, + "loss_ce": 2.0329125618445687e-05, + "loss_iou": 0.3125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 395789076, + "step": 4336 + }, + { + "epoch": 18.070833333333333, + "grad_norm": 2.6723716054679945, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 395880504, + "step": 4337 + }, + { + "epoch": 18.070833333333333, + "loss": 0.07788537442684174, + "loss_ce": 4.5142041926737875e-06, + "loss_iou": 0.16015625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 395880504, + "step": 4337 + }, + { + "epoch": 18.075, + "grad_norm": 2.225487763718285, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 395972012, + "step": 4338 + }, + { + "epoch": 18.075, + "loss": 0.030444277450442314, + "loss_ce": 1.0621975889080204e-05, + "loss_iou": 0.040283203125, + "loss_num": 0.006103515625, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 395972012, + "step": 4338 + }, + { + "epoch": 18.079166666666666, + "grad_norm": 2.464868968170747, + "learning_rate": 5e-05, + "loss": 0.0282, + "num_input_tokens_seen": 396063152, + "step": 4339 + }, + { + "epoch": 18.079166666666666, + "loss": 0.03203795477747917, + "loss_ce": 2.1279520296957344e-06, + "loss_iou": 0.1796875, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 396063152, + "step": 4339 + }, + { + "epoch": 18.083333333333332, + "grad_norm": 3.451951339092108, + "learning_rate": 5e-05, + "loss": 0.0816, + "num_input_tokens_seen": 396154468, + "step": 4340 + }, + { + "epoch": 18.083333333333332, + "loss": 0.13343852758407593, + "loss_ce": 4.1616868884375435e-07, + "loss_iou": 0.353515625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 396154468, + "step": 4340 + }, + { + "epoch": 18.0875, + "grad_norm": 2.902394444532942, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 396245768, + "step": 4341 + }, + { + "epoch": 18.0875, + "loss": 0.04989688843488693, + "loss_ce": 6.509374088636832e-07, + "loss_iou": 0.236328125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 396245768, + "step": 4341 + }, + { + "epoch": 18.091666666666665, + "grad_norm": 2.314349492678863, + "learning_rate": 5e-05, + "loss": 0.0823, + "num_input_tokens_seen": 396336292, + "step": 4342 + }, + { + "epoch": 18.091666666666665, + "loss": 0.1165471151471138, + "loss_ce": 4.858984539168887e-07, + "loss_iou": 0.275390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 396336292, + "step": 4342 + }, + { + "epoch": 18.095833333333335, + "grad_norm": 11.428303916656995, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 396427380, + "step": 4343 + }, + { + "epoch": 18.095833333333335, + "loss": 0.05671941116452217, + "loss_ce": 2.4900548396544764e-06, + "loss_iou": 0.240234375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 396427380, + "step": 4343 + }, + { + "epoch": 18.1, + "grad_norm": 2.4921457118510366, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 396519188, + "step": 4344 + }, + { + "epoch": 18.1, + "loss": 0.045594222843647, + "loss_ce": 8.590166544308886e-06, + "loss_iou": 0.146484375, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 396519188, + "step": 4344 + }, + { + "epoch": 18.104166666666668, + "grad_norm": 2.9869058534307946, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 396610296, + "step": 4345 + }, + { + "epoch": 18.104166666666668, + "loss": 0.044300176203250885, + "loss_ce": 3.910792202077573e-06, + "loss_iou": 0.32421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 396610296, + "step": 4345 + }, + { + "epoch": 18.108333333333334, + "grad_norm": 2.334854811432145, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 396701692, + "step": 4346 + }, + { + "epoch": 18.108333333333334, + "loss": 0.04280184209346771, + "loss_ce": 9.392442734679207e-07, + "loss_iou": 0.054443359375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 396701692, + "step": 4346 + }, + { + "epoch": 18.1125, + "grad_norm": 3.2656062480042594, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 396792740, + "step": 4347 + }, + { + "epoch": 18.1125, + "loss": 0.025705434381961823, + "loss_ce": 1.7265369024244137e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 396792740, + "step": 4347 + }, + { + "epoch": 18.116666666666667, + "grad_norm": 1.782148683118789, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 396884024, + "step": 4348 + }, + { + "epoch": 18.116666666666667, + "loss": 0.04716215282678604, + "loss_ce": 0.0009890544461086392, + "loss_iou": 0.259765625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 396884024, + "step": 4348 + }, + { + "epoch": 18.120833333333334, + "grad_norm": 2.713392053335857, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 396975032, + "step": 4349 + }, + { + "epoch": 18.120833333333334, + "loss": 0.0341886505484581, + "loss_ce": 2.4221051717177033e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 396975032, + "step": 4349 + }, + { + "epoch": 18.125, + "grad_norm": 1.4380388319248392, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 397065464, + "step": 4350 + }, + { + "epoch": 18.125, + "loss": 0.04002754017710686, + "loss_ce": 3.737887709576171e-06, + "loss_iou": 0.146484375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 397065464, + "step": 4350 + }, + { + "epoch": 18.129166666666666, + "grad_norm": 1.385735047535627, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 397156184, + "step": 4351 + }, + { + "epoch": 18.129166666666666, + "loss": 0.0432908833026886, + "loss_ce": 1.6999715626297984e-06, + "loss_iou": 0.1513671875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 397156184, + "step": 4351 + }, + { + "epoch": 18.133333333333333, + "grad_norm": 1.539877158566921, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 397247468, + "step": 4352 + }, + { + "epoch": 18.133333333333333, + "loss": 0.09733171761035919, + "loss_ce": 3.5312802992848447e-06, + "loss_iou": 0.220703125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 397247468, + "step": 4352 + }, + { + "epoch": 18.1375, + "grad_norm": 2.5284698248198536, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 397338636, + "step": 4353 + }, + { + "epoch": 18.1375, + "loss": 0.038271788507699966, + "loss_ce": 1.0375433703302406e-05, + "loss_iou": 0.287109375, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 397338636, + "step": 4353 + }, + { + "epoch": 18.141666666666666, + "grad_norm": 2.5679338017934827, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 397428860, + "step": 4354 + }, + { + "epoch": 18.141666666666666, + "loss": 0.04413112998008728, + "loss_ce": 0.002199977170675993, + "loss_iou": 0.404296875, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 397428860, + "step": 4354 + }, + { + "epoch": 18.145833333333332, + "grad_norm": 3.5753304198707, + "learning_rate": 5e-05, + "loss": 0.0861, + "num_input_tokens_seen": 397520156, + "step": 4355 + }, + { + "epoch": 18.145833333333332, + "loss": 0.12257055938243866, + "loss_ce": 1.1972469110332895e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 397520156, + "step": 4355 + }, + { + "epoch": 18.15, + "grad_norm": 2.6806511987389325, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 397611860, + "step": 4356 + }, + { + "epoch": 18.15, + "loss": 0.07975918054580688, + "loss_ce": 3.20112521876581e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 397611860, + "step": 4356 + }, + { + "epoch": 18.154166666666665, + "grad_norm": 2.273739343091957, + "learning_rate": 5e-05, + "loss": 0.0978, + "num_input_tokens_seen": 397702408, + "step": 4357 + }, + { + "epoch": 18.154166666666665, + "loss": 0.13732938468456268, + "loss_ce": 2.8608226898541034e-07, + "loss_iou": 0.33203125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 397702408, + "step": 4357 + }, + { + "epoch": 18.158333333333335, + "grad_norm": 6.861383555688523, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 397793940, + "step": 4358 + }, + { + "epoch": 18.158333333333335, + "loss": 0.03570987284183502, + "loss_ce": 4.307841663830914e-06, + "loss_iou": 0.306640625, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 397793940, + "step": 4358 + }, + { + "epoch": 18.1625, + "grad_norm": 1.3036399508182497, + "learning_rate": 5e-05, + "loss": 0.0256, + "num_input_tokens_seen": 397884480, + "step": 4359 + }, + { + "epoch": 18.1625, + "loss": 0.022728780284523964, + "loss_ce": 8.135599500747048e-07, + "loss_iou": 0.267578125, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 397884480, + "step": 4359 + }, + { + "epoch": 18.166666666666668, + "grad_norm": 1.7651094670492964, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 397975704, + "step": 4360 + }, + { + "epoch": 18.166666666666668, + "loss": 0.03605952113866806, + "loss_ce": 3.0001363029441563e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 397975704, + "step": 4360 + }, + { + "epoch": 18.170833333333334, + "grad_norm": 2.5718917594701285, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 398067552, + "step": 4361 + }, + { + "epoch": 18.170833333333334, + "loss": 0.03395594656467438, + "loss_ce": 5.143494945514249e-06, + "loss_iou": 0.244140625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 398067552, + "step": 4361 + }, + { + "epoch": 18.175, + "grad_norm": 2.618190785133452, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 398159520, + "step": 4362 + }, + { + "epoch": 18.175, + "loss": 0.04618554934859276, + "loss_ce": 9.637584298616275e-05, + "loss_iou": 0.26171875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 398159520, + "step": 4362 + }, + { + "epoch": 18.179166666666667, + "grad_norm": 2.9221136017576477, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 398251144, + "step": 4363 + }, + { + "epoch": 18.179166666666667, + "loss": 0.0848899558186531, + "loss_ce": 5.3128806030144915e-06, + "loss_iou": 0.3125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 398251144, + "step": 4363 + }, + { + "epoch": 18.183333333333334, + "grad_norm": 2.9717430372577516, + "learning_rate": 5e-05, + "loss": 0.076, + "num_input_tokens_seen": 398342744, + "step": 4364 + }, + { + "epoch": 18.183333333333334, + "loss": 0.09848225116729736, + "loss_ce": 2.021944965235889e-06, + "loss_iou": 0.328125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 398342744, + "step": 4364 + }, + { + "epoch": 18.1875, + "grad_norm": 3.109744655408144, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 398434288, + "step": 4365 + }, + { + "epoch": 18.1875, + "loss": 0.05099605768918991, + "loss_ce": 1.1823774457297986e-06, + "loss_iou": 0.353515625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 398434288, + "step": 4365 + }, + { + "epoch": 18.191666666666666, + "grad_norm": 2.159842625416849, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 398525456, + "step": 4366 + }, + { + "epoch": 18.191666666666666, + "loss": 0.04072629660367966, + "loss_ce": 5.907006084271416e-07, + "loss_iou": 0.2490234375, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 398525456, + "step": 4366 + }, + { + "epoch": 18.195833333333333, + "grad_norm": 8.228009704876078, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 398616876, + "step": 4367 + }, + { + "epoch": 18.195833333333333, + "loss": 0.05673077702522278, + "loss_ce": 1.3857466910849325e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 398616876, + "step": 4367 + }, + { + "epoch": 18.2, + "grad_norm": 2.012030966733271, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 398708284, + "step": 4368 + }, + { + "epoch": 18.2, + "loss": 0.047302864491939545, + "loss_ce": 6.211755589902168e-07, + "loss_iou": 0.32421875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 398708284, + "step": 4368 + }, + { + "epoch": 18.204166666666666, + "grad_norm": 8.039653286483375, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 398800056, + "step": 4369 + }, + { + "epoch": 18.204166666666666, + "loss": 0.06590539216995239, + "loss_ce": 2.680851594050182e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 398800056, + "step": 4369 + }, + { + "epoch": 18.208333333333332, + "grad_norm": 2.2663464506267648, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 398891224, + "step": 4370 + }, + { + "epoch": 18.208333333333332, + "loss": 0.04339677095413208, + "loss_ce": 7.760647804389009e-07, + "loss_iou": 0.26171875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 398891224, + "step": 4370 + }, + { + "epoch": 18.2125, + "grad_norm": 9.314328389208157, + "learning_rate": 5e-05, + "loss": 0.0345, + "num_input_tokens_seen": 398983332, + "step": 4371 + }, + { + "epoch": 18.2125, + "loss": 0.04535383731126785, + "loss_ce": 4.71842167826253e-06, + "loss_iou": 0.29296875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 398983332, + "step": 4371 + }, + { + "epoch": 18.216666666666665, + "grad_norm": 3.217224616633414, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 399074764, + "step": 4372 + }, + { + "epoch": 18.216666666666665, + "loss": 0.0589253231883049, + "loss_ce": 5.691545084118843e-05, + "loss_iou": 0.3046875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 399074764, + "step": 4372 + }, + { + "epoch": 18.220833333333335, + "grad_norm": 2.730079477146947, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 399165704, + "step": 4373 + }, + { + "epoch": 18.220833333333335, + "loss": 0.042066872119903564, + "loss_ce": 0.004362402018159628, + "loss_iou": 0.2275390625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 399165704, + "step": 4373 + }, + { + "epoch": 18.225, + "grad_norm": 2.0687422921064287, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 399257212, + "step": 4374 + }, + { + "epoch": 18.225, + "loss": 0.09087371826171875, + "loss_ce": 7.631599146407098e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 399257212, + "step": 4374 + }, + { + "epoch": 18.229166666666668, + "grad_norm": 1.6668812988733708, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 399347372, + "step": 4375 + }, + { + "epoch": 18.229166666666668, + "loss": 0.046204306185245514, + "loss_ce": 0.0001075049804057926, + "loss_iou": 0.06689453125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 399347372, + "step": 4375 + }, + { + "epoch": 18.233333333333334, + "grad_norm": 2.0372467556218967, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 399438828, + "step": 4376 + }, + { + "epoch": 18.233333333333334, + "loss": 0.07019403576850891, + "loss_ce": 3.6076583000976825e-06, + "loss_iou": 0.271484375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 399438828, + "step": 4376 + }, + { + "epoch": 18.2375, + "grad_norm": 3.055557767338919, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 399530060, + "step": 4377 + }, + { + "epoch": 18.2375, + "loss": 0.06096126139163971, + "loss_ce": 2.3983773189684143e-06, + "loss_iou": 0.27734375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 399530060, + "step": 4377 + }, + { + "epoch": 18.241666666666667, + "grad_norm": 2.99983240473986, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 399621304, + "step": 4378 + }, + { + "epoch": 18.241666666666667, + "loss": 0.0324772372841835, + "loss_ce": 0.00018963986076414585, + "loss_iou": 0.328125, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 399621304, + "step": 4378 + }, + { + "epoch": 18.245833333333334, + "grad_norm": 2.5880110630261663, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 399712828, + "step": 4379 + }, + { + "epoch": 18.245833333333334, + "loss": 0.06883368641138077, + "loss_ce": 1.2874111234850716e-06, + "loss_iou": 0.2001953125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 399712828, + "step": 4379 + }, + { + "epoch": 18.25, + "grad_norm": 3.0573426753050614, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 399804176, + "step": 4380 + }, + { + "epoch": 18.25, + "loss": 0.06015327572822571, + "loss_ce": 9.468204370932654e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 399804176, + "step": 4380 + }, + { + "epoch": 18.254166666666666, + "grad_norm": 2.6736121122866146, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 399895460, + "step": 4381 + }, + { + "epoch": 18.254166666666666, + "loss": 0.0865228921175003, + "loss_ce": 5.557565145863919e-06, + "loss_iou": 0.2490234375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 399895460, + "step": 4381 + }, + { + "epoch": 18.258333333333333, + "grad_norm": 4.602344446400555, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 399987048, + "step": 4382 + }, + { + "epoch": 18.258333333333333, + "loss": 0.10242481529712677, + "loss_ce": 7.821878170943819e-06, + "loss_iou": 0.12451171875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 399987048, + "step": 4382 + }, + { + "epoch": 18.2625, + "grad_norm": 1.8709337571104105, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 400078308, + "step": 4383 + }, + { + "epoch": 18.2625, + "loss": 0.043266646564006805, + "loss_ce": 3.4821675853891065e-07, + "loss_iou": 0.23046875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 400078308, + "step": 4383 + }, + { + "epoch": 18.266666666666666, + "grad_norm": 2.7737642445213506, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 400169996, + "step": 4384 + }, + { + "epoch": 18.266666666666666, + "loss": 0.15319259464740753, + "loss_ce": 1.9784554297075374e-06, + "loss_iou": 0.3203125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 400169996, + "step": 4384 + }, + { + "epoch": 18.270833333333332, + "grad_norm": 2.9567732403055467, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 400261664, + "step": 4385 + }, + { + "epoch": 18.270833333333332, + "loss": 0.037430521100759506, + "loss_ce": 7.110501201168518e-07, + "loss_iou": 0.23828125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 400261664, + "step": 4385 + }, + { + "epoch": 18.275, + "grad_norm": 2.929703617373839, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 400353208, + "step": 4386 + }, + { + "epoch": 18.275, + "loss": 0.08940213918685913, + "loss_ce": 3.1412902899319306e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 400353208, + "step": 4386 + }, + { + "epoch": 18.279166666666665, + "grad_norm": 4.740133168669614, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 400444956, + "step": 4387 + }, + { + "epoch": 18.279166666666665, + "loss": 0.05769924074411392, + "loss_ce": 0.00014308842946775258, + "loss_iou": 0.240234375, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 400444956, + "step": 4387 + }, + { + "epoch": 18.283333333333335, + "grad_norm": 2.5047182403696144, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 400536092, + "step": 4388 + }, + { + "epoch": 18.283333333333335, + "loss": 0.0511624850332737, + "loss_ce": 1.5020019418443553e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 400536092, + "step": 4388 + }, + { + "epoch": 18.2875, + "grad_norm": 3.5861856966319756, + "learning_rate": 5e-05, + "loss": 0.13, + "num_input_tokens_seen": 400626832, + "step": 4389 + }, + { + "epoch": 18.2875, + "loss": 0.21249884366989136, + "loss_ce": 4.950234142597765e-06, + "loss_iou": 0.26171875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 400626832, + "step": 4389 + }, + { + "epoch": 18.291666666666668, + "grad_norm": 2.484994269711342, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 400718168, + "step": 4390 + }, + { + "epoch": 18.291666666666668, + "loss": 0.04919488728046417, + "loss_ce": 4.633032949641347e-05, + "loss_iou": 0.26953125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 400718168, + "step": 4390 + }, + { + "epoch": 18.295833333333334, + "grad_norm": 3.143673198657142, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 400808980, + "step": 4391 + }, + { + "epoch": 18.295833333333334, + "loss": 0.06095048785209656, + "loss_ce": 3.740333704627119e-05, + "loss_iou": 0.1796875, + "loss_num": 0.01214599609375, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 400808980, + "step": 4391 + }, + { + "epoch": 18.3, + "grad_norm": 2.916985689361712, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 400900012, + "step": 4392 + }, + { + "epoch": 18.3, + "loss": 0.032847288995981216, + "loss_ce": 2.74403805633483e-06, + "loss_iou": 0.1767578125, + "loss_num": 0.006561279296875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 400900012, + "step": 4392 + }, + { + "epoch": 18.304166666666667, + "grad_norm": 3.421911908192797, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 400991240, + "step": 4393 + }, + { + "epoch": 18.304166666666667, + "loss": 0.054063111543655396, + "loss_ce": 1.2219868494867114e-06, + "loss_iou": 0.330078125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 400991240, + "step": 4393 + }, + { + "epoch": 18.308333333333334, + "grad_norm": 2.4731271097465477, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 401082824, + "step": 4394 + }, + { + "epoch": 18.308333333333334, + "loss": 0.0979473888874054, + "loss_ce": 1.222972855430271e-06, + "loss_iou": 0.259765625, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 401082824, + "step": 4394 + }, + { + "epoch": 18.3125, + "grad_norm": 2.725491616054956, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 401174452, + "step": 4395 + }, + { + "epoch": 18.3125, + "loss": 0.040101826190948486, + "loss_ce": 3.224733518436551e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 401174452, + "step": 4395 + }, + { + "epoch": 18.316666666666666, + "grad_norm": 2.2491274306415954, + "learning_rate": 5e-05, + "loss": 0.0591, + "num_input_tokens_seen": 401266076, + "step": 4396 + }, + { + "epoch": 18.316666666666666, + "loss": 0.06935058534145355, + "loss_ce": 1.4649865079263691e-05, + "loss_iou": 0.224609375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 401266076, + "step": 4396 + }, + { + "epoch": 18.320833333333333, + "grad_norm": 1.9773627042160795, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 401357832, + "step": 4397 + }, + { + "epoch": 18.320833333333333, + "loss": 0.03888298198580742, + "loss_ce": 3.5863831726601347e-06, + "loss_iou": 0.2578125, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 401357832, + "step": 4397 + }, + { + "epoch": 18.325, + "grad_norm": 4.594254689727872, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 401449124, + "step": 4398 + }, + { + "epoch": 18.325, + "loss": 0.0331290028989315, + "loss_ce": 2.1720934455515817e-06, + "loss_iou": 0.1171875, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 401449124, + "step": 4398 + }, + { + "epoch": 18.329166666666666, + "grad_norm": 1.8354731404458435, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 401540020, + "step": 4399 + }, + { + "epoch": 18.329166666666666, + "loss": 0.036660533398389816, + "loss_ce": 5.1063821047137026e-06, + "loss_iou": 0.169921875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 401540020, + "step": 4399 + }, + { + "epoch": 18.333333333333332, + "grad_norm": 0.8685643974307135, + "learning_rate": 5e-05, + "loss": 0.0278, + "num_input_tokens_seen": 401631740, + "step": 4400 + }, + { + "epoch": 18.333333333333332, + "loss": 0.030869200825691223, + "loss_ce": 6.681293598376215e-07, + "loss_iou": 0.09375, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 401631740, + "step": 4400 + }, + { + "epoch": 18.3375, + "grad_norm": 1.619917861504503, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 401722968, + "step": 4401 + }, + { + "epoch": 18.3375, + "loss": 0.05206376314163208, + "loss_ce": 4.2737061448860914e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 401722968, + "step": 4401 + }, + { + "epoch": 18.341666666666665, + "grad_norm": 2.9477429486798785, + "learning_rate": 5e-05, + "loss": 0.1039, + "num_input_tokens_seen": 401814388, + "step": 4402 + }, + { + "epoch": 18.341666666666665, + "loss": 0.07863037288188934, + "loss_ce": 0.00030701086507178843, + "loss_iou": 0.15625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 401814388, + "step": 4402 + }, + { + "epoch": 18.345833333333335, + "grad_norm": 3.1687821627968518, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 401905272, + "step": 4403 + }, + { + "epoch": 18.345833333333335, + "loss": 0.05850303918123245, + "loss_ce": 8.430246225543669e-07, + "loss_iou": 0.35546875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 401905272, + "step": 4403 + }, + { + "epoch": 18.35, + "grad_norm": 2.245507306740881, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 401996540, + "step": 4404 + }, + { + "epoch": 18.35, + "loss": 0.034206412732601166, + "loss_ce": 3.837065378320403e-06, + "loss_iou": 0.123046875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 401996540, + "step": 4404 + }, + { + "epoch": 18.354166666666668, + "grad_norm": 2.1863927493050928, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 402088296, + "step": 4405 + }, + { + "epoch": 18.354166666666668, + "loss": 0.049772344529628754, + "loss_ce": 5.807106390420813e-06, + "loss_iou": 0.275390625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 402088296, + "step": 4405 + }, + { + "epoch": 18.358333333333334, + "grad_norm": 2.371649199490193, + "learning_rate": 5e-05, + "loss": 0.0338, + "num_input_tokens_seen": 402179204, + "step": 4406 + }, + { + "epoch": 18.358333333333334, + "loss": 0.03646884858608246, + "loss_ce": 3.4508883572925697e-07, + "loss_iou": 0.193359375, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 402179204, + "step": 4406 + }, + { + "epoch": 18.3625, + "grad_norm": 1.8672995620837733, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 402270828, + "step": 4407 + }, + { + "epoch": 18.3625, + "loss": 0.03133529797196388, + "loss_ce": 1.376296381749853e-06, + "loss_iou": 0.173828125, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 402270828, + "step": 4407 + }, + { + "epoch": 18.366666666666667, + "grad_norm": 2.168913292443573, + "learning_rate": 5e-05, + "loss": 0.0647, + "num_input_tokens_seen": 402361824, + "step": 4408 + }, + { + "epoch": 18.366666666666667, + "loss": 0.05605369806289673, + "loss_ce": 8.166640327544883e-06, + "loss_iou": 0.13671875, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 402361824, + "step": 4408 + }, + { + "epoch": 18.370833333333334, + "grad_norm": 1.7590564446359138, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 402453848, + "step": 4409 + }, + { + "epoch": 18.370833333333334, + "loss": 0.048607781529426575, + "loss_ce": 9.035489938469254e-07, + "loss_iou": 0.326171875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 402453848, + "step": 4409 + }, + { + "epoch": 18.375, + "grad_norm": 1.5070976292271845, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 402543956, + "step": 4410 + }, + { + "epoch": 18.375, + "loss": 0.11715473979711533, + "loss_ce": 0.00024190156545955688, + "loss_iou": 0.080078125, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 402543956, + "step": 4410 + }, + { + "epoch": 18.379166666666666, + "grad_norm": 0.8670167969666163, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 402635120, + "step": 4411 + }, + { + "epoch": 18.379166666666666, + "loss": 0.04164545238018036, + "loss_ce": 2.710960507101845e-05, + "loss_iou": 0.228515625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 402635120, + "step": 4411 + }, + { + "epoch": 18.383333333333333, + "grad_norm": 2.7655246359324224, + "learning_rate": 5e-05, + "loss": 0.0293, + "num_input_tokens_seen": 402726100, + "step": 4412 + }, + { + "epoch": 18.383333333333333, + "loss": 0.02951713465154171, + "loss_ce": 2.1895530153415166e-05, + "loss_iou": 0.171875, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 402726100, + "step": 4412 + }, + { + "epoch": 18.3875, + "grad_norm": 5.6264590678765085, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 402817124, + "step": 4413 + }, + { + "epoch": 18.3875, + "loss": 0.034448638558387756, + "loss_ce": 0.00016213968046940863, + "loss_iou": 0.29296875, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 402817124, + "step": 4413 + }, + { + "epoch": 18.391666666666666, + "grad_norm": 3.711848747015358, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 402908288, + "step": 4414 + }, + { + "epoch": 18.391666666666666, + "loss": 0.02724681980907917, + "loss_ce": 8.617481944384053e-05, + "loss_iou": 0.212890625, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 402908288, + "step": 4414 + }, + { + "epoch": 18.395833333333332, + "grad_norm": 6.133768116652266, + "learning_rate": 5e-05, + "loss": 0.0456, + "num_input_tokens_seen": 402999680, + "step": 4415 + }, + { + "epoch": 18.395833333333332, + "loss": 0.05777106434106827, + "loss_ce": 1.2924733709951397e-06, + "loss_iou": 0.26171875, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 402999680, + "step": 4415 + }, + { + "epoch": 18.4, + "grad_norm": 5.143096250225353, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 403089660, + "step": 4416 + }, + { + "epoch": 18.4, + "loss": 0.0555812232196331, + "loss_ce": 1.0821604519151151e-06, + "loss_iou": 0.337890625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 403089660, + "step": 4416 + }, + { + "epoch": 18.404166666666665, + "grad_norm": 3.328742934850162, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 403181092, + "step": 4417 + }, + { + "epoch": 18.404166666666665, + "loss": 0.03622487559914589, + "loss_ce": 5.093725690130668e-07, + "loss_iou": 0.259765625, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 403181092, + "step": 4417 + }, + { + "epoch": 18.408333333333335, + "grad_norm": 3.3260340702202895, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 403270924, + "step": 4418 + }, + { + "epoch": 18.408333333333335, + "loss": 0.04696984589099884, + "loss_ce": 0.0005526099121198058, + "loss_iou": 0.255859375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 403270924, + "step": 4418 + }, + { + "epoch": 18.4125, + "grad_norm": 2.8574730100360175, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 403362356, + "step": 4419 + }, + { + "epoch": 18.4125, + "loss": 0.05734871327877045, + "loss_ce": 6.184901849337621e-06, + "loss_iou": 0.408203125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 403362356, + "step": 4419 + }, + { + "epoch": 18.416666666666668, + "grad_norm": 2.5362755439790603, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 403453812, + "step": 4420 + }, + { + "epoch": 18.416666666666668, + "loss": 0.0601063147187233, + "loss_ce": 1.948015324160224e-06, + "loss_iou": 0.1728515625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 403453812, + "step": 4420 + }, + { + "epoch": 18.420833333333334, + "grad_norm": 2.1787337321820788, + "learning_rate": 5e-05, + "loss": 0.0442, + "num_input_tokens_seen": 403543832, + "step": 4421 + }, + { + "epoch": 18.420833333333334, + "loss": 0.04106995835900307, + "loss_ce": 0.0009317125659435987, + "loss_iou": 0.271484375, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 403543832, + "step": 4421 + }, + { + "epoch": 18.425, + "grad_norm": 5.489285242620302, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 403634960, + "step": 4422 + }, + { + "epoch": 18.425, + "loss": 0.041658949106931686, + "loss_ce": 2.4570922505517956e-06, + "loss_iou": 0.228515625, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 403634960, + "step": 4422 + }, + { + "epoch": 18.429166666666667, + "grad_norm": 1.7580315074816295, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 403725460, + "step": 4423 + }, + { + "epoch": 18.429166666666667, + "loss": 0.058549076318740845, + "loss_ce": 2.3993075956241228e-05, + "loss_iou": 0.24609375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 403725460, + "step": 4423 + }, + { + "epoch": 18.433333333333334, + "grad_norm": 4.338079935951398, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 403816960, + "step": 4424 + }, + { + "epoch": 18.433333333333334, + "loss": 0.03503740578889847, + "loss_ce": 0.0014222942991182208, + "loss_iou": 0.0966796875, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 403816960, + "step": 4424 + }, + { + "epoch": 18.4375, + "grad_norm": 4.382683754585659, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 403908220, + "step": 4425 + }, + { + "epoch": 18.4375, + "loss": 0.08374170958995819, + "loss_ce": 1.4761058082513046e-06, + "loss_iou": 0.265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 403908220, + "step": 4425 + }, + { + "epoch": 18.441666666666666, + "grad_norm": 1.652297345716666, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 403999452, + "step": 4426 + }, + { + "epoch": 18.441666666666666, + "loss": 0.03246862068772316, + "loss_ce": 7.420943438773975e-05, + "loss_iou": 0.224609375, + "loss_num": 0.0064697265625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 403999452, + "step": 4426 + }, + { + "epoch": 18.445833333333333, + "grad_norm": 2.0580041388298214, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 404091052, + "step": 4427 + }, + { + "epoch": 18.445833333333333, + "loss": 0.04866918921470642, + "loss_ce": 1.2797968338418286e-06, + "loss_iou": 0.169921875, + "loss_num": 0.009765625, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 404091052, + "step": 4427 + }, + { + "epoch": 18.45, + "grad_norm": 5.904451027653659, + "learning_rate": 5e-05, + "loss": 0.0429, + "num_input_tokens_seen": 404181768, + "step": 4428 + }, + { + "epoch": 18.45, + "loss": 0.03854472562670708, + "loss_ce": 1.0242162034046487e-06, + "loss_iou": 0.3125, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 404181768, + "step": 4428 + }, + { + "epoch": 18.454166666666666, + "grad_norm": 2.055584232293814, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 404273320, + "step": 4429 + }, + { + "epoch": 18.454166666666666, + "loss": 0.040410809218883514, + "loss_ce": 5.537119704968063e-06, + "loss_iou": 0.1611328125, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 404273320, + "step": 4429 + }, + { + "epoch": 18.458333333333332, + "grad_norm": 2.711378918960716, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 404364380, + "step": 4430 + }, + { + "epoch": 18.458333333333332, + "loss": 0.04379798844456673, + "loss_ce": 0.0011038967641070485, + "loss_iou": 0.2412109375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 404364380, + "step": 4430 + }, + { + "epoch": 18.4625, + "grad_norm": 2.3864020110086632, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 404455832, + "step": 4431 + }, + { + "epoch": 18.4625, + "loss": 0.03300423175096512, + "loss_ce": 6.0506183217512444e-05, + "loss_iou": 0.25, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 404455832, + "step": 4431 + }, + { + "epoch": 18.466666666666665, + "grad_norm": 3.756837527836502, + "learning_rate": 5e-05, + "loss": 0.0442, + "num_input_tokens_seen": 404546108, + "step": 4432 + }, + { + "epoch": 18.466666666666665, + "loss": 0.055632077157497406, + "loss_ce": 6.161809324112255e-06, + "loss_iou": 0.189453125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 404546108, + "step": 4432 + }, + { + "epoch": 18.470833333333335, + "grad_norm": 2.249420188952619, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 404636968, + "step": 4433 + }, + { + "epoch": 18.470833333333335, + "loss": 0.06816327571868896, + "loss_ce": 2.2612073280470213e-06, + "loss_iou": 0.146484375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 404636968, + "step": 4433 + }, + { + "epoch": 18.475, + "grad_norm": 1.7131657942600311, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 404728108, + "step": 4434 + }, + { + "epoch": 18.475, + "loss": 0.07907183468341827, + "loss_ce": 7.842800187063403e-07, + "loss_iou": 0.1708984375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 404728108, + "step": 4434 + }, + { + "epoch": 18.479166666666668, + "grad_norm": 1.8772650748721937, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 404819668, + "step": 4435 + }, + { + "epoch": 18.479166666666668, + "loss": 0.04216513782739639, + "loss_ce": 2.0358822439447977e-05, + "loss_iou": 0.189453125, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 404819668, + "step": 4435 + }, + { + "epoch": 18.483333333333334, + "grad_norm": 3.3095559754291344, + "learning_rate": 5e-05, + "loss": 0.0934, + "num_input_tokens_seen": 404910868, + "step": 4436 + }, + { + "epoch": 18.483333333333334, + "loss": 0.12461771070957184, + "loss_ce": 1.443784549337579e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 404910868, + "step": 4436 + }, + { + "epoch": 18.4875, + "grad_norm": 2.51422835105707, + "learning_rate": 5e-05, + "loss": 0.0423, + "num_input_tokens_seen": 405001984, + "step": 4437 + }, + { + "epoch": 18.4875, + "loss": 0.05847553163766861, + "loss_ce": 0.0019722371362149715, + "loss_iou": 0.16796875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 405001984, + "step": 4437 + }, + { + "epoch": 18.491666666666667, + "grad_norm": 2.3362978882356513, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 405092804, + "step": 4438 + }, + { + "epoch": 18.491666666666667, + "loss": 0.06804253160953522, + "loss_ce": 3.593964265746763e-06, + "loss_iou": 0.287109375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 405092804, + "step": 4438 + }, + { + "epoch": 18.495833333333334, + "grad_norm": 2.837301728982645, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 405184724, + "step": 4439 + }, + { + "epoch": 18.495833333333334, + "loss": 0.03965744748711586, + "loss_ce": 0.0007780530722811818, + "loss_iou": 0.34765625, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 405184724, + "step": 4439 + }, + { + "epoch": 18.5, + "grad_norm": 2.1634899230189095, + "learning_rate": 5e-05, + "loss": 0.108, + "num_input_tokens_seen": 405276324, + "step": 4440 + }, + { + "epoch": 18.5, + "loss": 0.14463144540786743, + "loss_ce": 0.00022226519649848342, + "loss_iou": 0.22265625, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 405276324, + "step": 4440 + }, + { + "epoch": 18.504166666666666, + "grad_norm": 1.7895128782846779, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 405368004, + "step": 4441 + }, + { + "epoch": 18.504166666666666, + "loss": 0.04148627072572708, + "loss_ce": 5.254165898804786e-06, + "loss_iou": 0.1494140625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 405368004, + "step": 4441 + }, + { + "epoch": 18.508333333333333, + "grad_norm": 2.8820974379135524, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 405459140, + "step": 4442 + }, + { + "epoch": 18.508333333333333, + "loss": 0.04661928489804268, + "loss_ce": 3.685253886942519e-06, + "loss_iou": 0.267578125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 405459140, + "step": 4442 + }, + { + "epoch": 18.5125, + "grad_norm": 2.7480017872293847, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 405550360, + "step": 4443 + }, + { + "epoch": 18.5125, + "loss": 0.08555868268013, + "loss_ce": 2.650637043188908e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 405550360, + "step": 4443 + }, + { + "epoch": 18.516666666666666, + "grad_norm": 1.9883482848739207, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 405641536, + "step": 4444 + }, + { + "epoch": 18.516666666666666, + "loss": 0.042246539145708084, + "loss_ce": 2.583612285889103e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 405641536, + "step": 4444 + }, + { + "epoch": 18.520833333333332, + "grad_norm": 2.1394162140134063, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 405732288, + "step": 4445 + }, + { + "epoch": 18.520833333333332, + "loss": 0.07797446101903915, + "loss_ce": 2.0443444554985035e-06, + "loss_iou": 0.1708984375, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 405732288, + "step": 4445 + }, + { + "epoch": 18.525, + "grad_norm": 2.8044908787997693, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 405822968, + "step": 4446 + }, + { + "epoch": 18.525, + "loss": 0.04367266595363617, + "loss_ce": 2.011754531849874e-06, + "loss_iou": 0.32421875, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 405822968, + "step": 4446 + }, + { + "epoch": 18.529166666666665, + "grad_norm": 3.0426163524559593, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 405913680, + "step": 4447 + }, + { + "epoch": 18.529166666666665, + "loss": 0.04268840700387955, + "loss_ce": 4.0092592826113105e-05, + "loss_iou": 0.236328125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 405913680, + "step": 4447 + }, + { + "epoch": 18.533333333333335, + "grad_norm": 3.4613499678428172, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 406005064, + "step": 4448 + }, + { + "epoch": 18.533333333333335, + "loss": 0.05986738204956055, + "loss_ce": 2.2407934011425823e-05, + "loss_iou": 0.27734375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 406005064, + "step": 4448 + }, + { + "epoch": 18.5375, + "grad_norm": 2.700220807710911, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 406096892, + "step": 4449 + }, + { + "epoch": 18.5375, + "loss": 0.05822140723466873, + "loss_ce": 3.9642905903747305e-05, + "loss_iou": 0.3203125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 406096892, + "step": 4449 + }, + { + "epoch": 18.541666666666668, + "grad_norm": 4.063404233508145, + "learning_rate": 5e-05, + "loss": 0.0839, + "num_input_tokens_seen": 406188952, + "step": 4450 + }, + { + "epoch": 18.541666666666668, + "loss": 0.10297872126102448, + "loss_ce": 5.8184596127830446e-05, + "loss_iou": 0.23046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 406188952, + "step": 4450 + }, + { + "epoch": 18.545833333333334, + "grad_norm": 1.4196393264059073, + "learning_rate": 5e-05, + "loss": 0.0273, + "num_input_tokens_seen": 406279820, + "step": 4451 + }, + { + "epoch": 18.545833333333334, + "loss": 0.023423004895448685, + "loss_ce": 7.639623049726652e-07, + "loss_iou": 0.2734375, + "loss_num": 0.00469970703125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 406279820, + "step": 4451 + }, + { + "epoch": 18.55, + "grad_norm": 2.155221835305125, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 406371184, + "step": 4452 + }, + { + "epoch": 18.55, + "loss": 0.027498047798871994, + "loss_ce": 1.710833430479397e-06, + "loss_iou": 0.09423828125, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 406371184, + "step": 4452 + }, + { + "epoch": 18.554166666666667, + "grad_norm": 2.208679417966855, + "learning_rate": 5e-05, + "loss": 0.0345, + "num_input_tokens_seen": 406462272, + "step": 4453 + }, + { + "epoch": 18.554166666666667, + "loss": 0.03785201162099838, + "loss_ce": 2.587808467069408e-06, + "loss_iou": 0.2734375, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 406462272, + "step": 4453 + }, + { + "epoch": 18.558333333333334, + "grad_norm": 2.517048340877465, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 406553752, + "step": 4454 + }, + { + "epoch": 18.558333333333334, + "loss": 0.03823276609182358, + "loss_ce": 1.8682237623579567e-06, + "loss_iou": 0.267578125, + "loss_num": 0.00762939453125, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 406553752, + "step": 4454 + }, + { + "epoch": 18.5625, + "grad_norm": 2.1451262472298533, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 406644692, + "step": 4455 + }, + { + "epoch": 18.5625, + "loss": 0.044374290853738785, + "loss_ce": 1.730572193991975e-06, + "loss_iou": 0.275390625, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 406644692, + "step": 4455 + }, + { + "epoch": 18.566666666666666, + "grad_norm": 2.750847890813243, + "learning_rate": 5e-05, + "loss": 0.0482, + "num_input_tokens_seen": 406735980, + "step": 4456 + }, + { + "epoch": 18.566666666666666, + "loss": 0.03298502415418625, + "loss_ce": 0.0001023312215693295, + "loss_iou": 0.400390625, + "loss_num": 0.006561279296875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 406735980, + "step": 4456 + }, + { + "epoch": 18.570833333333333, + "grad_norm": 3.1762164731004154, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 406827296, + "step": 4457 + }, + { + "epoch": 18.570833333333333, + "loss": 0.047379009425640106, + "loss_ce": 4.7071023345779395e-07, + "loss_iou": 0.22265625, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 406827296, + "step": 4457 + }, + { + "epoch": 18.575, + "grad_norm": 3.022836034055383, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 406918792, + "step": 4458 + }, + { + "epoch": 18.575, + "loss": 0.04400699585676193, + "loss_ce": 1.590611100255046e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 406918792, + "step": 4458 + }, + { + "epoch": 18.579166666666666, + "grad_norm": 3.344276702917487, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 407009924, + "step": 4459 + }, + { + "epoch": 18.579166666666666, + "loss": 0.032357875257730484, + "loss_ce": 1.6871223124326207e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 407009924, + "step": 4459 + }, + { + "epoch": 18.583333333333332, + "grad_norm": 8.865744014817219, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 407101744, + "step": 4460 + }, + { + "epoch": 18.583333333333332, + "loss": 0.05924474075436592, + "loss_ce": 7.115265907486901e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 407101744, + "step": 4460 + }, + { + "epoch": 18.5875, + "grad_norm": 3.0703449184646323, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 407193012, + "step": 4461 + }, + { + "epoch": 18.5875, + "loss": 0.030109165236353874, + "loss_ce": 3.576171820895979e-06, + "loss_iou": 0.212890625, + "loss_num": 0.006011962890625, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 407193012, + "step": 4461 + }, + { + "epoch": 18.591666666666665, + "grad_norm": 2.323271357780661, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 407284424, + "step": 4462 + }, + { + "epoch": 18.591666666666665, + "loss": 0.04477598890662193, + "loss_ce": 0.00034239343949593604, + "loss_iou": 0.142578125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 407284424, + "step": 4462 + }, + { + "epoch": 18.595833333333335, + "grad_norm": 1.7998223791646442, + "learning_rate": 5e-05, + "loss": 0.0306, + "num_input_tokens_seen": 407376020, + "step": 4463 + }, + { + "epoch": 18.595833333333335, + "loss": 0.020944489166140556, + "loss_ce": 9.43049235502258e-06, + "loss_iou": 0.25390625, + "loss_num": 0.004180908203125, + "loss_xval": 0.02099609375, + "num_input_tokens_seen": 407376020, + "step": 4463 + }, + { + "epoch": 18.6, + "grad_norm": 1.7772887415471503, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 407467812, + "step": 4464 + }, + { + "epoch": 18.6, + "loss": 0.05665751174092293, + "loss_ce": 1.6278679595416179e-06, + "loss_iou": 0.32421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 407467812, + "step": 4464 + }, + { + "epoch": 18.604166666666668, + "grad_norm": 1.2942589953418278, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 407559336, + "step": 4465 + }, + { + "epoch": 18.604166666666668, + "loss": 0.018004463985562325, + "loss_ce": 0.0007467729155905545, + "loss_iou": 0.2109375, + "loss_num": 0.003448486328125, + "loss_xval": 0.0172119140625, + "num_input_tokens_seen": 407559336, + "step": 4465 + }, + { + "epoch": 18.608333333333334, + "grad_norm": 2.436094168196786, + "learning_rate": 5e-05, + "loss": 0.1048, + "num_input_tokens_seen": 407650544, + "step": 4466 + }, + { + "epoch": 18.608333333333334, + "loss": 0.12345411628484726, + "loss_ce": 1.0510191714274697e-05, + "loss_iou": 0.279296875, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 407650544, + "step": 4466 + }, + { + "epoch": 18.6125, + "grad_norm": 2.6223977454659404, + "learning_rate": 5e-05, + "loss": 0.0474, + "num_input_tokens_seen": 407742424, + "step": 4467 + }, + { + "epoch": 18.6125, + "loss": 0.05559170991182327, + "loss_ce": 3.939579983125441e-06, + "loss_iou": 0.1806640625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 407742424, + "step": 4467 + }, + { + "epoch": 18.616666666666667, + "grad_norm": 1.3664409769636283, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 407834412, + "step": 4468 + }, + { + "epoch": 18.616666666666667, + "loss": 0.031441450119018555, + "loss_ce": 0.0006034359685145319, + "loss_iou": 0.2421875, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 407834412, + "step": 4468 + }, + { + "epoch": 18.620833333333334, + "grad_norm": 2.023822163166781, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 407924272, + "step": 4469 + }, + { + "epoch": 18.620833333333334, + "loss": 0.030810587108135223, + "loss_ce": 3.360964183229953e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 407924272, + "step": 4469 + }, + { + "epoch": 18.625, + "grad_norm": 2.7439402399615047, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 408015508, + "step": 4470 + }, + { + "epoch": 18.625, + "loss": 0.03986073285341263, + "loss_ce": 4.775926299771527e-06, + "loss_iou": 0.1806640625, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 408015508, + "step": 4470 + }, + { + "epoch": 18.629166666666666, + "grad_norm": 2.9815488516198254, + "learning_rate": 5e-05, + "loss": 0.0717, + "num_input_tokens_seen": 408105620, + "step": 4471 + }, + { + "epoch": 18.629166666666666, + "loss": 0.09448256343603134, + "loss_ce": 4.59203110949602e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 408105620, + "step": 4471 + }, + { + "epoch": 18.633333333333333, + "grad_norm": 2.66745813251496, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 408196668, + "step": 4472 + }, + { + "epoch": 18.633333333333333, + "loss": 0.038133613765239716, + "loss_ce": 1.9017574004465132e-06, + "loss_iou": 0.224609375, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 408196668, + "step": 4472 + }, + { + "epoch": 18.6375, + "grad_norm": 2.131843660187116, + "learning_rate": 5e-05, + "loss": 0.0343, + "num_input_tokens_seen": 408287424, + "step": 4473 + }, + { + "epoch": 18.6375, + "loss": 0.0305347740650177, + "loss_ce": 1.719842475722544e-05, + "loss_iou": 0.271484375, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 408287424, + "step": 4473 + }, + { + "epoch": 18.641666666666666, + "grad_norm": 6.60588283629867, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 408378844, + "step": 4474 + }, + { + "epoch": 18.641666666666666, + "loss": 0.04872913286089897, + "loss_ce": 1.544825681776274e-05, + "loss_iou": 0.15625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 408378844, + "step": 4474 + }, + { + "epoch": 18.645833333333332, + "grad_norm": 3.1277533404717297, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 408470320, + "step": 4475 + }, + { + "epoch": 18.645833333333332, + "loss": 0.028999999165534973, + "loss_ce": 6.703672283947526e-07, + "loss_iou": 0.318359375, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 408470320, + "step": 4475 + }, + { + "epoch": 18.65, + "grad_norm": 2.1571541506630747, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 408562520, + "step": 4476 + }, + { + "epoch": 18.65, + "loss": 0.05866888538002968, + "loss_ce": 1.4098356587055605e-05, + "loss_iou": 0.39453125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 408562520, + "step": 4476 + }, + { + "epoch": 18.654166666666665, + "grad_norm": 2.1129025810806996, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 408654496, + "step": 4477 + }, + { + "epoch": 18.654166666666665, + "loss": 0.028653541579842567, + "loss_ce": 7.383100455626845e-05, + "loss_iou": 0.318359375, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 408654496, + "step": 4477 + }, + { + "epoch": 18.658333333333335, + "grad_norm": 4.128846484593055, + "learning_rate": 5e-05, + "loss": 0.0923, + "num_input_tokens_seen": 408745820, + "step": 4478 + }, + { + "epoch": 18.658333333333335, + "loss": 0.054933082312345505, + "loss_ce": 9.29926973185502e-05, + "loss_iou": 0.3515625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 408745820, + "step": 4478 + }, + { + "epoch": 18.6625, + "grad_norm": 4.826721212274139, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 408836832, + "step": 4479 + }, + { + "epoch": 18.6625, + "loss": 0.07643422484397888, + "loss_ce": 2.9575387543445686e-06, + "loss_iou": 0.287109375, + "loss_num": 0.0152587890625, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 408836832, + "step": 4479 + }, + { + "epoch": 18.666666666666668, + "grad_norm": 2.6585938697310487, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 408927732, + "step": 4480 + }, + { + "epoch": 18.666666666666668, + "loss": 0.027991794049739838, + "loss_ce": 0.0001292466913582757, + "loss_iou": 0.21875, + "loss_num": 0.005584716796875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 408927732, + "step": 4480 + }, + { + "epoch": 18.670833333333334, + "grad_norm": 2.5149413948569173, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 409018648, + "step": 4481 + }, + { + "epoch": 18.670833333333334, + "loss": 0.04316835105419159, + "loss_ce": 1.6492605936946347e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 409018648, + "step": 4481 + }, + { + "epoch": 18.675, + "grad_norm": 2.164456274812016, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 409108832, + "step": 4482 + }, + { + "epoch": 18.675, + "loss": 0.04486284404993057, + "loss_ce": 2.0009613308502594e-06, + "loss_iou": 0.2578125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 409108832, + "step": 4482 + }, + { + "epoch": 18.679166666666667, + "grad_norm": 2.2556710075056596, + "learning_rate": 5e-05, + "loss": 0.0404, + "num_input_tokens_seen": 409200360, + "step": 4483 + }, + { + "epoch": 18.679166666666667, + "loss": 0.04485397785902023, + "loss_ce": 7.65622019116563e-07, + "loss_iou": 0.1875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 409200360, + "step": 4483 + }, + { + "epoch": 18.683333333333334, + "grad_norm": 5.93230040799808, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 409291544, + "step": 4484 + }, + { + "epoch": 18.683333333333334, + "loss": 0.06880977749824524, + "loss_ce": 7.897714567661751e-06, + "loss_iou": 0.28125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 409291544, + "step": 4484 + }, + { + "epoch": 18.6875, + "grad_norm": 3.767648367271881, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 409382640, + "step": 4485 + }, + { + "epoch": 18.6875, + "loss": 0.05254533141851425, + "loss_ce": 1.6948622942436486e-05, + "loss_iou": 0.291015625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 409382640, + "step": 4485 + }, + { + "epoch": 18.691666666666666, + "grad_norm": 3.1886877603013253, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 409473580, + "step": 4486 + }, + { + "epoch": 18.691666666666666, + "loss": 0.048356104642152786, + "loss_ce": 1.0024384664575337e-06, + "loss_iou": 0.322265625, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 409473580, + "step": 4486 + }, + { + "epoch": 18.695833333333333, + "grad_norm": 2.1785204289187017, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 409565448, + "step": 4487 + }, + { + "epoch": 18.695833333333333, + "loss": 0.0359908752143383, + "loss_ce": 3.022179271283676e-06, + "loss_iou": 0.28125, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 409565448, + "step": 4487 + }, + { + "epoch": 18.7, + "grad_norm": 2.1407511218268613, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 409656152, + "step": 4488 + }, + { + "epoch": 18.7, + "loss": 0.054323356598615646, + "loss_ce": 2.0684299215645296e-06, + "loss_iou": 0.30078125, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 409656152, + "step": 4488 + }, + { + "epoch": 18.704166666666666, + "grad_norm": 3.111631253031894, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 409747656, + "step": 4489 + }, + { + "epoch": 18.704166666666666, + "loss": 0.07458843290805817, + "loss_ce": 3.4666340980038512e-06, + "loss_iou": 0.1640625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 409747656, + "step": 4489 + }, + { + "epoch": 18.708333333333332, + "grad_norm": 1.5551129829754005, + "learning_rate": 5e-05, + "loss": 0.0756, + "num_input_tokens_seen": 409838856, + "step": 4490 + }, + { + "epoch": 18.708333333333332, + "loss": 0.09391278028488159, + "loss_ce": 1.0192829904553946e-05, + "loss_iou": 0.302734375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 409838856, + "step": 4490 + }, + { + "epoch": 18.7125, + "grad_norm": 2.520127418122806, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 409930208, + "step": 4491 + }, + { + "epoch": 18.7125, + "loss": 0.0317520946264267, + "loss_ce": 6.1826676756027155e-06, + "loss_iou": 0.32421875, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 409930208, + "step": 4491 + }, + { + "epoch": 18.716666666666665, + "grad_norm": 1.556512662051991, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 410021368, + "step": 4492 + }, + { + "epoch": 18.716666666666665, + "loss": 0.025783251971006393, + "loss_ce": 3.5267755720269633e-06, + "loss_iou": 0.1953125, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 410021368, + "step": 4492 + }, + { + "epoch": 18.720833333333335, + "grad_norm": 8.884609105765334, + "learning_rate": 5e-05, + "loss": 0.0424, + "num_input_tokens_seen": 410113392, + "step": 4493 + }, + { + "epoch": 18.720833333333335, + "loss": 0.053828682750463486, + "loss_ce": 7.196986553026363e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 410113392, + "step": 4493 + }, + { + "epoch": 18.725, + "grad_norm": 2.750731941866525, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 410205120, + "step": 4494 + }, + { + "epoch": 18.725, + "loss": 0.03087478131055832, + "loss_ce": 1.0064355592476204e-05, + "loss_iou": 0.3125, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 410205120, + "step": 4494 + }, + { + "epoch": 18.729166666666668, + "grad_norm": 2.6665230765646117, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 410296348, + "step": 4495 + }, + { + "epoch": 18.729166666666668, + "loss": 0.03424867242574692, + "loss_ce": 7.951766747282818e-06, + "loss_iou": 0.234375, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 410296348, + "step": 4495 + }, + { + "epoch": 18.733333333333334, + "grad_norm": 4.025624968892128, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 410386296, + "step": 4496 + }, + { + "epoch": 18.733333333333334, + "loss": 0.04179053008556366, + "loss_ce": 4.333990546001587e-06, + "loss_iou": 0.306640625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 410386296, + "step": 4496 + }, + { + "epoch": 18.7375, + "grad_norm": 2.9287391966615077, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 410477784, + "step": 4497 + }, + { + "epoch": 18.7375, + "loss": 0.07163071632385254, + "loss_ce": 5.957826033409219e-06, + "loss_iou": 0.1796875, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 410477784, + "step": 4497 + }, + { + "epoch": 18.741666666666667, + "grad_norm": 2.000887712646304, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 410569244, + "step": 4498 + }, + { + "epoch": 18.741666666666667, + "loss": 0.0395614430308342, + "loss_ce": 7.16970898793079e-05, + "loss_iou": 0.212890625, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 410569244, + "step": 4498 + }, + { + "epoch": 18.745833333333334, + "grad_norm": 2.29575014252196, + "learning_rate": 5e-05, + "loss": 0.0362, + "num_input_tokens_seen": 410660492, + "step": 4499 + }, + { + "epoch": 18.745833333333334, + "loss": 0.04005458950996399, + "loss_ce": 2.6626599947121576e-07, + "loss_iou": 0.23828125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 410660492, + "step": 4499 + }, + { + "epoch": 18.75, + "grad_norm": 2.5630762248721237, + "learning_rate": 5e-05, + "loss": 0.087, + "num_input_tokens_seen": 410751560, + "step": 4500 + }, + { + "epoch": 18.75, + "eval_seeclick_CIoU": 0.24041260033845901, + "eval_seeclick_GIoU": 0.229469396173954, + "eval_seeclick_IoU": 0.3364127427339554, + "eval_seeclick_MAE_all": 0.09035475924611092, + "eval_seeclick_MAE_h": 0.08218218386173248, + "eval_seeclick_MAE_w": 0.17696396261453629, + "eval_seeclick_MAE_x_boxes": 0.18398155272006989, + "eval_seeclick_MAE_y_boxes": 0.08752219006419182, + "eval_seeclick_NUM_probability": 0.9999991357326508, + "eval_seeclick_inside_bbox": 0.5255681872367859, + "eval_seeclick_loss": 0.5564561486244202, + "eval_seeclick_loss_ce": 0.15300309658050537, + "eval_seeclick_loss_iou": 0.45245361328125, + "eval_seeclick_loss_num": 0.0796356201171875, + "eval_seeclick_loss_xval": 0.39813232421875, + "eval_seeclick_runtime": 77.9915, + "eval_seeclick_samples_per_second": 0.551, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 410751560, + "step": 4500 + }, + { + "epoch": 18.75, + "eval_icons_CIoU": 0.31817570328712463, + "eval_icons_GIoU": 0.28907932341098785, + "eval_icons_IoU": 0.411752387881279, + "eval_icons_MAE_all": 0.07357519492506981, + "eval_icons_MAE_h": 0.1560768559575081, + "eval_icons_MAE_w": 0.1098833754658699, + "eval_icons_MAE_x_boxes": 0.10879017040133476, + "eval_icons_MAE_y_boxes": 0.1561029627919197, + "eval_icons_NUM_probability": 0.999999463558197, + "eval_icons_inside_bbox": 0.6336805522441864, + "eval_icons_loss": 0.3551454246044159, + "eval_icons_loss_ce": 0.000737828500859905, + "eval_icons_loss_iou": 0.25079345703125, + "eval_icons_loss_num": 0.0764312744140625, + "eval_icons_loss_xval": 0.382080078125, + "eval_icons_runtime": 90.0127, + "eval_icons_samples_per_second": 0.555, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 410751560, + "step": 4500 + }, + { + "epoch": 18.75, + "eval_screenspot_CIoU": 0.3529522220293681, + "eval_screenspot_GIoU": 0.34071413179238635, + "eval_screenspot_IoU": 0.43253129720687866, + "eval_screenspot_MAE_all": 0.10080522298812866, + "eval_screenspot_MAE_h": 0.08905058105786641, + "eval_screenspot_MAE_w": 0.22549272576967874, + "eval_screenspot_MAE_x_boxes": 0.18476370722055435, + "eval_screenspot_MAE_y_boxes": 0.08808410912752151, + "eval_screenspot_NUM_probability": 0.9999993046124777, + "eval_screenspot_inside_bbox": 0.6775000095367432, + "eval_screenspot_loss": 0.5045076012611389, + "eval_screenspot_loss_ce": 0.001676593108754787, + "eval_screenspot_loss_iou": 0.3478190104166667, + "eval_screenspot_loss_num": 0.10205586751302083, + "eval_screenspot_loss_xval": 0.5101521809895834, + "eval_screenspot_runtime": 163.3499, + "eval_screenspot_samples_per_second": 0.545, + "eval_screenspot_steps_per_second": 0.018, + "num_input_tokens_seen": 410751560, + "step": 4500 + }, + { + "epoch": 18.75, + "eval_compot_CIoU": 0.44827909767627716, + "eval_compot_GIoU": 0.4435681253671646, + "eval_compot_IoU": 0.5230425000190735, + "eval_compot_MAE_all": 0.06078624911606312, + "eval_compot_MAE_h": 0.06127396039664745, + "eval_compot_MAE_w": 0.16494429856538773, + "eval_compot_MAE_x_boxes": 0.16555871069431305, + "eval_compot_MAE_y_boxes": 0.057475872337818146, + "eval_compot_NUM_probability": 0.9999968111515045, + "eval_compot_inside_bbox": 0.6927083432674408, + "eval_compot_loss": 0.33864617347717285, + "eval_compot_loss_ce": 0.05631054379045963, + "eval_compot_loss_iou": 0.29205322265625, + "eval_compot_loss_num": 0.053653717041015625, + "eval_compot_loss_xval": 0.268218994140625, + "eval_compot_runtime": 92.952, + "eval_compot_samples_per_second": 0.538, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 410751560, + "step": 4500 + }, + { + "epoch": 18.75, + "loss": 0.35820692777633667, + "loss_ce": 0.05205457657575607, + "loss_iou": 0.267578125, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 410751560, + "step": 4500 + }, + { + "epoch": 18.754166666666666, + "grad_norm": 4.165016190881154, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 410842956, + "step": 4501 + }, + { + "epoch": 18.754166666666666, + "loss": 0.059623926877975464, + "loss_ce": 2.3097214580047876e-05, + "loss_iou": 0.267578125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 410842956, + "step": 4501 + }, + { + "epoch": 18.758333333333333, + "grad_norm": 3.8968387340439476, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 410934796, + "step": 4502 + }, + { + "epoch": 18.758333333333333, + "loss": 0.0845027044415474, + "loss_ce": 0.000892168958671391, + "loss_iou": 0.369140625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 410934796, + "step": 4502 + }, + { + "epoch": 18.7625, + "grad_norm": 5.70250243158941, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 411025664, + "step": 4503 + }, + { + "epoch": 18.7625, + "loss": 0.04231572151184082, + "loss_ce": 3.0992389383754926e-06, + "loss_iou": 0.28515625, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 411025664, + "step": 4503 + }, + { + "epoch": 18.766666666666666, + "grad_norm": 2.9812879644219796, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 411116996, + "step": 4504 + }, + { + "epoch": 18.766666666666666, + "loss": 0.04243713617324829, + "loss_ce": 2.4412829588982277e-06, + "loss_iou": 0.279296875, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 411116996, + "step": 4504 + }, + { + "epoch": 18.770833333333332, + "grad_norm": 2.9826605958507417, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 411208000, + "step": 4505 + }, + { + "epoch": 18.770833333333332, + "loss": 0.043711256235837936, + "loss_ce": 0.0001474139717174694, + "loss_iou": 0.26171875, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 411208000, + "step": 4505 + }, + { + "epoch": 18.775, + "grad_norm": 1.8056504080821845, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 411299380, + "step": 4506 + }, + { + "epoch": 18.775, + "loss": 0.04516106843948364, + "loss_ce": 2.5572267986717634e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 411299380, + "step": 4506 + }, + { + "epoch": 18.779166666666665, + "grad_norm": 2.503334389176959, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 411390908, + "step": 4507 + }, + { + "epoch": 18.779166666666665, + "loss": 0.04811955988407135, + "loss_ce": 2.385993502684869e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 411390908, + "step": 4507 + }, + { + "epoch": 18.783333333333335, + "grad_norm": 2.3792539602486307, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 411482032, + "step": 4508 + }, + { + "epoch": 18.783333333333335, + "loss": 0.043492428958415985, + "loss_ce": 4.879675998381572e-06, + "loss_iou": 0.203125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 411482032, + "step": 4508 + }, + { + "epoch": 18.7875, + "grad_norm": 4.089348104331533, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 411573448, + "step": 4509 + }, + { + "epoch": 18.7875, + "loss": 0.049597617238759995, + "loss_ce": 6.554991159646306e-06, + "loss_iou": 0.1416015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 411573448, + "step": 4509 + }, + { + "epoch": 18.791666666666668, + "grad_norm": 3.2327709000725986, + "learning_rate": 5e-05, + "loss": 0.0678, + "num_input_tokens_seen": 411664588, + "step": 4510 + }, + { + "epoch": 18.791666666666668, + "loss": 0.06886570155620575, + "loss_ce": 2.7887392661796184e-06, + "loss_iou": 0.30859375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 411664588, + "step": 4510 + }, + { + "epoch": 18.795833333333334, + "grad_norm": 2.372303920105501, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 411755708, + "step": 4511 + }, + { + "epoch": 18.795833333333334, + "loss": 0.03130407631397247, + "loss_ce": 6.694058356515598e-07, + "loss_iou": 0.1884765625, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 411755708, + "step": 4511 + }, + { + "epoch": 18.8, + "grad_norm": 2.3058725173417174, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 411847404, + "step": 4512 + }, + { + "epoch": 18.8, + "loss": 0.047491900622844696, + "loss_ce": 1.4178664059727453e-05, + "loss_iou": 0.283203125, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 411847404, + "step": 4512 + }, + { + "epoch": 18.804166666666667, + "grad_norm": 1.5764967521065583, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 411938976, + "step": 4513 + }, + { + "epoch": 18.804166666666667, + "loss": 0.06534408777952194, + "loss_ce": 9.751153993420303e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 411938976, + "step": 4513 + }, + { + "epoch": 18.808333333333334, + "grad_norm": 1.063531570998383, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 412030072, + "step": 4514 + }, + { + "epoch": 18.808333333333334, + "loss": 0.07547049224376678, + "loss_ce": 5.137699190527201e-07, + "loss_iou": 0.21484375, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 412030072, + "step": 4514 + }, + { + "epoch": 18.8125, + "grad_norm": 2.4462235116955395, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 412121340, + "step": 4515 + }, + { + "epoch": 18.8125, + "loss": 0.07516561448574066, + "loss_ce": 8.244851414929144e-07, + "loss_iou": 0.2197265625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 412121340, + "step": 4515 + }, + { + "epoch": 18.816666666666666, + "grad_norm": 3.174488492095962, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 412212332, + "step": 4516 + }, + { + "epoch": 18.816666666666666, + "loss": 0.07436549663543701, + "loss_ce": 1.7897373254527338e-06, + "loss_iou": 0.28515625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 412212332, + "step": 4516 + }, + { + "epoch": 18.820833333333333, + "grad_norm": 2.777840323934893, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 412303960, + "step": 4517 + }, + { + "epoch": 18.820833333333333, + "loss": 0.07483154535293579, + "loss_ce": 2.4458545340166893e-06, + "loss_iou": 0.2890625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 412303960, + "step": 4517 + }, + { + "epoch": 18.825, + "grad_norm": 3.037732930460235, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 412395028, + "step": 4518 + }, + { + "epoch": 18.825, + "loss": 0.043012239038944244, + "loss_ce": 0.0004402203776407987, + "loss_iou": 0.185546875, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 412395028, + "step": 4518 + }, + { + "epoch": 18.829166666666666, + "grad_norm": 8.699284437617996, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 412486328, + "step": 4519 + }, + { + "epoch": 18.829166666666666, + "loss": 0.03607865422964096, + "loss_ce": 0.00011368915147613734, + "loss_iou": 0.30859375, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 412486328, + "step": 4519 + }, + { + "epoch": 18.833333333333332, + "grad_norm": 5.473940822177437, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 412577844, + "step": 4520 + }, + { + "epoch": 18.833333333333332, + "loss": 0.05875308811664581, + "loss_ce": 6.748792202415643e-06, + "loss_iou": 0.1845703125, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 412577844, + "step": 4520 + }, + { + "epoch": 18.8375, + "grad_norm": 2.3231654327618143, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 412668408, + "step": 4521 + }, + { + "epoch": 18.8375, + "loss": 0.060074321925640106, + "loss_ce": 4.6995239699754165e-07, + "loss_iou": 0.275390625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 412668408, + "step": 4521 + }, + { + "epoch": 18.841666666666665, + "grad_norm": 5.512980870961963, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 412759712, + "step": 4522 + }, + { + "epoch": 18.841666666666665, + "loss": 0.07790729403495789, + "loss_ce": 3.5450657378532924e-06, + "loss_iou": 0.28515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 412759712, + "step": 4522 + }, + { + "epoch": 18.845833333333335, + "grad_norm": 3.418687552224435, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 412850176, + "step": 4523 + }, + { + "epoch": 18.845833333333335, + "loss": 0.05151607096195221, + "loss_ce": 2.398493052169215e-06, + "loss_iou": 0.263671875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 412850176, + "step": 4523 + }, + { + "epoch": 18.85, + "grad_norm": 3.065566699479895, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 412941256, + "step": 4524 + }, + { + "epoch": 18.85, + "loss": 0.02815604954957962, + "loss_ce": 3.5835851122101303e-06, + "loss_iou": 0.22265625, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 412941256, + "step": 4524 + }, + { + "epoch": 18.854166666666668, + "grad_norm": 4.539063725156864, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 413031964, + "step": 4525 + }, + { + "epoch": 18.854166666666668, + "loss": 0.035888951271772385, + "loss_ce": 2.80285235021438e-07, + "loss_iou": 0.310546875, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 413031964, + "step": 4525 + }, + { + "epoch": 18.858333333333334, + "grad_norm": 2.0341420317401595, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 413123348, + "step": 4526 + }, + { + "epoch": 18.858333333333334, + "loss": 0.06325916200876236, + "loss_ce": 3.851961537293391e-06, + "loss_iou": 0.193359375, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 413123348, + "step": 4526 + }, + { + "epoch": 18.8625, + "grad_norm": 2.5688119812235595, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 413214692, + "step": 4527 + }, + { + "epoch": 18.8625, + "loss": 0.05270035192370415, + "loss_ce": 7.279007695615292e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 413214692, + "step": 4527 + }, + { + "epoch": 18.866666666666667, + "grad_norm": 2.665680782878335, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 413306240, + "step": 4528 + }, + { + "epoch": 18.866666666666667, + "loss": 0.04966755211353302, + "loss_ce": 1.9118917293781124e-07, + "loss_iou": 0.27734375, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 413306240, + "step": 4528 + }, + { + "epoch": 18.870833333333334, + "grad_norm": 2.777606940324933, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 413397748, + "step": 4529 + }, + { + "epoch": 18.870833333333334, + "loss": 0.045915387570858, + "loss_ce": 0.0009019593708217144, + "loss_iou": 0.29296875, + "loss_num": 0.009033203125, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 413397748, + "step": 4529 + }, + { + "epoch": 18.875, + "grad_norm": 3.047683174179071, + "learning_rate": 5e-05, + "loss": 0.0429, + "num_input_tokens_seen": 413489768, + "step": 4530 + }, + { + "epoch": 18.875, + "loss": 0.04263220354914665, + "loss_ce": 1.4406334230443463e-05, + "loss_iou": 0.265625, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 413489768, + "step": 4530 + }, + { + "epoch": 18.879166666666666, + "grad_norm": 2.441751202633568, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 413580660, + "step": 4531 + }, + { + "epoch": 18.879166666666666, + "loss": 0.03846908360719681, + "loss_ce": 1.6747765130276093e-06, + "loss_iou": 0.244140625, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 413580660, + "step": 4531 + }, + { + "epoch": 18.883333333333333, + "grad_norm": 2.239737246516112, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 413672152, + "step": 4532 + }, + { + "epoch": 18.883333333333333, + "loss": 0.04217064380645752, + "loss_ce": 1.0608757293084636e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 413672152, + "step": 4532 + }, + { + "epoch": 18.8875, + "grad_norm": 2.599374959619232, + "learning_rate": 5e-05, + "loss": 0.0304, + "num_input_tokens_seen": 413763232, + "step": 4533 + }, + { + "epoch": 18.8875, + "loss": 0.02673015370965004, + "loss_ce": 1.201295162900351e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 413763232, + "step": 4533 + }, + { + "epoch": 18.891666666666666, + "grad_norm": 2.8679999767678472, + "learning_rate": 5e-05, + "loss": 0.0329, + "num_input_tokens_seen": 413854784, + "step": 4534 + }, + { + "epoch": 18.891666666666666, + "loss": 0.03278213366866112, + "loss_ce": 3.677474887808785e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 413854784, + "step": 4534 + }, + { + "epoch": 18.895833333333332, + "grad_norm": 2.3359688540852184, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 413946248, + "step": 4535 + }, + { + "epoch": 18.895833333333332, + "loss": 0.027322562411427498, + "loss_ce": 1.7009778048304725e-06, + "loss_iou": 0.173828125, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 413946248, + "step": 4535 + }, + { + "epoch": 18.9, + "grad_norm": 2.3552836654186584, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 414037748, + "step": 4536 + }, + { + "epoch": 18.9, + "loss": 0.11393754184246063, + "loss_ce": 1.541888741485309e-05, + "loss_iou": 0.26171875, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 414037748, + "step": 4536 + }, + { + "epoch": 18.904166666666665, + "grad_norm": 3.160657561051584, + "learning_rate": 5e-05, + "loss": 0.0358, + "num_input_tokens_seen": 414128580, + "step": 4537 + }, + { + "epoch": 18.904166666666665, + "loss": 0.02459779940545559, + "loss_ce": 6.322184162854683e-07, + "loss_iou": 0.349609375, + "loss_num": 0.004913330078125, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 414128580, + "step": 4537 + }, + { + "epoch": 18.908333333333335, + "grad_norm": 3.2531765243968995, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 414219824, + "step": 4538 + }, + { + "epoch": 18.908333333333335, + "loss": 0.06067047268152237, + "loss_ce": 1.5271011761797126e-06, + "loss_iou": 0.31640625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 414219824, + "step": 4538 + }, + { + "epoch": 18.9125, + "grad_norm": 3.175017463980461, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 414311600, + "step": 4539 + }, + { + "epoch": 18.9125, + "loss": 0.05885850638151169, + "loss_ce": 0.0002494982036296278, + "loss_iou": 0.314453125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 414311600, + "step": 4539 + }, + { + "epoch": 18.916666666666668, + "grad_norm": 2.8530111649795535, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 414402868, + "step": 4540 + }, + { + "epoch": 18.916666666666668, + "loss": 0.04116272181272507, + "loss_ce": 2.5028559321071953e-05, + "loss_iou": 0.177734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 414402868, + "step": 4540 + }, + { + "epoch": 18.920833333333334, + "grad_norm": 1.4914725595256315, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 414494496, + "step": 4541 + }, + { + "epoch": 18.920833333333334, + "loss": 0.024547066539525986, + "loss_ce": 0.0008005747804418206, + "loss_iou": 0.1689453125, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 414494496, + "step": 4541 + }, + { + "epoch": 18.925, + "grad_norm": 2.1658664665582736, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 414585944, + "step": 4542 + }, + { + "epoch": 18.925, + "loss": 0.04802921786904335, + "loss_ce": 2.5066014131880365e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 414585944, + "step": 4542 + }, + { + "epoch": 18.929166666666667, + "grad_norm": 4.970453465965191, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 414676744, + "step": 4543 + }, + { + "epoch": 18.929166666666667, + "loss": 0.043091583997011185, + "loss_ce": 7.623035571668879e-07, + "loss_iou": 0.30078125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 414676744, + "step": 4543 + }, + { + "epoch": 18.933333333333334, + "grad_norm": 2.162297932419361, + "learning_rate": 5e-05, + "loss": 0.0426, + "num_input_tokens_seen": 414767852, + "step": 4544 + }, + { + "epoch": 18.933333333333334, + "loss": 0.059343062341213226, + "loss_ce": 1.6303615666402038e-06, + "loss_iou": 0.24609375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 414767852, + "step": 4544 + }, + { + "epoch": 18.9375, + "grad_norm": 3.184962848674524, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 414858992, + "step": 4545 + }, + { + "epoch": 18.9375, + "loss": 0.04635874181985855, + "loss_ce": 2.539571596571477e-06, + "loss_iou": 0.26171875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 414858992, + "step": 4545 + }, + { + "epoch": 18.941666666666666, + "grad_norm": 3.3524812443803307, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 414950512, + "step": 4546 + }, + { + "epoch": 18.941666666666666, + "loss": 0.06305290758609772, + "loss_ce": 3.584761316233198e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 414950512, + "step": 4546 + }, + { + "epoch": 18.945833333333333, + "grad_norm": 2.9169754083372994, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 415042388, + "step": 4547 + }, + { + "epoch": 18.945833333333333, + "loss": 0.06709093600511551, + "loss_ce": 0.003065059892833233, + "loss_iou": 0.34375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 415042388, + "step": 4547 + }, + { + "epoch": 18.95, + "grad_norm": 2.9309716744603866, + "learning_rate": 5e-05, + "loss": 0.0735, + "num_input_tokens_seen": 415133472, + "step": 4548 + }, + { + "epoch": 18.95, + "loss": 0.11270265281200409, + "loss_ce": 0.0008404635009355843, + "loss_iou": 0.26953125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 415133472, + "step": 4548 + }, + { + "epoch": 18.954166666666666, + "grad_norm": 2.5923101743984405, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 415224916, + "step": 4549 + }, + { + "epoch": 18.954166666666666, + "loss": 0.03926153853535652, + "loss_ce": 6.759119060006924e-07, + "loss_iou": 0.251953125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 415224916, + "step": 4549 + }, + { + "epoch": 18.958333333333332, + "grad_norm": 4.250379279730983, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 415315736, + "step": 4550 + }, + { + "epoch": 18.958333333333332, + "loss": 0.05874726548790932, + "loss_ce": 9.295710015067016e-07, + "loss_iou": 0.291015625, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 415315736, + "step": 4550 + }, + { + "epoch": 18.9625, + "grad_norm": 14.125519061852755, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 415406960, + "step": 4551 + }, + { + "epoch": 18.9625, + "loss": 0.029943302273750305, + "loss_ce": 3.6074361560167745e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 415406960, + "step": 4551 + }, + { + "epoch": 18.966666666666665, + "grad_norm": 3.272380265120036, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 415498504, + "step": 4552 + }, + { + "epoch": 18.966666666666665, + "loss": 0.05273493379354477, + "loss_ce": 5.606986519524071e-07, + "loss_iou": 0.27734375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 415498504, + "step": 4552 + }, + { + "epoch": 18.970833333333335, + "grad_norm": 2.1512033513970876, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 415589772, + "step": 4553 + }, + { + "epoch": 18.970833333333335, + "loss": 0.03154401481151581, + "loss_ce": 4.097748842468718e-06, + "loss_iou": 0.34765625, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 415589772, + "step": 4553 + }, + { + "epoch": 18.975, + "grad_norm": 2.9219392623315428, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 415680952, + "step": 4554 + }, + { + "epoch": 18.975, + "loss": 0.03689642623066902, + "loss_ce": 6.737686248925456e-07, + "loss_iou": 0.26953125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 415680952, + "step": 4554 + }, + { + "epoch": 18.979166666666668, + "grad_norm": 2.900574044678954, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 415772132, + "step": 4555 + }, + { + "epoch": 18.979166666666668, + "loss": 0.04363527148962021, + "loss_ce": 8.668923692312092e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 415772132, + "step": 4555 + }, + { + "epoch": 18.983333333333334, + "grad_norm": 2.1061819265107475, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 415863748, + "step": 4556 + }, + { + "epoch": 18.983333333333334, + "loss": 0.04218093305826187, + "loss_ce": 0.00010482473589945585, + "loss_iou": 0.197265625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 415863748, + "step": 4556 + }, + { + "epoch": 18.9875, + "grad_norm": 4.744328207250594, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 415954772, + "step": 4557 + }, + { + "epoch": 18.9875, + "loss": 0.07442444562911987, + "loss_ce": 7.329533673328115e-06, + "loss_iou": 0.236328125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 415954772, + "step": 4557 + }, + { + "epoch": 18.991666666666667, + "grad_norm": 10.388722990505855, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 416046356, + "step": 4558 + }, + { + "epoch": 18.991666666666667, + "loss": 0.038884952664375305, + "loss_ce": 5.559993951465003e-06, + "loss_iou": 0.125, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 416046356, + "step": 4558 + }, + { + "epoch": 18.995833333333334, + "grad_norm": 9.518989246986543, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 416137732, + "step": 4559 + }, + { + "epoch": 18.995833333333334, + "loss": 0.05143982917070389, + "loss_ce": 0.00024658982874825597, + "loss_iou": 0.29296875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 416137732, + "step": 4559 + }, + { + "epoch": 19.0, + "grad_norm": 1.8359022855887634, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 416229068, + "step": 4560 + }, + { + "epoch": 19.0, + "loss": 0.028715705499053, + "loss_ce": 2.918194695666898e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 416229068, + "step": 4560 + }, + { + "epoch": 19.004166666666666, + "grad_norm": 6.715655239337954, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 416320568, + "step": 4561 + }, + { + "epoch": 19.004166666666666, + "loss": 0.05667451396584511, + "loss_ce": 3.371906132088043e-06, + "loss_iou": 0.0390625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 416320568, + "step": 4561 + }, + { + "epoch": 19.008333333333333, + "grad_norm": 6.957390228996864, + "learning_rate": 5e-05, + "loss": 0.0675, + "num_input_tokens_seen": 416412304, + "step": 4562 + }, + { + "epoch": 19.008333333333333, + "loss": 0.024202125146985054, + "loss_ce": 1.6853648503456498e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.004852294921875, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 416412304, + "step": 4562 + }, + { + "epoch": 19.0125, + "grad_norm": 3.496132083831709, + "learning_rate": 5e-05, + "loss": 0.0865, + "num_input_tokens_seen": 416503232, + "step": 4563 + }, + { + "epoch": 19.0125, + "loss": 0.042641233652830124, + "loss_ce": 5.494790684679174e-07, + "loss_iou": 0.1953125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 416503232, + "step": 4563 + }, + { + "epoch": 19.016666666666666, + "grad_norm": 2.97382417361745, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 416594068, + "step": 4564 + }, + { + "epoch": 19.016666666666666, + "loss": 0.0603485070168972, + "loss_ce": 2.2883396013639867e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 416594068, + "step": 4564 + }, + { + "epoch": 19.020833333333332, + "grad_norm": 5.756229010297488, + "learning_rate": 5e-05, + "loss": 0.0849, + "num_input_tokens_seen": 416685476, + "step": 4565 + }, + { + "epoch": 19.020833333333332, + "loss": 0.09227042645215988, + "loss_ce": 5.330486487764574e-07, + "loss_iou": 0.2421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 416685476, + "step": 4565 + }, + { + "epoch": 19.025, + "grad_norm": 3.4465502009365765, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 416777284, + "step": 4566 + }, + { + "epoch": 19.025, + "loss": 0.05590882524847984, + "loss_ce": 6.220782324817264e-07, + "loss_iou": 0.2216796875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 416777284, + "step": 4566 + }, + { + "epoch": 19.029166666666665, + "grad_norm": 2.79061227690433, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 416868844, + "step": 4567 + }, + { + "epoch": 19.029166666666665, + "loss": 0.06916234642267227, + "loss_ce": 4.003128196927719e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 416868844, + "step": 4567 + }, + { + "epoch": 19.033333333333335, + "grad_norm": 3.02092776013322, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 416959988, + "step": 4568 + }, + { + "epoch": 19.033333333333335, + "loss": 0.06227421760559082, + "loss_ce": 3.0962000892031938e-06, + "loss_iou": 0.435546875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 416959988, + "step": 4568 + }, + { + "epoch": 19.0375, + "grad_norm": 3.2971210612873905, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 417050984, + "step": 4569 + }, + { + "epoch": 19.0375, + "loss": 0.055607423186302185, + "loss_ce": 4.39636642113328e-06, + "loss_iou": 0.28125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 417050984, + "step": 4569 + }, + { + "epoch": 19.041666666666668, + "grad_norm": 5.407971242628027, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 417142000, + "step": 4570 + }, + { + "epoch": 19.041666666666668, + "loss": 0.05627996101975441, + "loss_ce": 8.18439875729382e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 417142000, + "step": 4570 + }, + { + "epoch": 19.045833333333334, + "grad_norm": 3.072596211228088, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 417232916, + "step": 4571 + }, + { + "epoch": 19.045833333333334, + "loss": 0.07009995728731155, + "loss_ce": 1.0801534244819777e-06, + "loss_iou": 0.3359375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 417232916, + "step": 4571 + }, + { + "epoch": 19.05, + "grad_norm": 2.244302503059525, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 417324232, + "step": 4572 + }, + { + "epoch": 19.05, + "loss": 0.10026659071445465, + "loss_ce": 1.6345171388820745e-05, + "loss_iou": 0.30859375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 417324232, + "step": 4572 + }, + { + "epoch": 19.054166666666667, + "grad_norm": 2.9609513120700575, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 417415708, + "step": 4573 + }, + { + "epoch": 19.054166666666667, + "loss": 0.047413308173418045, + "loss_ce": 1.187850193673512e-05, + "loss_iou": 0.373046875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 417415708, + "step": 4573 + }, + { + "epoch": 19.058333333333334, + "grad_norm": 2.369157132961251, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 417507108, + "step": 4574 + }, + { + "epoch": 19.058333333333334, + "loss": 0.07667060196399689, + "loss_ce": 1.0441099220770411e-05, + "loss_iou": 0.224609375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 417507108, + "step": 4574 + }, + { + "epoch": 19.0625, + "grad_norm": 1.3947638658137602, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 417598908, + "step": 4575 + }, + { + "epoch": 19.0625, + "loss": 0.02127896249294281, + "loss_ce": 5.804000693387934e-07, + "loss_iou": 0.169921875, + "loss_num": 0.0042724609375, + "loss_xval": 0.021240234375, + "num_input_tokens_seen": 417598908, + "step": 4575 + }, + { + "epoch": 19.066666666666666, + "grad_norm": 1.8918181370378695, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 417690016, + "step": 4576 + }, + { + "epoch": 19.066666666666666, + "loss": 0.13125675916671753, + "loss_ce": 6.536600949402782e-07, + "loss_iou": 0.1474609375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 417690016, + "step": 4576 + }, + { + "epoch": 19.070833333333333, + "grad_norm": 1.7643172128339852, + "learning_rate": 5e-05, + "loss": 0.0272, + "num_input_tokens_seen": 417781528, + "step": 4577 + }, + { + "epoch": 19.070833333333333, + "loss": 0.024284040555357933, + "loss_ce": 3.019509676960297e-05, + "loss_iou": 0.259765625, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 417781528, + "step": 4577 + }, + { + "epoch": 19.075, + "grad_norm": 2.438866524106157, + "learning_rate": 5e-05, + "loss": 0.0367, + "num_input_tokens_seen": 417871744, + "step": 4578 + }, + { + "epoch": 19.075, + "loss": 0.03327229619026184, + "loss_ce": 2.3394957679556683e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 417871744, + "step": 4578 + }, + { + "epoch": 19.079166666666666, + "grad_norm": 2.7742865635276135, + "learning_rate": 5e-05, + "loss": 0.0929, + "num_input_tokens_seen": 417962520, + "step": 4579 + }, + { + "epoch": 19.079166666666666, + "loss": 0.1042037308216095, + "loss_ce": 1.4593413197871996e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 417962520, + "step": 4579 + }, + { + "epoch": 19.083333333333332, + "grad_norm": 1.9474276412188012, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 418053328, + "step": 4580 + }, + { + "epoch": 19.083333333333332, + "loss": 0.03855319693684578, + "loss_ce": 1.8681653273233678e-06, + "loss_iou": 0.0615234375, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 418053328, + "step": 4580 + }, + { + "epoch": 19.0875, + "grad_norm": 1.8638873381418695, + "learning_rate": 5e-05, + "loss": 0.0283, + "num_input_tokens_seen": 418144472, + "step": 4581 + }, + { + "epoch": 19.0875, + "loss": 0.02354763075709343, + "loss_ce": 3.3199507925019134e-06, + "loss_iou": 0.1044921875, + "loss_num": 0.00469970703125, + "loss_xval": 0.0235595703125, + "num_input_tokens_seen": 418144472, + "step": 4581 + }, + { + "epoch": 19.091666666666665, + "grad_norm": 4.119505044884438, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 418236104, + "step": 4582 + }, + { + "epoch": 19.091666666666665, + "loss": 0.055428922176361084, + "loss_ce": 0.00017684623890090734, + "loss_iou": 0.34375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 418236104, + "step": 4582 + }, + { + "epoch": 19.095833333333335, + "grad_norm": 1.9469295253184462, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 418327876, + "step": 4583 + }, + { + "epoch": 19.095833333333335, + "loss": 0.06220350041985512, + "loss_ce": 8.677566256665159e-06, + "loss_iou": 0.287109375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 418327876, + "step": 4583 + }, + { + "epoch": 19.1, + "grad_norm": 2.2121584184868843, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 418419188, + "step": 4584 + }, + { + "epoch": 19.1, + "loss": 0.05744265764951706, + "loss_ce": 8.573052582505625e-06, + "loss_iou": 0.27734375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 418419188, + "step": 4584 + }, + { + "epoch": 19.104166666666668, + "grad_norm": 12.93904308181816, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 418510720, + "step": 4585 + }, + { + "epoch": 19.104166666666668, + "loss": 0.039964459836483, + "loss_ce": 1.6924389001360396e-06, + "loss_iou": 0.2470703125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 418510720, + "step": 4585 + }, + { + "epoch": 19.108333333333334, + "grad_norm": 2.9666561806430396, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 418602060, + "step": 4586 + }, + { + "epoch": 19.108333333333334, + "loss": 0.02945863828063011, + "loss_ce": 1.545392024127068e-06, + "loss_iou": 0.326171875, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 418602060, + "step": 4586 + }, + { + "epoch": 19.1125, + "grad_norm": 3.6047797729490227, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 418693120, + "step": 4587 + }, + { + "epoch": 19.1125, + "loss": 0.05617877095937729, + "loss_ce": 1.1170643119839951e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 418693120, + "step": 4587 + }, + { + "epoch": 19.116666666666667, + "grad_norm": 2.8946339671114987, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 418784704, + "step": 4588 + }, + { + "epoch": 19.116666666666667, + "loss": 0.04178152233362198, + "loss_ce": 2.955552645289572e-06, + "loss_iou": 0.3515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 418784704, + "step": 4588 + }, + { + "epoch": 19.120833333333334, + "grad_norm": 5.404336078957903, + "learning_rate": 5e-05, + "loss": 0.0707, + "num_input_tokens_seen": 418876000, + "step": 4589 + }, + { + "epoch": 19.120833333333334, + "loss": 0.07328888028860092, + "loss_ce": 9.190454761665023e-07, + "loss_iou": 0.25390625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 418876000, + "step": 4589 + }, + { + "epoch": 19.125, + "grad_norm": 3.182620288468722, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 418966880, + "step": 4590 + }, + { + "epoch": 19.125, + "loss": 0.029190445318818092, + "loss_ce": 3.817777667336486e-07, + "loss_iou": 0.291015625, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 418966880, + "step": 4590 + }, + { + "epoch": 19.129166666666666, + "grad_norm": 3.29085385384401, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 419058444, + "step": 4591 + }, + { + "epoch": 19.129166666666666, + "loss": 0.0446050763130188, + "loss_ce": 3.637588861238328e-06, + "loss_iou": 0.17578125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 419058444, + "step": 4591 + }, + { + "epoch": 19.133333333333333, + "grad_norm": 3.3331466382473955, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 419149700, + "step": 4592 + }, + { + "epoch": 19.133333333333333, + "loss": 0.07439761608839035, + "loss_ce": 0.000270417018327862, + "loss_iou": 0.3515625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 419149700, + "step": 4592 + }, + { + "epoch": 19.1375, + "grad_norm": 2.538930195089702, + "learning_rate": 5e-05, + "loss": 0.0266, + "num_input_tokens_seen": 419240220, + "step": 4593 + }, + { + "epoch": 19.1375, + "loss": 0.026464354246854782, + "loss_ce": 5.614498149952851e-06, + "loss_iou": 0.0927734375, + "loss_num": 0.005279541015625, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 419240220, + "step": 4593 + }, + { + "epoch": 19.141666666666666, + "grad_norm": 1.845257482792228, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 419331324, + "step": 4594 + }, + { + "epoch": 19.141666666666666, + "loss": 0.04600181058049202, + "loss_ce": 3.78307788651e-07, + "loss_iou": 0.134765625, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 419331324, + "step": 4594 + }, + { + "epoch": 19.145833333333332, + "grad_norm": 1.6647310288365473, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 419422536, + "step": 4595 + }, + { + "epoch": 19.145833333333332, + "loss": 0.03375301510095596, + "loss_ce": 5.70387953757745e-07, + "loss_iou": 0.2080078125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 419422536, + "step": 4595 + }, + { + "epoch": 19.15, + "grad_norm": 2.4610156499270515, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 419513348, + "step": 4596 + }, + { + "epoch": 19.15, + "loss": 0.07220683991909027, + "loss_ce": 2.2547997104993556e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 419513348, + "step": 4596 + }, + { + "epoch": 19.154166666666665, + "grad_norm": 2.281233719535446, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 419604488, + "step": 4597 + }, + { + "epoch": 19.154166666666665, + "loss": 0.027935050427913666, + "loss_ce": 2.6724294002633542e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 419604488, + "step": 4597 + }, + { + "epoch": 19.158333333333335, + "grad_norm": 2.3784933036688747, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 419695952, + "step": 4598 + }, + { + "epoch": 19.158333333333335, + "loss": 0.028787771239876747, + "loss_ce": 5.5471795349149033e-05, + "loss_iou": 0.265625, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 419695952, + "step": 4598 + }, + { + "epoch": 19.1625, + "grad_norm": 2.9961719644931772, + "learning_rate": 5e-05, + "loss": 0.0316, + "num_input_tokens_seen": 419787268, + "step": 4599 + }, + { + "epoch": 19.1625, + "loss": 0.038679152727127075, + "loss_ce": 0.0002117453987011686, + "loss_iou": 0.185546875, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 419787268, + "step": 4599 + }, + { + "epoch": 19.166666666666668, + "grad_norm": 10.899750783137005, + "learning_rate": 5e-05, + "loss": 0.0909, + "num_input_tokens_seen": 419878896, + "step": 4600 + }, + { + "epoch": 19.166666666666668, + "loss": 0.07383735477924347, + "loss_ce": 1.532657552161254e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 419878896, + "step": 4600 + }, + { + "epoch": 19.170833333333334, + "grad_norm": 2.0187713051589995, + "learning_rate": 5e-05, + "loss": 0.0765, + "num_input_tokens_seen": 419970188, + "step": 4601 + }, + { + "epoch": 19.170833333333334, + "loss": 0.05879247188568115, + "loss_ce": 3.5817220123135485e-07, + "loss_iou": 0.28125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 419970188, + "step": 4601 + }, + { + "epoch": 19.175, + "grad_norm": 2.397311933697297, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 420059880, + "step": 4602 + }, + { + "epoch": 19.175, + "loss": 0.03691239282488823, + "loss_ce": 1.3815242709824815e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 420059880, + "step": 4602 + }, + { + "epoch": 19.179166666666667, + "grad_norm": 2.472619211766026, + "learning_rate": 5e-05, + "loss": 0.0345, + "num_input_tokens_seen": 420150648, + "step": 4603 + }, + { + "epoch": 19.179166666666667, + "loss": 0.027687918394804, + "loss_ce": 0.00011528656614245847, + "loss_iou": 0.205078125, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 420150648, + "step": 4603 + }, + { + "epoch": 19.183333333333334, + "grad_norm": 2.212561412051951, + "learning_rate": 5e-05, + "loss": 0.0371, + "num_input_tokens_seen": 420242356, + "step": 4604 + }, + { + "epoch": 19.183333333333334, + "loss": 0.03770972788333893, + "loss_ce": 5.2634909479820635e-06, + "loss_iou": 0.2451171875, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 420242356, + "step": 4604 + }, + { + "epoch": 19.1875, + "grad_norm": 2.719185495876815, + "learning_rate": 5e-05, + "loss": 0.0376, + "num_input_tokens_seen": 420333892, + "step": 4605 + }, + { + "epoch": 19.1875, + "loss": 0.03716596961021423, + "loss_ce": 3.1870847578829853e-06, + "loss_iou": 0.34765625, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 420333892, + "step": 4605 + }, + { + "epoch": 19.191666666666666, + "grad_norm": 3.8515667271314955, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 420424708, + "step": 4606 + }, + { + "epoch": 19.191666666666666, + "loss": 0.03641510009765625, + "loss_ce": 7.628682851645863e-06, + "loss_iou": 0.3125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 420424708, + "step": 4606 + }, + { + "epoch": 19.195833333333333, + "grad_norm": 3.647166180041839, + "learning_rate": 5e-05, + "loss": 0.0684, + "num_input_tokens_seen": 420515716, + "step": 4607 + }, + { + "epoch": 19.195833333333333, + "loss": 0.04955513775348663, + "loss_ce": 2.218728923253366e-06, + "loss_iou": 0.3671875, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 420515716, + "step": 4607 + }, + { + "epoch": 19.2, + "grad_norm": 2.7711581869391506, + "learning_rate": 5e-05, + "loss": 0.1014, + "num_input_tokens_seen": 420606832, + "step": 4608 + }, + { + "epoch": 19.2, + "loss": 0.09333069622516632, + "loss_ce": 3.1158111823970103e-07, + "loss_iou": 0.08251953125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 420606832, + "step": 4608 + }, + { + "epoch": 19.204166666666666, + "grad_norm": 1.9191125356977428, + "learning_rate": 5e-05, + "loss": 0.027, + "num_input_tokens_seen": 420698212, + "step": 4609 + }, + { + "epoch": 19.204166666666666, + "loss": 0.02860306203365326, + "loss_ce": 8.092390999081545e-06, + "loss_iou": 0.203125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 420698212, + "step": 4609 + }, + { + "epoch": 19.208333333333332, + "grad_norm": 1.381995485228386, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 420789712, + "step": 4610 + }, + { + "epoch": 19.208333333333332, + "loss": 0.052813343703746796, + "loss_ce": 0.001704030204564333, + "loss_iou": 0.2265625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 420789712, + "step": 4610 + }, + { + "epoch": 19.2125, + "grad_norm": 2.6387628805307397, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 420881292, + "step": 4611 + }, + { + "epoch": 19.2125, + "loss": 0.06479300558567047, + "loss_ce": 4.183812052360736e-06, + "loss_iou": 0.310546875, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 420881292, + "step": 4611 + }, + { + "epoch": 19.216666666666665, + "grad_norm": 5.227308189588182, + "learning_rate": 5e-05, + "loss": 0.0663, + "num_input_tokens_seen": 420973184, + "step": 4612 + }, + { + "epoch": 19.216666666666665, + "loss": 0.037201642990112305, + "loss_ce": 7.13834765520005e-07, + "loss_iou": 0.26953125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 420973184, + "step": 4612 + }, + { + "epoch": 19.220833333333335, + "grad_norm": 2.520524143939457, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 421064220, + "step": 4613 + }, + { + "epoch": 19.220833333333335, + "loss": 0.05414511263370514, + "loss_ce": 2.9817920221830718e-05, + "loss_iou": 0.291015625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 421064220, + "step": 4613 + }, + { + "epoch": 19.225, + "grad_norm": 2.049099752261963, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 421155728, + "step": 4614 + }, + { + "epoch": 19.225, + "loss": 0.06513015180826187, + "loss_ce": 0.00020399918139446527, + "loss_iou": 0.263671875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 421155728, + "step": 4614 + }, + { + "epoch": 19.229166666666668, + "grad_norm": 3.1133542526049043, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 421247300, + "step": 4615 + }, + { + "epoch": 19.229166666666668, + "loss": 0.05482819676399231, + "loss_ce": 3.372350420249859e-06, + "loss_iou": 0.357421875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 421247300, + "step": 4615 + }, + { + "epoch": 19.233333333333334, + "grad_norm": 2.7352091884268783, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 421338644, + "step": 4616 + }, + { + "epoch": 19.233333333333334, + "loss": 0.04940875619649887, + "loss_ce": 8.015024945962068e-07, + "loss_iou": 0.240234375, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 421338644, + "step": 4616 + }, + { + "epoch": 19.2375, + "grad_norm": 2.1159310329647263, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 421430104, + "step": 4617 + }, + { + "epoch": 19.2375, + "loss": 0.056608691811561584, + "loss_ce": 5.962173963780515e-05, + "loss_iou": 0.392578125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 421430104, + "step": 4617 + }, + { + "epoch": 19.241666666666667, + "grad_norm": 2.2682581554209995, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 421520848, + "step": 4618 + }, + { + "epoch": 19.241666666666667, + "loss": 0.03226238861680031, + "loss_ce": 0.00015789664757903665, + "loss_iou": 0.2421875, + "loss_num": 0.00640869140625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 421520848, + "step": 4618 + }, + { + "epoch": 19.245833333333334, + "grad_norm": 3.0687848147882666, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 421610708, + "step": 4619 + }, + { + "epoch": 19.245833333333334, + "loss": 0.04960303753614426, + "loss_ce": 1.1973999789915979e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 421610708, + "step": 4619 + }, + { + "epoch": 19.25, + "grad_norm": 2.688487761475692, + "learning_rate": 5e-05, + "loss": 0.0753, + "num_input_tokens_seen": 421701940, + "step": 4620 + }, + { + "epoch": 19.25, + "loss": 0.04789130389690399, + "loss_ce": 1.5958464700815966e-06, + "loss_iou": 0.25390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 421701940, + "step": 4620 + }, + { + "epoch": 19.254166666666666, + "grad_norm": 2.7867440067556357, + "learning_rate": 5e-05, + "loss": 0.0776, + "num_input_tokens_seen": 421793464, + "step": 4621 + }, + { + "epoch": 19.254166666666666, + "loss": 0.05678050220012665, + "loss_ce": 2.548747943365015e-06, + "loss_iou": 0.373046875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 421793464, + "step": 4621 + }, + { + "epoch": 19.258333333333333, + "grad_norm": 2.7770962428236006, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 421884396, + "step": 4622 + }, + { + "epoch": 19.258333333333333, + "loss": 0.04880964383482933, + "loss_ce": 4.406524567457382e-06, + "loss_iou": 0.1416015625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 421884396, + "step": 4622 + }, + { + "epoch": 19.2625, + "grad_norm": 2.2242337443277167, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 421975608, + "step": 4623 + }, + { + "epoch": 19.2625, + "loss": 0.04673875868320465, + "loss_ce": 1.083584493244416e-06, + "loss_iou": 0.30859375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 421975608, + "step": 4623 + }, + { + "epoch": 19.266666666666666, + "grad_norm": 2.489074077579362, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 422066960, + "step": 4624 + }, + { + "epoch": 19.266666666666666, + "loss": 0.031821057200431824, + "loss_ce": 2.6656816771719605e-06, + "loss_iou": 0.287109375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 422066960, + "step": 4624 + }, + { + "epoch": 19.270833333333332, + "grad_norm": 6.752299421382515, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 422158040, + "step": 4625 + }, + { + "epoch": 19.270833333333332, + "loss": 0.04742731153964996, + "loss_ce": 2.9909158456575824e-06, + "loss_iou": 0.34765625, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 422158040, + "step": 4625 + }, + { + "epoch": 19.275, + "grad_norm": 4.463241691798342, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 422249268, + "step": 4626 + }, + { + "epoch": 19.275, + "loss": 0.0679115504026413, + "loss_ce": 9.937119102687575e-06, + "loss_iou": 0.2001953125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 422249268, + "step": 4626 + }, + { + "epoch": 19.279166666666665, + "grad_norm": 2.493955255139426, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 422341256, + "step": 4627 + }, + { + "epoch": 19.279166666666665, + "loss": 0.03526730090379715, + "loss_ce": 0.002224391559138894, + "loss_iou": 0.357421875, + "loss_num": 0.006622314453125, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 422341256, + "step": 4627 + }, + { + "epoch": 19.283333333333335, + "grad_norm": 1.9185103008354087, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 422432976, + "step": 4628 + }, + { + "epoch": 19.283333333333335, + "loss": 0.036364421248435974, + "loss_ce": 0.008234846405684948, + "loss_iou": 0.2578125, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 422432976, + "step": 4628 + }, + { + "epoch": 19.2875, + "grad_norm": 2.399643182157675, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 422524804, + "step": 4629 + }, + { + "epoch": 19.2875, + "loss": 0.04569268226623535, + "loss_ce": 7.867306521802675e-06, + "loss_iou": 0.2265625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 422524804, + "step": 4629 + }, + { + "epoch": 19.291666666666668, + "grad_norm": 6.484003390186585, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 422616320, + "step": 4630 + }, + { + "epoch": 19.291666666666668, + "loss": 0.046999868005514145, + "loss_ce": 1.805866850190796e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 422616320, + "step": 4630 + }, + { + "epoch": 19.295833333333334, + "grad_norm": 3.7062116592217076, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 422707244, + "step": 4631 + }, + { + "epoch": 19.295833333333334, + "loss": 0.04261889308691025, + "loss_ce": 1.0962626220134553e-06, + "loss_iou": 0.306640625, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 422707244, + "step": 4631 + }, + { + "epoch": 19.3, + "grad_norm": 3.0011988021600846, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 422799340, + "step": 4632 + }, + { + "epoch": 19.3, + "loss": 0.04482024535536766, + "loss_ce": 9.673715248936787e-05, + "loss_iou": 0.185546875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 422799340, + "step": 4632 + }, + { + "epoch": 19.304166666666667, + "grad_norm": 3.561026396108788, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 422890824, + "step": 4633 + }, + { + "epoch": 19.304166666666667, + "loss": 0.033055853098630905, + "loss_ce": 5.316383067111019e-06, + "loss_iou": 0.33203125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 422890824, + "step": 4633 + }, + { + "epoch": 19.308333333333334, + "grad_norm": 2.5648590204529653, + "learning_rate": 5e-05, + "loss": 0.0371, + "num_input_tokens_seen": 422981696, + "step": 4634 + }, + { + "epoch": 19.308333333333334, + "loss": 0.03345128148794174, + "loss_ce": 4.014779733552132e-06, + "loss_iou": 0.248046875, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 422981696, + "step": 4634 + }, + { + "epoch": 19.3125, + "grad_norm": 2.713760964626292, + "learning_rate": 5e-05, + "loss": 0.0322, + "num_input_tokens_seen": 423073144, + "step": 4635 + }, + { + "epoch": 19.3125, + "loss": 0.03699225187301636, + "loss_ce": 1.2577083907672204e-05, + "loss_iou": 0.2890625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 423073144, + "step": 4635 + }, + { + "epoch": 19.316666666666666, + "grad_norm": 2.8395400432992584, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 423164548, + "step": 4636 + }, + { + "epoch": 19.316666666666666, + "loss": 0.10205307602882385, + "loss_ce": 2.2919698494661134e-06, + "loss_iou": 0.375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 423164548, + "step": 4636 + }, + { + "epoch": 19.320833333333333, + "grad_norm": 1.4517058294201308, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 423255736, + "step": 4637 + }, + { + "epoch": 19.320833333333333, + "loss": 0.022456102073192596, + "loss_ce": 0.0004376704164315015, + "loss_iou": 0.2431640625, + "loss_num": 0.00439453125, + "loss_xval": 0.02197265625, + "num_input_tokens_seen": 423255736, + "step": 4637 + }, + { + "epoch": 19.325, + "grad_norm": 1.7284897894745854, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 423347540, + "step": 4638 + }, + { + "epoch": 19.325, + "loss": 0.031548645347356796, + "loss_ce": 0.0015803832793608308, + "loss_iou": 0.087890625, + "loss_num": 0.0059814453125, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 423347540, + "step": 4638 + }, + { + "epoch": 19.329166666666666, + "grad_norm": 3.241048032926379, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 423439020, + "step": 4639 + }, + { + "epoch": 19.329166666666666, + "loss": 0.047394994646310806, + "loss_ce": 1.1946518725380884e-06, + "loss_iou": 0.33203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 423439020, + "step": 4639 + }, + { + "epoch": 19.333333333333332, + "grad_norm": 1.7661108467346713, + "learning_rate": 5e-05, + "loss": 0.0313, + "num_input_tokens_seen": 423530072, + "step": 4640 + }, + { + "epoch": 19.333333333333332, + "loss": 0.024259299039840698, + "loss_ce": 0.00010463706712471321, + "loss_iou": 0.251953125, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 423530072, + "step": 4640 + }, + { + "epoch": 19.3375, + "grad_norm": 1.796054270519455, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 423621112, + "step": 4641 + }, + { + "epoch": 19.3375, + "loss": 0.05872287228703499, + "loss_ce": 0.00011385927791707218, + "loss_iou": 0.0908203125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 423621112, + "step": 4641 + }, + { + "epoch": 19.341666666666665, + "grad_norm": 2.8753962287058172, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 423712208, + "step": 4642 + }, + { + "epoch": 19.341666666666665, + "loss": 0.02776733972132206, + "loss_ce": 1.1602001904975623e-05, + "loss_iou": 0.22265625, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 423712208, + "step": 4642 + }, + { + "epoch": 19.345833333333335, + "grad_norm": 3.584867533621596, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 423802712, + "step": 4643 + }, + { + "epoch": 19.345833333333335, + "loss": 0.05088900774717331, + "loss_ce": 9.473724276176654e-07, + "loss_iou": 0.263671875, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 423802712, + "step": 4643 + }, + { + "epoch": 19.35, + "grad_norm": 4.460664034772701, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 423894308, + "step": 4644 + }, + { + "epoch": 19.35, + "loss": 0.03981529548764229, + "loss_ce": 2.800288530124817e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 423894308, + "step": 4644 + }, + { + "epoch": 19.354166666666668, + "grad_norm": 3.4040997149712022, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 423984912, + "step": 4645 + }, + { + "epoch": 19.354166666666668, + "loss": 0.05637955665588379, + "loss_ce": 1.3589784430223517e-05, + "loss_iou": 0.30078125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 423984912, + "step": 4645 + }, + { + "epoch": 19.358333333333334, + "grad_norm": 2.4571329662506205, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 424075428, + "step": 4646 + }, + { + "epoch": 19.358333333333334, + "loss": 0.04334487393498421, + "loss_ce": 2.2821677703177556e-06, + "loss_iou": 0.203125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 424075428, + "step": 4646 + }, + { + "epoch": 19.3625, + "grad_norm": 1.9287176151702943, + "learning_rate": 5e-05, + "loss": 0.0515, + "num_input_tokens_seen": 424166624, + "step": 4647 + }, + { + "epoch": 19.3625, + "loss": 0.043597668409347534, + "loss_ce": 8.627773058833554e-05, + "loss_iou": 0.21875, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 424166624, + "step": 4647 + }, + { + "epoch": 19.366666666666667, + "grad_norm": 3.378448794891788, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 424258132, + "step": 4648 + }, + { + "epoch": 19.366666666666667, + "loss": 0.044696178287267685, + "loss_ce": 2.607420174172148e-05, + "loss_iou": 0.2421875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 424258132, + "step": 4648 + }, + { + "epoch": 19.370833333333334, + "grad_norm": 5.674397449328923, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 424348976, + "step": 4649 + }, + { + "epoch": 19.370833333333334, + "loss": 0.036913517862558365, + "loss_ce": 2.5051069769688183e-06, + "loss_iou": 0.34375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 424348976, + "step": 4649 + }, + { + "epoch": 19.375, + "grad_norm": 2.679481678738005, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 424440316, + "step": 4650 + }, + { + "epoch": 19.375, + "loss": 0.09503061324357986, + "loss_ce": 2.93930515908869e-05, + "loss_iou": 0.326171875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 424440316, + "step": 4650 + }, + { + "epoch": 19.379166666666666, + "grad_norm": 2.508375754683479, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 424531544, + "step": 4651 + }, + { + "epoch": 19.379166666666666, + "loss": 0.03825172036886215, + "loss_ce": 2.8451055186451413e-05, + "loss_iou": 0.361328125, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 424531544, + "step": 4651 + }, + { + "epoch": 19.383333333333333, + "grad_norm": 2.380298782734332, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 424622704, + "step": 4652 + }, + { + "epoch": 19.383333333333333, + "loss": 0.05666510760784149, + "loss_ce": 9.22513208934106e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 424622704, + "step": 4652 + }, + { + "epoch": 19.3875, + "grad_norm": 5.627118312781519, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 424714176, + "step": 4653 + }, + { + "epoch": 19.3875, + "loss": 0.05855630338191986, + "loss_ce": 0.0014350259443745017, + "loss_iou": 0.2060546875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 424714176, + "step": 4653 + }, + { + "epoch": 19.391666666666666, + "grad_norm": 2.6564384856760244, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 424805272, + "step": 4654 + }, + { + "epoch": 19.391666666666666, + "loss": 0.043996796011924744, + "loss_ce": 5.7064071370405145e-06, + "loss_iou": 0.189453125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 424805272, + "step": 4654 + }, + { + "epoch": 19.395833333333332, + "grad_norm": 4.446683692038585, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 424897180, + "step": 4655 + }, + { + "epoch": 19.395833333333332, + "loss": 0.030892925336956978, + "loss_ce": 5.491260890266858e-05, + "loss_iou": 0.322265625, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 424897180, + "step": 4655 + }, + { + "epoch": 19.4, + "grad_norm": 3.3669092038663226, + "learning_rate": 5e-05, + "loss": 0.1017, + "num_input_tokens_seen": 424988992, + "step": 4656 + }, + { + "epoch": 19.4, + "loss": 0.14664022624492645, + "loss_ce": 3.2549396564718336e-06, + "loss_iou": 0.28125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 424988992, + "step": 4656 + }, + { + "epoch": 19.404166666666665, + "grad_norm": 2.2558108668362733, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 425080276, + "step": 4657 + }, + { + "epoch": 19.404166666666665, + "loss": 0.026068340986967087, + "loss_ce": 6.3310485529655125e-06, + "loss_iou": 0.263671875, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 425080276, + "step": 4657 + }, + { + "epoch": 19.408333333333335, + "grad_norm": 2.3723436902042834, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 425171800, + "step": 4658 + }, + { + "epoch": 19.408333333333335, + "loss": 0.0370577797293663, + "loss_ce": 5.5216809414559975e-05, + "loss_iou": 0.279296875, + "loss_num": 0.00738525390625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 425171800, + "step": 4658 + }, + { + "epoch": 19.4125, + "grad_norm": 2.306311635313943, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 425263160, + "step": 4659 + }, + { + "epoch": 19.4125, + "loss": 0.04586896300315857, + "loss_ce": 1.0427361303300131e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 425263160, + "step": 4659 + }, + { + "epoch": 19.416666666666668, + "grad_norm": 2.3018572746408976, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 425354852, + "step": 4660 + }, + { + "epoch": 19.416666666666668, + "loss": 0.049921829253435135, + "loss_ce": 0.009493667632341385, + "loss_iou": 0.28125, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 425354852, + "step": 4660 + }, + { + "epoch": 19.420833333333334, + "grad_norm": 13.237613082223017, + "learning_rate": 5e-05, + "loss": 0.1018, + "num_input_tokens_seen": 425445808, + "step": 4661 + }, + { + "epoch": 19.420833333333334, + "loss": 0.0899994745850563, + "loss_ce": 3.1283711905416567e-06, + "loss_iou": 0.283203125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 425445808, + "step": 4661 + }, + { + "epoch": 19.425, + "grad_norm": 1.359916345120742, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 425536932, + "step": 4662 + }, + { + "epoch": 19.425, + "loss": 0.03377960994839668, + "loss_ce": 4.668272879371216e-07, + "loss_iou": 0.2333984375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 425536932, + "step": 4662 + }, + { + "epoch": 19.429166666666667, + "grad_norm": 1.892552875532026, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 425628296, + "step": 4663 + }, + { + "epoch": 19.429166666666667, + "loss": 0.04816794395446777, + "loss_ce": 7.392980933218496e-06, + "loss_iou": 0.10546875, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 425628296, + "step": 4663 + }, + { + "epoch": 19.433333333333334, + "grad_norm": 0.9833520687870673, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 425720208, + "step": 4664 + }, + { + "epoch": 19.433333333333334, + "loss": 0.056046824902296066, + "loss_ce": 0.001969678560271859, + "loss_iou": 0.201171875, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 425720208, + "step": 4664 + }, + { + "epoch": 19.4375, + "grad_norm": 3.3194012961821304, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 425811708, + "step": 4665 + }, + { + "epoch": 19.4375, + "loss": 0.023443780839443207, + "loss_ce": 9.783326095202938e-05, + "loss_iou": 0.28125, + "loss_num": 0.004669189453125, + "loss_xval": 0.0233154296875, + "num_input_tokens_seen": 425811708, + "step": 4665 + }, + { + "epoch": 19.441666666666666, + "grad_norm": 2.2507664632180524, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 425902828, + "step": 4666 + }, + { + "epoch": 19.441666666666666, + "loss": 0.14204253256320953, + "loss_ce": 0.0031570233404636383, + "loss_iou": 0.1435546875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 425902828, + "step": 4666 + }, + { + "epoch": 19.445833333333333, + "grad_norm": 2.5591175574352567, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 425994712, + "step": 4667 + }, + { + "epoch": 19.445833333333333, + "loss": 0.03063901700079441, + "loss_ce": 3.7513345887418836e-05, + "loss_iou": 0.28125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 425994712, + "step": 4667 + }, + { + "epoch": 19.45, + "grad_norm": 3.1368144608611517, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 426085312, + "step": 4668 + }, + { + "epoch": 19.45, + "loss": 0.050644420087337494, + "loss_ce": 4.987635975339799e-07, + "loss_iou": 0.28125, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 426085312, + "step": 4668 + }, + { + "epoch": 19.454166666666666, + "grad_norm": 2.8116083842144644, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 426176332, + "step": 4669 + }, + { + "epoch": 19.454166666666666, + "loss": 0.09149643033742905, + "loss_ce": 4.723514393845107e-06, + "loss_iou": 0.28125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 426176332, + "step": 4669 + }, + { + "epoch": 19.458333333333332, + "grad_norm": 3.030003459572827, + "learning_rate": 5e-05, + "loss": 0.0482, + "num_input_tokens_seen": 426267580, + "step": 4670 + }, + { + "epoch": 19.458333333333332, + "loss": 0.047811392694711685, + "loss_ce": 5.608817900792928e-06, + "loss_iou": 0.21875, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 426267580, + "step": 4670 + }, + { + "epoch": 19.4625, + "grad_norm": 1.4491655834251747, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 426358968, + "step": 4671 + }, + { + "epoch": 19.4625, + "loss": 0.022887222468852997, + "loss_ce": 6.6694537963485345e-06, + "loss_iou": 0.08984375, + "loss_num": 0.00457763671875, + "loss_xval": 0.0228271484375, + "num_input_tokens_seen": 426358968, + "step": 4671 + }, + { + "epoch": 19.466666666666665, + "grad_norm": 4.709829187465688, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 426450512, + "step": 4672 + }, + { + "epoch": 19.466666666666665, + "loss": 0.050652679055929184, + "loss_ce": 1.1255635854467982e-06, + "loss_iou": 0.259765625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 426450512, + "step": 4672 + }, + { + "epoch": 19.470833333333335, + "grad_norm": 2.618074247148993, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 426541836, + "step": 4673 + }, + { + "epoch": 19.470833333333335, + "loss": 0.07112175226211548, + "loss_ce": 8.170564797183033e-06, + "loss_iou": 0.3984375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 426541836, + "step": 4673 + }, + { + "epoch": 19.475, + "grad_norm": 1.4930309481930126, + "learning_rate": 5e-05, + "loss": 0.1141, + "num_input_tokens_seen": 426633068, + "step": 4674 + }, + { + "epoch": 19.475, + "loss": 0.14563126862049103, + "loss_ce": 0.0001082018789020367, + "loss_iou": 0.25390625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 426633068, + "step": 4674 + }, + { + "epoch": 19.479166666666668, + "grad_norm": 2.980041369257071, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 426723156, + "step": 4675 + }, + { + "epoch": 19.479166666666668, + "loss": 0.05325976759195328, + "loss_ce": 6.596771072509e-06, + "loss_iou": 0.1865234375, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 426723156, + "step": 4675 + }, + { + "epoch": 19.483333333333334, + "grad_norm": 3.026211387083105, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 426814776, + "step": 4676 + }, + { + "epoch": 19.483333333333334, + "loss": 0.03120480850338936, + "loss_ce": 0.002579319756478071, + "loss_iou": 0.2490234375, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 426814776, + "step": 4676 + }, + { + "epoch": 19.4875, + "grad_norm": 3.366757929772386, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 426905988, + "step": 4677 + }, + { + "epoch": 19.4875, + "loss": 0.03799605742096901, + "loss_ce": 0.0034349020570516586, + "loss_iou": 0.2734375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 426905988, + "step": 4677 + }, + { + "epoch": 19.491666666666667, + "grad_norm": 2.8653070360820565, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 426997520, + "step": 4678 + }, + { + "epoch": 19.491666666666667, + "loss": 0.045054540038108826, + "loss_ce": 8.689066453371197e-05, + "loss_iou": 0.265625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 426997520, + "step": 4678 + }, + { + "epoch": 19.495833333333334, + "grad_norm": 2.44534489222494, + "learning_rate": 5e-05, + "loss": 0.0432, + "num_input_tokens_seen": 427088668, + "step": 4679 + }, + { + "epoch": 19.495833333333334, + "loss": 0.0548277348279953, + "loss_ce": 1.0534420653129928e-05, + "loss_iou": 0.29296875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 427088668, + "step": 4679 + }, + { + "epoch": 19.5, + "grad_norm": 3.304983826062335, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 427179708, + "step": 4680 + }, + { + "epoch": 19.5, + "loss": 0.04680022597312927, + "loss_ce": 3.203599408152513e-05, + "loss_iou": 0.169921875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 427179708, + "step": 4680 + }, + { + "epoch": 19.504166666666666, + "grad_norm": 1.9065554411739414, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 427270804, + "step": 4681 + }, + { + "epoch": 19.504166666666666, + "loss": 0.03119390271604061, + "loss_ce": 2.019602652580943e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 427270804, + "step": 4681 + }, + { + "epoch": 19.508333333333333, + "grad_norm": 13.070053340693008, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 427362196, + "step": 4682 + }, + { + "epoch": 19.508333333333333, + "loss": 0.051514655351638794, + "loss_ce": 9.846053217188455e-07, + "loss_iou": 0.265625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 427362196, + "step": 4682 + }, + { + "epoch": 19.5125, + "grad_norm": 3.3735253455937766, + "learning_rate": 5e-05, + "loss": 0.0327, + "num_input_tokens_seen": 427453656, + "step": 4683 + }, + { + "epoch": 19.5125, + "loss": 0.02974916622042656, + "loss_ce": 3.6488923797151074e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.005950927734375, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 427453656, + "step": 4683 + }, + { + "epoch": 19.516666666666666, + "grad_norm": 1.321183884842579, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 427544776, + "step": 4684 + }, + { + "epoch": 19.516666666666666, + "loss": 0.05862480029463768, + "loss_ce": 5.306997081788722e-07, + "loss_iou": 0.1953125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 427544776, + "step": 4684 + }, + { + "epoch": 19.520833333333332, + "grad_norm": 1.51784472719767, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 427636228, + "step": 4685 + }, + { + "epoch": 19.520833333333332, + "loss": 0.03482060134410858, + "loss_ce": 3.056173227378167e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 427636228, + "step": 4685 + }, + { + "epoch": 19.525, + "grad_norm": 3.819240039794996, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 427727648, + "step": 4686 + }, + { + "epoch": 19.525, + "loss": 0.03399910777807236, + "loss_ce": 4.830400939681567e-05, + "loss_iou": 0.2421875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 427727648, + "step": 4686 + }, + { + "epoch": 19.529166666666665, + "grad_norm": 1.2896131186634596, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 427818756, + "step": 4687 + }, + { + "epoch": 19.529166666666665, + "loss": 0.025551263242959976, + "loss_ce": 4.222063125780551e-07, + "loss_iou": 0.2109375, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 427818756, + "step": 4687 + }, + { + "epoch": 19.533333333333335, + "grad_norm": 2.4855605544570807, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 427910028, + "step": 4688 + }, + { + "epoch": 19.533333333333335, + "loss": 0.052681293338537216, + "loss_ce": 7.954224201967008e-06, + "loss_iou": 0.25390625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 427910028, + "step": 4688 + }, + { + "epoch": 19.5375, + "grad_norm": 2.39258266792005, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 428001156, + "step": 4689 + }, + { + "epoch": 19.5375, + "loss": 0.03420303389430046, + "loss_ce": 4.5918864088889677e-07, + "loss_iou": 0.326171875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 428001156, + "step": 4689 + }, + { + "epoch": 19.541666666666668, + "grad_norm": 3.0163886925914207, + "learning_rate": 5e-05, + "loss": 0.0331, + "num_input_tokens_seen": 428092540, + "step": 4690 + }, + { + "epoch": 19.541666666666668, + "loss": 0.0386294387280941, + "loss_ce": 1.8156640635424992e-06, + "loss_iou": 0.283203125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 428092540, + "step": 4690 + }, + { + "epoch": 19.545833333333334, + "grad_norm": 2.8737297381837252, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 428184612, + "step": 4691 + }, + { + "epoch": 19.545833333333334, + "loss": 0.05887051299214363, + "loss_ce": 2.1029197796451626e-06, + "loss_iou": 0.306640625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 428184612, + "step": 4691 + }, + { + "epoch": 19.55, + "grad_norm": 2.9026702244736287, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 428276444, + "step": 4692 + }, + { + "epoch": 19.55, + "loss": 0.02635483257472515, + "loss_ce": 3.3422638807678595e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 428276444, + "step": 4692 + }, + { + "epoch": 19.554166666666667, + "grad_norm": 3.604260008083001, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 428368020, + "step": 4693 + }, + { + "epoch": 19.554166666666667, + "loss": 0.04505544155836105, + "loss_ce": 2.6755342332762666e-05, + "loss_iou": 0.326171875, + "loss_num": 0.009033203125, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 428368020, + "step": 4693 + }, + { + "epoch": 19.558333333333334, + "grad_norm": 4.186936577110721, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 428459144, + "step": 4694 + }, + { + "epoch": 19.558333333333334, + "loss": 0.08561927080154419, + "loss_ce": 2.206423459938378e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 428459144, + "step": 4694 + }, + { + "epoch": 19.5625, + "grad_norm": 2.0447266565619673, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 428550580, + "step": 4695 + }, + { + "epoch": 19.5625, + "loss": 0.06577420234680176, + "loss_ce": 1.1950858151976718e-06, + "loss_iou": 0.287109375, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 428550580, + "step": 4695 + }, + { + "epoch": 19.566666666666666, + "grad_norm": 1.6493418387427887, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 428642472, + "step": 4696 + }, + { + "epoch": 19.566666666666666, + "loss": 0.02830236777663231, + "loss_ce": 4.942057330481475e-06, + "loss_iou": 0.248046875, + "loss_num": 0.00567626953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 428642472, + "step": 4696 + }, + { + "epoch": 19.570833333333333, + "grad_norm": 1.716198320956972, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 428733728, + "step": 4697 + }, + { + "epoch": 19.570833333333333, + "loss": 0.06456813961267471, + "loss_ce": 5.716344730899436e-07, + "loss_iou": 0.25390625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 428733728, + "step": 4697 + }, + { + "epoch": 19.575, + "grad_norm": 2.2096837878042783, + "learning_rate": 5e-05, + "loss": 0.0613, + "num_input_tokens_seen": 428825072, + "step": 4698 + }, + { + "epoch": 19.575, + "loss": 0.07033099234104156, + "loss_ce": 3.374950392753817e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 428825072, + "step": 4698 + }, + { + "epoch": 19.579166666666666, + "grad_norm": 1.6190235520147538, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 428916656, + "step": 4699 + }, + { + "epoch": 19.579166666666666, + "loss": 0.04145081713795662, + "loss_ce": 0.0006030374206602573, + "loss_iou": 0.189453125, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 428916656, + "step": 4699 + }, + { + "epoch": 19.583333333333332, + "grad_norm": 1.9948864819267058, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 429008224, + "step": 4700 + }, + { + "epoch": 19.583333333333332, + "loss": 0.04196205735206604, + "loss_ce": 8.014858394744806e-06, + "loss_iou": 0.25390625, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 429008224, + "step": 4700 + }, + { + "epoch": 19.5875, + "grad_norm": 2.3718308783133866, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 429098564, + "step": 4701 + }, + { + "epoch": 19.5875, + "loss": 0.04729030281305313, + "loss_ce": 3.315304411444231e-06, + "loss_iou": 0.13671875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 429098564, + "step": 4701 + }, + { + "epoch": 19.591666666666665, + "grad_norm": 2.724976630234873, + "learning_rate": 5e-05, + "loss": 0.0262, + "num_input_tokens_seen": 429189884, + "step": 4702 + }, + { + "epoch": 19.591666666666665, + "loss": 0.024874746799468994, + "loss_ce": 2.9214047572168056e-06, + "loss_iou": 0.162109375, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 429189884, + "step": 4702 + }, + { + "epoch": 19.595833333333335, + "grad_norm": 3.2361230807399264, + "learning_rate": 5e-05, + "loss": 0.0482, + "num_input_tokens_seen": 429281780, + "step": 4703 + }, + { + "epoch": 19.595833333333335, + "loss": 0.06647312641143799, + "loss_ce": 5.846063686476555e-06, + "loss_iou": 0.384765625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 429281780, + "step": 4703 + }, + { + "epoch": 19.6, + "grad_norm": 3.0761430478580745, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 429373436, + "step": 4704 + }, + { + "epoch": 19.6, + "loss": 0.03468446061015129, + "loss_ce": 1.2335344763414469e-06, + "loss_iou": 0.314453125, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 429373436, + "step": 4704 + }, + { + "epoch": 19.604166666666668, + "grad_norm": 2.890457420446236, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 429464764, + "step": 4705 + }, + { + "epoch": 19.604166666666668, + "loss": 0.05024765059351921, + "loss_ce": 1.571861321281176e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 429464764, + "step": 4705 + }, + { + "epoch": 19.608333333333334, + "grad_norm": 2.2618360525194126, + "learning_rate": 5e-05, + "loss": 0.0371, + "num_input_tokens_seen": 429556460, + "step": 4706 + }, + { + "epoch": 19.608333333333334, + "loss": 0.04483448341488838, + "loss_ce": 3.468158683972433e-05, + "loss_iou": 0.23828125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 429556460, + "step": 4706 + }, + { + "epoch": 19.6125, + "grad_norm": 2.7006877486585705, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 429647384, + "step": 4707 + }, + { + "epoch": 19.6125, + "loss": 0.1267559975385666, + "loss_ce": 1.241495851900254e-06, + "loss_iou": 0.25, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 429647384, + "step": 4707 + }, + { + "epoch": 19.616666666666667, + "grad_norm": 2.8438714688223516, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 429738540, + "step": 4708 + }, + { + "epoch": 19.616666666666667, + "loss": 0.0584740974009037, + "loss_ce": 2.416654524495243e-06, + "loss_iou": 0.21484375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 429738540, + "step": 4708 + }, + { + "epoch": 19.620833333333334, + "grad_norm": 2.8012454532090842, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 429830268, + "step": 4709 + }, + { + "epoch": 19.620833333333334, + "loss": 0.0580122284591198, + "loss_ce": 0.0017759620677679777, + "loss_iou": 0.234375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 429830268, + "step": 4709 + }, + { + "epoch": 19.625, + "grad_norm": 5.130070550972971, + "learning_rate": 5e-05, + "loss": 0.0455, + "num_input_tokens_seen": 429921536, + "step": 4710 + }, + { + "epoch": 19.625, + "loss": 0.048953525722026825, + "loss_ce": 3.333101176394848e-06, + "loss_iou": 0.26171875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 429921536, + "step": 4710 + }, + { + "epoch": 19.629166666666666, + "grad_norm": 4.6690431755644015, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 430011480, + "step": 4711 + }, + { + "epoch": 19.629166666666666, + "loss": 0.0785800963640213, + "loss_ce": 1.2594346117111854e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 430011480, + "step": 4711 + }, + { + "epoch": 19.633333333333333, + "grad_norm": 1.6665062495952965, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 430102852, + "step": 4712 + }, + { + "epoch": 19.633333333333333, + "loss": 0.0401344932615757, + "loss_ce": 0.00020224100444465876, + "loss_iou": 0.2265625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 430102852, + "step": 4712 + }, + { + "epoch": 19.6375, + "grad_norm": 55.26472547352569, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 430194392, + "step": 4713 + }, + { + "epoch": 19.6375, + "loss": 0.08778560161590576, + "loss_ce": 0.0003222259401809424, + "loss_iou": 0.1435546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 430194392, + "step": 4713 + }, + { + "epoch": 19.641666666666666, + "grad_norm": 3.4094239420725945, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 430285840, + "step": 4714 + }, + { + "epoch": 19.641666666666666, + "loss": 0.05561989173293114, + "loss_ce": 1.606982436896942e-06, + "loss_iou": 0.1845703125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 430285840, + "step": 4714 + }, + { + "epoch": 19.645833333333332, + "grad_norm": 2.209287444515342, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 430377416, + "step": 4715 + }, + { + "epoch": 19.645833333333332, + "loss": 0.05580145865678787, + "loss_ce": 1.5326233551604673e-05, + "loss_iou": 0.2578125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 430377416, + "step": 4715 + }, + { + "epoch": 19.65, + "grad_norm": 3.2790060049876057, + "learning_rate": 5e-05, + "loss": 0.0482, + "num_input_tokens_seen": 430468836, + "step": 4716 + }, + { + "epoch": 19.65, + "loss": 0.05664925277233124, + "loss_ce": 3.914827175321989e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 430468836, + "step": 4716 + }, + { + "epoch": 19.654166666666665, + "grad_norm": 4.991141402880353, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 430560788, + "step": 4717 + }, + { + "epoch": 19.654166666666665, + "loss": 0.06913499534130096, + "loss_ce": 1.2681159205385484e-05, + "loss_iou": 0.267578125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 430560788, + "step": 4717 + }, + { + "epoch": 19.658333333333335, + "grad_norm": 2.2365178989291956, + "learning_rate": 5e-05, + "loss": 0.0516, + "num_input_tokens_seen": 430651900, + "step": 4718 + }, + { + "epoch": 19.658333333333335, + "loss": 0.04592512920498848, + "loss_ce": 1.143124154623365e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 430651900, + "step": 4718 + }, + { + "epoch": 19.6625, + "grad_norm": 1.8556325329910262, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 430742996, + "step": 4719 + }, + { + "epoch": 19.6625, + "loss": 0.03147327899932861, + "loss_ce": 9.654847417550627e-06, + "loss_iou": 0.283203125, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 430742996, + "step": 4719 + }, + { + "epoch": 19.666666666666668, + "grad_norm": 2.4656535129417003, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 430834932, + "step": 4720 + }, + { + "epoch": 19.666666666666668, + "loss": 0.035395003855228424, + "loss_ce": 7.090914732543752e-05, + "loss_iou": 0.34765625, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 430834932, + "step": 4720 + }, + { + "epoch": 19.670833333333334, + "grad_norm": 6.594403244197698, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 430926420, + "step": 4721 + }, + { + "epoch": 19.670833333333334, + "loss": 0.03331117704510689, + "loss_ce": 1.6500658603035845e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 430926420, + "step": 4721 + }, + { + "epoch": 19.675, + "grad_norm": 1.366341970941686, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 431017736, + "step": 4722 + }, + { + "epoch": 19.675, + "loss": 0.04814673587679863, + "loss_ce": 5.256579697743291e-06, + "loss_iou": 0.291015625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 431017736, + "step": 4722 + }, + { + "epoch": 19.679166666666667, + "grad_norm": 1.2537744542642868, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 431108828, + "step": 4723 + }, + { + "epoch": 19.679166666666667, + "loss": 0.032738637179136276, + "loss_ce": 8.536110726709012e-06, + "loss_iou": 0.2421875, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 431108828, + "step": 4723 + }, + { + "epoch": 19.683333333333334, + "grad_norm": 2.0578171337478017, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 431200032, + "step": 4724 + }, + { + "epoch": 19.683333333333334, + "loss": 0.07429766654968262, + "loss_ce": 1.7882392057799734e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 431200032, + "step": 4724 + }, + { + "epoch": 19.6875, + "grad_norm": 4.339840406561024, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 431291196, + "step": 4725 + }, + { + "epoch": 19.6875, + "loss": 0.03668530285358429, + "loss_ce": 3.1715421755507123e-06, + "loss_iou": 0.2275390625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 431291196, + "step": 4725 + }, + { + "epoch": 19.691666666666666, + "grad_norm": 3.29129615500161, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 431381816, + "step": 4726 + }, + { + "epoch": 19.691666666666666, + "loss": 0.040788955986499786, + "loss_ce": 2.2145079583424376e-06, + "loss_iou": 0.318359375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 431381816, + "step": 4726 + }, + { + "epoch": 19.695833333333333, + "grad_norm": 12.701915079434166, + "learning_rate": 5e-05, + "loss": 0.0804, + "num_input_tokens_seen": 431473364, + "step": 4727 + }, + { + "epoch": 19.695833333333333, + "loss": 0.09925023466348648, + "loss_ce": 0.0001291386433877051, + "loss_iou": 0.326171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 431473364, + "step": 4727 + }, + { + "epoch": 19.7, + "grad_norm": 8.645379770563224, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 431564700, + "step": 4728 + }, + { + "epoch": 19.7, + "loss": 0.056926753371953964, + "loss_ce": 1.9098599295830354e-05, + "loss_iou": 0.201171875, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 431564700, + "step": 4728 + }, + { + "epoch": 19.704166666666666, + "grad_norm": 2.0947718623776916, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 431655808, + "step": 4729 + }, + { + "epoch": 19.704166666666666, + "loss": 0.09451901167631149, + "loss_ce": 6.074310476833489e-06, + "loss_iou": 0.201171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 431655808, + "step": 4729 + }, + { + "epoch": 19.708333333333332, + "grad_norm": 1.0461809066007421, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 431746692, + "step": 4730 + }, + { + "epoch": 19.708333333333332, + "loss": 0.04510524123907089, + "loss_ce": 2.6206024017483287e-07, + "loss_iou": 0.26953125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 431746692, + "step": 4730 + }, + { + "epoch": 19.7125, + "grad_norm": 1.9280093587865277, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 431837696, + "step": 4731 + }, + { + "epoch": 19.7125, + "loss": 0.04307686537504196, + "loss_ce": 1.3039922350799316e-06, + "loss_iou": 0.1923828125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 431837696, + "step": 4731 + }, + { + "epoch": 19.716666666666665, + "grad_norm": 1.24827481538375, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 431928716, + "step": 4732 + }, + { + "epoch": 19.716666666666665, + "loss": 0.036690644919872284, + "loss_ce": 3.140354237984866e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 431928716, + "step": 4732 + }, + { + "epoch": 19.720833333333335, + "grad_norm": 2.4385557181882853, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 432019900, + "step": 4733 + }, + { + "epoch": 19.720833333333335, + "loss": 0.029526352882385254, + "loss_ce": 0.00045836143544875085, + "loss_iou": 0.1875, + "loss_num": 0.005828857421875, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 432019900, + "step": 4733 + }, + { + "epoch": 19.725, + "grad_norm": 3.1961506297296887, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 432111116, + "step": 4734 + }, + { + "epoch": 19.725, + "loss": 0.044266924262046814, + "loss_ce": 4.6955759898992255e-05, + "loss_iou": 0.369140625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 432111116, + "step": 4734 + }, + { + "epoch": 19.729166666666668, + "grad_norm": 5.10569169609451, + "learning_rate": 5e-05, + "loss": 0.1041, + "num_input_tokens_seen": 432202424, + "step": 4735 + }, + { + "epoch": 19.729166666666668, + "loss": 0.17766880989074707, + "loss_ce": 5.649718514177948e-05, + "loss_iou": 0.35546875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 432202424, + "step": 4735 + }, + { + "epoch": 19.733333333333334, + "grad_norm": 3.4066542227567003, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 432293264, + "step": 4736 + }, + { + "epoch": 19.733333333333334, + "loss": 0.030559774488210678, + "loss_ce": 4.050310508318944e-06, + "loss_iou": 0.37109375, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 432293264, + "step": 4736 + }, + { + "epoch": 19.7375, + "grad_norm": 4.169440483906738, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 432385216, + "step": 4737 + }, + { + "epoch": 19.7375, + "loss": 0.05067963898181915, + "loss_ce": 6.623629451496527e-05, + "loss_iou": 0.244140625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 432385216, + "step": 4737 + }, + { + "epoch": 19.741666666666667, + "grad_norm": 2.841919099935251, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 432476316, + "step": 4738 + }, + { + "epoch": 19.741666666666667, + "loss": 0.04055830091238022, + "loss_ce": 4.422929578140611e-07, + "loss_iou": 0.2421875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 432476316, + "step": 4738 + }, + { + "epoch": 19.745833333333334, + "grad_norm": 5.138289345149183, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 432567392, + "step": 4739 + }, + { + "epoch": 19.745833333333334, + "loss": 0.1000186949968338, + "loss_ce": 0.0001499211648479104, + "loss_iou": 0.0908203125, + "loss_num": 0.0198974609375, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 432567392, + "step": 4739 + }, + { + "epoch": 19.75, + "grad_norm": 2.2537605228225135, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 432658656, + "step": 4740 + }, + { + "epoch": 19.75, + "loss": 0.05645791068673134, + "loss_ce": 3.918984816664306e-07, + "loss_iou": 0.2578125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 432658656, + "step": 4740 + }, + { + "epoch": 19.754166666666666, + "grad_norm": 25.017282268378043, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 432749804, + "step": 4741 + }, + { + "epoch": 19.754166666666666, + "loss": 0.07857572287321091, + "loss_ce": 5.399743895395659e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 432749804, + "step": 4741 + }, + { + "epoch": 19.758333333333333, + "grad_norm": 2.2293138053656794, + "learning_rate": 5e-05, + "loss": 0.0487, + "num_input_tokens_seen": 432841492, + "step": 4742 + }, + { + "epoch": 19.758333333333333, + "loss": 0.06268471479415894, + "loss_ce": 1.6107320561786764e-06, + "loss_iou": 0.14453125, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 432841492, + "step": 4742 + }, + { + "epoch": 19.7625, + "grad_norm": 1.9049561130749288, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 432932664, + "step": 4743 + }, + { + "epoch": 19.7625, + "loss": 0.034028127789497375, + "loss_ce": 1.02684725789004e-06, + "loss_iou": 0.1474609375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 432932664, + "step": 4743 + }, + { + "epoch": 19.766666666666666, + "grad_norm": 2.813401973659435, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 433024400, + "step": 4744 + }, + { + "epoch": 19.766666666666666, + "loss": 0.0335683599114418, + "loss_ce": 1.4282367374107707e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 433024400, + "step": 4744 + }, + { + "epoch": 19.770833333333332, + "grad_norm": 3.536771192947322, + "learning_rate": 5e-05, + "loss": 0.0771, + "num_input_tokens_seen": 433115424, + "step": 4745 + }, + { + "epoch": 19.770833333333332, + "loss": 0.11302217841148376, + "loss_ce": 7.958343303471338e-06, + "loss_iou": 0.326171875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 433115424, + "step": 4745 + }, + { + "epoch": 19.775, + "grad_norm": 4.7410123582333465, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 433206964, + "step": 4746 + }, + { + "epoch": 19.775, + "loss": 0.030320316553115845, + "loss_ce": 8.73003045853693e-06, + "loss_iou": 0.166015625, + "loss_num": 0.006072998046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 433206964, + "step": 4746 + }, + { + "epoch": 19.779166666666665, + "grad_norm": 2.269552770487602, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 433298868, + "step": 4747 + }, + { + "epoch": 19.779166666666665, + "loss": 0.030412226915359497, + "loss_ce": 9.0903395175701e-06, + "loss_iou": 0.359375, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 433298868, + "step": 4747 + }, + { + "epoch": 19.783333333333335, + "grad_norm": 2.0103372931750485, + "learning_rate": 5e-05, + "loss": 0.0618, + "num_input_tokens_seen": 433390096, + "step": 4748 + }, + { + "epoch": 19.783333333333335, + "loss": 0.057116035372018814, + "loss_ce": 2.391058842476923e-06, + "loss_iou": 0.27734375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 433390096, + "step": 4748 + }, + { + "epoch": 19.7875, + "grad_norm": 2.0854324586282127, + "learning_rate": 5e-05, + "loss": 0.0843, + "num_input_tokens_seen": 433481292, + "step": 4749 + }, + { + "epoch": 19.7875, + "loss": 0.09669038653373718, + "loss_ce": 7.17340808478184e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 433481292, + "step": 4749 + }, + { + "epoch": 19.791666666666668, + "grad_norm": 2.195617206921799, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 433573160, + "step": 4750 + }, + { + "epoch": 19.791666666666668, + "eval_seeclick_CIoU": 0.2617866322398186, + "eval_seeclick_GIoU": 0.24531903862953186, + "eval_seeclick_IoU": 0.352044939994812, + "eval_seeclick_MAE_all": 0.0924195908010006, + "eval_seeclick_MAE_h": 0.077772106975317, + "eval_seeclick_MAE_w": 0.1920384168624878, + "eval_seeclick_MAE_x_boxes": 0.19218388199806213, + "eval_seeclick_MAE_y_boxes": 0.08314986154437065, + "eval_seeclick_NUM_probability": 0.9999991953372955, + "eval_seeclick_inside_bbox": 0.6008522808551788, + "eval_seeclick_loss": 0.5741644501686096, + "eval_seeclick_loss_ce": 0.1503664255142212, + "eval_seeclick_loss_iou": 0.48944091796875, + "eval_seeclick_loss_num": 0.081939697265625, + "eval_seeclick_loss_xval": 0.40985107421875, + "eval_seeclick_runtime": 77.1332, + "eval_seeclick_samples_per_second": 0.557, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 433573160, + "step": 4750 + }, + { + "epoch": 19.791666666666668, + "eval_icons_CIoU": 0.24555600434541702, + "eval_icons_GIoU": 0.2464718595147133, + "eval_icons_IoU": 0.34940461814403534, + "eval_icons_MAE_all": 0.08040782436728477, + "eval_icons_MAE_h": 0.16841255873441696, + "eval_icons_MAE_w": 0.12601130455732346, + "eval_icons_MAE_x_boxes": 0.1272355169057846, + "eval_icons_MAE_y_boxes": 0.1678340807557106, + "eval_icons_NUM_probability": 0.9999995231628418, + "eval_icons_inside_bbox": 0.4913194477558136, + "eval_icons_loss": 0.39623481035232544, + "eval_icons_loss_ce": 0.000442440141341649, + "eval_icons_loss_iou": 0.17828369140625, + "eval_icons_loss_num": 0.0807037353515625, + "eval_icons_loss_xval": 0.40380859375, + "eval_icons_runtime": 106.6615, + "eval_icons_samples_per_second": 0.469, + "eval_icons_steps_per_second": 0.019, + "num_input_tokens_seen": 433573160, + "step": 4750 + }, + { + "epoch": 19.791666666666668, + "eval_screenspot_CIoU": 0.36383505662282306, + "eval_screenspot_GIoU": 0.3586165060599645, + "eval_screenspot_IoU": 0.4403722683588664, + "eval_screenspot_MAE_all": 0.10166554649670918, + "eval_screenspot_MAE_h": 0.09295431524515152, + "eval_screenspot_MAE_w": 0.19901380191246668, + "eval_screenspot_MAE_x_boxes": 0.1910252943634987, + "eval_screenspot_MAE_y_boxes": 0.08816726754109065, + "eval_screenspot_NUM_probability": 0.9999977151552836, + "eval_screenspot_inside_bbox": 0.6699999968210856, + "eval_screenspot_loss": 0.5046296715736389, + "eval_screenspot_loss_ce": 0.0008909959384861091, + "eval_screenspot_loss_iou": 0.35546875, + "eval_screenspot_loss_num": 0.10320536295572917, + "eval_screenspot_loss_xval": 0.5158894856770834, + "eval_screenspot_runtime": 153.4008, + "eval_screenspot_samples_per_second": 0.58, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 433573160, + "step": 4750 + }, + { + "epoch": 19.791666666666668, + "eval_compot_CIoU": 0.4970640242099762, + "eval_compot_GIoU": 0.4937925338745117, + "eval_compot_IoU": 0.5729747414588928, + "eval_compot_MAE_all": 0.05616691708564758, + "eval_compot_MAE_h": 0.06489542499184608, + "eval_compot_MAE_w": 0.1452133134007454, + "eval_compot_MAE_x_boxes": 0.14551985636353493, + "eval_compot_MAE_y_boxes": 0.06429455429315567, + "eval_compot_NUM_probability": 0.9999991357326508, + "eval_compot_inside_bbox": 0.7204861044883728, + "eval_compot_loss": 0.33045417070388794, + "eval_compot_loss_ce": 0.054313765838742256, + "eval_compot_loss_iou": 0.31292724609375, + "eval_compot_loss_num": 0.05380821228027344, + "eval_compot_loss_xval": 0.268829345703125, + "eval_compot_runtime": 96.9045, + "eval_compot_samples_per_second": 0.516, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 433573160, + "step": 4750 + }, + { + "epoch": 19.791666666666668, + "loss": 0.35344207286834717, + "loss_ce": 0.04799164459109306, + "loss_iou": 0.326171875, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 433573160, + "step": 4750 + }, + { + "epoch": 19.795833333333334, + "grad_norm": 2.519109222695506, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 433664960, + "step": 4751 + }, + { + "epoch": 19.795833333333334, + "loss": 0.07364164292812347, + "loss_ce": 2.7295736799715087e-06, + "loss_iou": 0.265625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 433664960, + "step": 4751 + }, + { + "epoch": 19.8, + "grad_norm": 2.77512853672313, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 433755616, + "step": 4752 + }, + { + "epoch": 19.8, + "loss": 0.029641907662153244, + "loss_ce": 4.748583887703717e-05, + "loss_iou": 0.255859375, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 433755616, + "step": 4752 + }, + { + "epoch": 19.804166666666667, + "grad_norm": 2.770319139030136, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 433846352, + "step": 4753 + }, + { + "epoch": 19.804166666666667, + "loss": 0.04019250348210335, + "loss_ce": 3.137064413749613e-05, + "loss_iou": 0.447265625, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 433846352, + "step": 4753 + }, + { + "epoch": 19.808333333333334, + "grad_norm": 2.752000642849686, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 433937496, + "step": 4754 + }, + { + "epoch": 19.808333333333334, + "loss": 0.028888067230582237, + "loss_ce": 3.1796630537428427e-06, + "loss_iou": 0.19140625, + "loss_num": 0.005767822265625, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 433937496, + "step": 4754 + }, + { + "epoch": 19.8125, + "grad_norm": 3.1079540791972287, + "learning_rate": 5e-05, + "loss": 0.0742, + "num_input_tokens_seen": 434028432, + "step": 4755 + }, + { + "epoch": 19.8125, + "loss": 0.11143758147954941, + "loss_ce": 2.651629074534867e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 434028432, + "step": 4755 + }, + { + "epoch": 19.816666666666666, + "grad_norm": 3.342540574063074, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 434119424, + "step": 4756 + }, + { + "epoch": 19.816666666666666, + "loss": 0.05670653283596039, + "loss_ce": 4.8754759518487845e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 434119424, + "step": 4756 + }, + { + "epoch": 19.820833333333333, + "grad_norm": 4.83109444434289, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 434210808, + "step": 4757 + }, + { + "epoch": 19.820833333333333, + "loss": 0.04404546320438385, + "loss_ce": 0.0034265692811459303, + "loss_iou": 0.318359375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 434210808, + "step": 4757 + }, + { + "epoch": 19.825, + "grad_norm": 3.2738265241261257, + "learning_rate": 5e-05, + "loss": 0.093, + "num_input_tokens_seen": 434302308, + "step": 4758 + }, + { + "epoch": 19.825, + "loss": 0.06388229876756668, + "loss_ce": 9.010569556267e-06, + "loss_iou": 0.365234375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 434302308, + "step": 4758 + }, + { + "epoch": 19.829166666666666, + "grad_norm": 2.387520215439463, + "learning_rate": 5e-05, + "loss": 0.0755, + "num_input_tokens_seen": 434393700, + "step": 4759 + }, + { + "epoch": 19.829166666666666, + "loss": 0.08277393877506256, + "loss_ce": 2.648112058523111e-06, + "loss_iou": 0.1826171875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 434393700, + "step": 4759 + }, + { + "epoch": 19.833333333333332, + "grad_norm": 6.311417557444102, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 434485016, + "step": 4760 + }, + { + "epoch": 19.833333333333332, + "loss": 0.05282047018408775, + "loss_ce": 4.031695061712526e-05, + "loss_iou": 0.205078125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 434485016, + "step": 4760 + }, + { + "epoch": 19.8375, + "grad_norm": 1.6847023622892647, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 434576416, + "step": 4761 + }, + { + "epoch": 19.8375, + "loss": 0.02808937057852745, + "loss_ce": 1.3197223779570777e-05, + "loss_iou": 0.26171875, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 434576416, + "step": 4761 + }, + { + "epoch": 19.841666666666665, + "grad_norm": 1.833421773612613, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 434667668, + "step": 4762 + }, + { + "epoch": 19.841666666666665, + "loss": 0.03834429755806923, + "loss_ce": 1.4218359865481034e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 434667668, + "step": 4762 + }, + { + "epoch": 19.845833333333335, + "grad_norm": 3.172469243527616, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 434759152, + "step": 4763 + }, + { + "epoch": 19.845833333333335, + "loss": 0.04270630329847336, + "loss_ce": 1.2211901776026934e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 434759152, + "step": 4763 + }, + { + "epoch": 19.85, + "grad_norm": 6.25394716952818, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 434850704, + "step": 4764 + }, + { + "epoch": 19.85, + "loss": 0.05046635866165161, + "loss_ce": 0.0033014400396496058, + "loss_iou": 0.31640625, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 434850704, + "step": 4764 + }, + { + "epoch": 19.854166666666668, + "grad_norm": 3.3766243816417227, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 434941864, + "step": 4765 + }, + { + "epoch": 19.854166666666668, + "loss": 0.031087806448340416, + "loss_ce": 8.194526162697002e-05, + "loss_iou": 0.302734375, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 434941864, + "step": 4765 + }, + { + "epoch": 19.858333333333334, + "grad_norm": 2.9027640432813158, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 435033672, + "step": 4766 + }, + { + "epoch": 19.858333333333334, + "loss": 0.04052776098251343, + "loss_ce": 4.1431542285863543e-07, + "loss_iou": 0.2060546875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 435033672, + "step": 4766 + }, + { + "epoch": 19.8625, + "grad_norm": 2.5543771738399754, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 435124988, + "step": 4767 + }, + { + "epoch": 19.8625, + "loss": 0.06493158638477325, + "loss_ce": 5.121577123645693e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 435124988, + "step": 4767 + }, + { + "epoch": 19.866666666666667, + "grad_norm": 1.9328115074620458, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 435216236, + "step": 4768 + }, + { + "epoch": 19.866666666666667, + "loss": 0.03497876226902008, + "loss_ce": 0.00011242987238802016, + "loss_iou": 0.1943359375, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 435216236, + "step": 4768 + }, + { + "epoch": 19.870833333333334, + "grad_norm": 2.1231171249238603, + "learning_rate": 5e-05, + "loss": 0.0999, + "num_input_tokens_seen": 435307572, + "step": 4769 + }, + { + "epoch": 19.870833333333334, + "loss": 0.0322178415954113, + "loss_ce": 6.538154138979735e-06, + "loss_iou": 0.11865234375, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 435307572, + "step": 4769 + }, + { + "epoch": 19.875, + "grad_norm": 2.462645421105774, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 435398996, + "step": 4770 + }, + { + "epoch": 19.875, + "loss": 0.042506515979766846, + "loss_ce": 0.00016337900888174772, + "loss_iou": 0.177734375, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 435398996, + "step": 4770 + }, + { + "epoch": 19.879166666666666, + "grad_norm": 4.052346583769054, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 435490352, + "step": 4771 + }, + { + "epoch": 19.879166666666666, + "loss": 0.07947006821632385, + "loss_ce": 2.291058535774937e-06, + "loss_iou": 0.21484375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 435490352, + "step": 4771 + }, + { + "epoch": 19.883333333333333, + "grad_norm": 2.122526959860213, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 435582028, + "step": 4772 + }, + { + "epoch": 19.883333333333333, + "loss": 0.03125705569982529, + "loss_ce": 9.860927821137011e-05, + "loss_iou": 0.21875, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 435582028, + "step": 4772 + }, + { + "epoch": 19.8875, + "grad_norm": 3.0380894123602484, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 435673124, + "step": 4773 + }, + { + "epoch": 19.8875, + "loss": 0.038742516189813614, + "loss_ce": 4.5227932332636556e-07, + "loss_iou": 0.20703125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 435673124, + "step": 4773 + }, + { + "epoch": 19.891666666666666, + "grad_norm": 4.866386407131122, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 435764704, + "step": 4774 + }, + { + "epoch": 19.891666666666666, + "loss": 0.035870663821697235, + "loss_ce": 0.00024902174482122064, + "loss_iou": 0.232421875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 435764704, + "step": 4774 + }, + { + "epoch": 19.895833333333332, + "grad_norm": 3.302905722056321, + "learning_rate": 5e-05, + "loss": 0.0727, + "num_input_tokens_seen": 435855960, + "step": 4775 + }, + { + "epoch": 19.895833333333332, + "loss": 0.05564238131046295, + "loss_ce": 0.0002682388585526496, + "loss_iou": 0.380859375, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 435855960, + "step": 4775 + }, + { + "epoch": 19.9, + "grad_norm": 2.900177431317305, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 435947544, + "step": 4776 + }, + { + "epoch": 19.9, + "loss": 0.06207602098584175, + "loss_ce": 3.2624948289594613e-06, + "loss_iou": 0.30078125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 435947544, + "step": 4776 + }, + { + "epoch": 19.904166666666665, + "grad_norm": 2.439142759959792, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 436038620, + "step": 4777 + }, + { + "epoch": 19.904166666666665, + "loss": 0.041464440524578094, + "loss_ce": 6.3084507928579114e-06, + "loss_iou": 0.1845703125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 436038620, + "step": 4777 + }, + { + "epoch": 19.908333333333335, + "grad_norm": 5.102423414343203, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 436129744, + "step": 4778 + }, + { + "epoch": 19.908333333333335, + "loss": 0.04953721910715103, + "loss_ce": 0.0002971051726490259, + "loss_iou": 0.27734375, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 436129744, + "step": 4778 + }, + { + "epoch": 19.9125, + "grad_norm": 2.2744876880677984, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 436221264, + "step": 4779 + }, + { + "epoch": 19.9125, + "loss": 0.03273453563451767, + "loss_ce": 0.000256203900789842, + "loss_iou": 0.138671875, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 436221264, + "step": 4779 + }, + { + "epoch": 19.916666666666668, + "grad_norm": 2.644698538864571, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 436312728, + "step": 4780 + }, + { + "epoch": 19.916666666666668, + "loss": 0.06178348883986473, + "loss_ce": 6.539040668940288e-07, + "loss_iou": 0.302734375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 436312728, + "step": 4780 + }, + { + "epoch": 19.920833333333334, + "grad_norm": 2.5130443225194004, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 436404180, + "step": 4781 + }, + { + "epoch": 19.920833333333334, + "loss": 0.032920412719249725, + "loss_ce": 0.00017504925199318677, + "loss_iou": 0.197265625, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 436404180, + "step": 4781 + }, + { + "epoch": 19.925, + "grad_norm": 1.0152006260830428, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 436495676, + "step": 4782 + }, + { + "epoch": 19.925, + "loss": 0.04953616112470627, + "loss_ce": 6.131633199402131e-06, + "loss_iou": 0.146484375, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 436495676, + "step": 4782 + }, + { + "epoch": 19.929166666666667, + "grad_norm": 1.6079033670692338, + "learning_rate": 5e-05, + "loss": 0.026, + "num_input_tokens_seen": 436587548, + "step": 4783 + }, + { + "epoch": 19.929166666666667, + "loss": 0.030390875414013863, + "loss_ce": 2.996691819134867e-06, + "loss_iou": 0.3203125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 436587548, + "step": 4783 + }, + { + "epoch": 19.933333333333334, + "grad_norm": 2.056534338786135, + "learning_rate": 5e-05, + "loss": 0.0614, + "num_input_tokens_seen": 436678536, + "step": 4784 + }, + { + "epoch": 19.933333333333334, + "loss": 0.06954550743103027, + "loss_ce": 2.6466495910426602e-05, + "loss_iou": 0.16015625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 436678536, + "step": 4784 + }, + { + "epoch": 19.9375, + "grad_norm": 2.495766816196573, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 436769484, + "step": 4785 + }, + { + "epoch": 19.9375, + "loss": 0.03806344047188759, + "loss_ce": 8.020611858228222e-06, + "loss_iou": 0.189453125, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 436769484, + "step": 4785 + }, + { + "epoch": 19.941666666666666, + "grad_norm": 2.6281992052223955, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 436860612, + "step": 4786 + }, + { + "epoch": 19.941666666666666, + "loss": 0.024370262399315834, + "loss_ce": 2.4864317310857587e-05, + "loss_iou": 0.232421875, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 436860612, + "step": 4786 + }, + { + "epoch": 19.945833333333333, + "grad_norm": 4.542283123687921, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 436952216, + "step": 4787 + }, + { + "epoch": 19.945833333333333, + "loss": 0.055509522557258606, + "loss_ce": 0.00028796104015782475, + "loss_iou": 0.2294921875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 436952216, + "step": 4787 + }, + { + "epoch": 19.95, + "grad_norm": 1.9657238523141636, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 437043396, + "step": 4788 + }, + { + "epoch": 19.95, + "loss": 0.048294927924871445, + "loss_ce": 8.602569891991152e-07, + "loss_iou": 0.294921875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 437043396, + "step": 4788 + }, + { + "epoch": 19.954166666666666, + "grad_norm": 5.430431075051217, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 437134896, + "step": 4789 + }, + { + "epoch": 19.954166666666666, + "loss": 0.03300505131483078, + "loss_ce": 2.8948926455996116e-07, + "loss_iou": 0.2373046875, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 437134896, + "step": 4789 + }, + { + "epoch": 19.958333333333332, + "grad_norm": 2.4163825972074218, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 437226228, + "step": 4790 + }, + { + "epoch": 19.958333333333332, + "loss": 0.08775608241558075, + "loss_ce": 0.00028507053502835333, + "loss_iou": 0.2265625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 437226228, + "step": 4790 + }, + { + "epoch": 19.9625, + "grad_norm": 3.138963856779165, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 437317116, + "step": 4791 + }, + { + "epoch": 19.9625, + "loss": 0.03851897269487381, + "loss_ce": 0.00038726101047359407, + "loss_iou": 0.369140625, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 437317116, + "step": 4791 + }, + { + "epoch": 19.966666666666665, + "grad_norm": 3.222853049196043, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 437408572, + "step": 4792 + }, + { + "epoch": 19.966666666666665, + "loss": 0.06291039288043976, + "loss_ce": 0.002607660135254264, + "loss_iou": 0.1796875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 437408572, + "step": 4792 + }, + { + "epoch": 19.970833333333335, + "grad_norm": 3.1241585449398195, + "learning_rate": 5e-05, + "loss": 0.0362, + "num_input_tokens_seen": 437500044, + "step": 4793 + }, + { + "epoch": 19.970833333333335, + "loss": 0.039086949080228806, + "loss_ce": 0.00014270386600401253, + "loss_iou": 0.31640625, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 437500044, + "step": 4793 + }, + { + "epoch": 19.975, + "grad_norm": 2.754162527096431, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 437592028, + "step": 4794 + }, + { + "epoch": 19.975, + "loss": 0.028476163744926453, + "loss_ce": 3.263681264797924e-06, + "loss_iou": 0.251953125, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 437592028, + "step": 4794 + }, + { + "epoch": 19.979166666666668, + "grad_norm": 2.0195337905149455, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 437683280, + "step": 4795 + }, + { + "epoch": 19.979166666666668, + "loss": 0.023691684007644653, + "loss_ce": 1.0043763722933363e-05, + "loss_iou": 0.1796875, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 437683280, + "step": 4795 + }, + { + "epoch": 19.983333333333334, + "grad_norm": 2.871976282909025, + "learning_rate": 5e-05, + "loss": 0.1023, + "num_input_tokens_seen": 437774404, + "step": 4796 + }, + { + "epoch": 19.983333333333334, + "loss": 0.06012318655848503, + "loss_ce": 3.559290234989021e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 437774404, + "step": 4796 + }, + { + "epoch": 19.9875, + "grad_norm": 2.7455227723420847, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 437865412, + "step": 4797 + }, + { + "epoch": 19.9875, + "loss": 0.07167306542396545, + "loss_ce": 2.5335707505291793e-06, + "loss_iou": 0.267578125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 437865412, + "step": 4797 + }, + { + "epoch": 19.991666666666667, + "grad_norm": 3.6041223435322114, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 437956260, + "step": 4798 + }, + { + "epoch": 19.991666666666667, + "loss": 0.042237237095832825, + "loss_ce": 9.090753110285732e-07, + "loss_iou": 0.38671875, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 437956260, + "step": 4798 + }, + { + "epoch": 19.995833333333334, + "grad_norm": 2.4390005978821794, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 438047760, + "step": 4799 + }, + { + "epoch": 19.995833333333334, + "loss": 0.046113960444927216, + "loss_ce": 1.7160655261250213e-05, + "loss_iou": 0.33984375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 438047760, + "step": 4799 + }, + { + "epoch": 20.0, + "grad_norm": 2.381231649308952, + "learning_rate": 5e-05, + "loss": 0.0668, + "num_input_tokens_seen": 438139052, + "step": 4800 + }, + { + "epoch": 20.0, + "loss": 0.0817049890756607, + "loss_ce": 9.439384484721813e-06, + "loss_iou": 0.12060546875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 438139052, + "step": 4800 + }, + { + "epoch": 20.004166666666666, + "grad_norm": 2.8739335153532806, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 438230164, + "step": 4801 + }, + { + "epoch": 20.004166666666666, + "loss": 0.041184697300195694, + "loss_ce": 1.222786636390083e-06, + "loss_iou": 0.369140625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 438230164, + "step": 4801 + }, + { + "epoch": 20.008333333333333, + "grad_norm": 6.263826768460578, + "learning_rate": 5e-05, + "loss": 0.044, + "num_input_tokens_seen": 438321560, + "step": 4802 + }, + { + "epoch": 20.008333333333333, + "loss": 0.028685349971055984, + "loss_ce": 6.748837768100202e-05, + "loss_iou": 0.267578125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 438321560, + "step": 4802 + }, + { + "epoch": 20.0125, + "grad_norm": 2.6413899758281056, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 438412924, + "step": 4803 + }, + { + "epoch": 20.0125, + "loss": 0.04007952660322189, + "loss_ce": 9.947018952516373e-06, + "loss_iou": 0.361328125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 438412924, + "step": 4803 + }, + { + "epoch": 20.016666666666666, + "grad_norm": 2.6635715423342825, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 438503780, + "step": 4804 + }, + { + "epoch": 20.016666666666666, + "loss": 0.04384048283100128, + "loss_ce": 1.984845539482194e-06, + "loss_iou": 0.349609375, + "loss_num": 0.00872802734375, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 438503780, + "step": 4804 + }, + { + "epoch": 20.020833333333332, + "grad_norm": 4.176671686106218, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 438595184, + "step": 4805 + }, + { + "epoch": 20.020833333333332, + "loss": 0.05074448883533478, + "loss_ce": 1.3875467175239464e-06, + "loss_iou": 0.314453125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 438595184, + "step": 4805 + }, + { + "epoch": 20.025, + "grad_norm": 2.052634063071722, + "learning_rate": 5e-05, + "loss": 0.071, + "num_input_tokens_seen": 438686516, + "step": 4806 + }, + { + "epoch": 20.025, + "loss": 0.03876500576734543, + "loss_ce": 0.0001450096460757777, + "loss_iou": 0.296875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 438686516, + "step": 4806 + }, + { + "epoch": 20.029166666666665, + "grad_norm": 3.100562519952563, + "learning_rate": 5e-05, + "loss": 0.0432, + "num_input_tokens_seen": 438777576, + "step": 4807 + }, + { + "epoch": 20.029166666666665, + "loss": 0.017205331474542618, + "loss_ce": 1.0473372640262824e-06, + "loss_iou": 0.234375, + "loss_num": 0.003448486328125, + "loss_xval": 0.0172119140625, + "num_input_tokens_seen": 438777576, + "step": 4807 + }, + { + "epoch": 20.033333333333335, + "grad_norm": 3.1485098132144618, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 438869028, + "step": 4808 + }, + { + "epoch": 20.033333333333335, + "loss": 0.04611296206712723, + "loss_ce": 8.993303026727517e-07, + "loss_iou": 0.310546875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 438869028, + "step": 4808 + }, + { + "epoch": 20.0375, + "grad_norm": 3.070790534611428, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 438960856, + "step": 4809 + }, + { + "epoch": 20.0375, + "loss": 0.08876143395900726, + "loss_ce": 0.003754718229174614, + "loss_iou": 0.2412109375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 438960856, + "step": 4809 + }, + { + "epoch": 20.041666666666668, + "grad_norm": 3.648642482675532, + "learning_rate": 5e-05, + "loss": 0.0306, + "num_input_tokens_seen": 439052900, + "step": 4810 + }, + { + "epoch": 20.041666666666668, + "loss": 0.03443126380443573, + "loss_ce": 7.435416591761168e-06, + "loss_iou": 0.2421875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 439052900, + "step": 4810 + }, + { + "epoch": 20.045833333333334, + "grad_norm": 4.008229139740039, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 439144596, + "step": 4811 + }, + { + "epoch": 20.045833333333334, + "loss": 0.03682493418455124, + "loss_ce": 0.003530257847160101, + "loss_iou": 0.2314453125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 439144596, + "step": 4811 + }, + { + "epoch": 20.05, + "grad_norm": 2.6584184346017197, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 439235856, + "step": 4812 + }, + { + "epoch": 20.05, + "loss": 0.025795510038733482, + "loss_ce": 5.288959528115811e-07, + "loss_iou": 0.353515625, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 439235856, + "step": 4812 + }, + { + "epoch": 20.054166666666667, + "grad_norm": 3.415623241765032, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 439327328, + "step": 4813 + }, + { + "epoch": 20.054166666666667, + "loss": 0.029038380831480026, + "loss_ce": 9.05168008102919e-07, + "loss_iou": 0.30078125, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 439327328, + "step": 4813 + }, + { + "epoch": 20.058333333333334, + "grad_norm": 3.1804420864287044, + "learning_rate": 5e-05, + "loss": 0.0455, + "num_input_tokens_seen": 439418656, + "step": 4814 + }, + { + "epoch": 20.058333333333334, + "loss": 0.03694318234920502, + "loss_ce": 1.6520855297130765e-06, + "loss_iou": 0.265625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 439418656, + "step": 4814 + }, + { + "epoch": 20.0625, + "grad_norm": 2.2293540681116513, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 439509992, + "step": 4815 + }, + { + "epoch": 20.0625, + "loss": 0.04403237625956535, + "loss_ce": 1.0767493222374469e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 439509992, + "step": 4815 + }, + { + "epoch": 20.066666666666666, + "grad_norm": 1.4748819583091075, + "learning_rate": 5e-05, + "loss": 0.0642, + "num_input_tokens_seen": 439601028, + "step": 4816 + }, + { + "epoch": 20.066666666666666, + "loss": 0.06710471957921982, + "loss_ce": 4.188733328192029e-06, + "loss_iou": 0.0849609375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 439601028, + "step": 4816 + }, + { + "epoch": 20.070833333333333, + "grad_norm": 1.692495496060704, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 439691404, + "step": 4817 + }, + { + "epoch": 20.070833333333333, + "loss": 0.05415000021457672, + "loss_ce": 0.0011485957074910402, + "loss_iou": 0.224609375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 439691404, + "step": 4817 + }, + { + "epoch": 20.075, + "grad_norm": 1.6939580744445781, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 439782628, + "step": 4818 + }, + { + "epoch": 20.075, + "loss": 0.03671405836939812, + "loss_ce": 1.4143606676952913e-06, + "loss_iou": 0.291015625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 439782628, + "step": 4818 + }, + { + "epoch": 20.079166666666666, + "grad_norm": 2.5877769478834094, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 439873884, + "step": 4819 + }, + { + "epoch": 20.079166666666666, + "loss": 0.047144003212451935, + "loss_ce": 9.603533726476599e-06, + "loss_iou": 0.294921875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 439873884, + "step": 4819 + }, + { + "epoch": 20.083333333333332, + "grad_norm": 2.938377407145417, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 439965388, + "step": 4820 + }, + { + "epoch": 20.083333333333332, + "loss": 0.04065392166376114, + "loss_ce": 4.511202860157937e-06, + "loss_iou": 0.298828125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 439965388, + "step": 4820 + }, + { + "epoch": 20.0875, + "grad_norm": 2.7862469182221674, + "learning_rate": 5e-05, + "loss": 0.0232, + "num_input_tokens_seen": 440056884, + "step": 4821 + }, + { + "epoch": 20.0875, + "loss": 0.021492835134267807, + "loss_ce": 8.301569778268458e-07, + "loss_iou": 0.193359375, + "loss_num": 0.004302978515625, + "loss_xval": 0.021484375, + "num_input_tokens_seen": 440056884, + "step": 4821 + }, + { + "epoch": 20.091666666666665, + "grad_norm": 2.867290416786235, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 440148288, + "step": 4822 + }, + { + "epoch": 20.091666666666665, + "loss": 0.03978437930345535, + "loss_ce": 7.33786218916066e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 440148288, + "step": 4822 + }, + { + "epoch": 20.095833333333335, + "grad_norm": 2.7018284998552113, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 440237976, + "step": 4823 + }, + { + "epoch": 20.095833333333335, + "loss": 0.0364387109875679, + "loss_ce": 7.207721637314535e-07, + "loss_iou": 0.294921875, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 440237976, + "step": 4823 + }, + { + "epoch": 20.1, + "grad_norm": 2.3439581503071927, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 440329640, + "step": 4824 + }, + { + "epoch": 20.1, + "loss": 0.019852623343467712, + "loss_ce": 4.671405258704908e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.00396728515625, + "loss_xval": 0.019775390625, + "num_input_tokens_seen": 440329640, + "step": 4824 + }, + { + "epoch": 20.104166666666668, + "grad_norm": 2.3780211521592336, + "learning_rate": 5e-05, + "loss": 0.0268, + "num_input_tokens_seen": 440421464, + "step": 4825 + }, + { + "epoch": 20.104166666666668, + "loss": 0.02387407049536705, + "loss_ce": 1.694625211712264e-06, + "loss_iou": 0.259765625, + "loss_num": 0.0047607421875, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 440421464, + "step": 4825 + }, + { + "epoch": 20.108333333333334, + "grad_norm": 2.474421100813016, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 440512524, + "step": 4826 + }, + { + "epoch": 20.108333333333334, + "loss": 0.03605201840400696, + "loss_ce": 1.0761279554571956e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 440512524, + "step": 4826 + }, + { + "epoch": 20.1125, + "grad_norm": 2.822494510620272, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 440603720, + "step": 4827 + }, + { + "epoch": 20.1125, + "loss": 0.04726698249578476, + "loss_ce": 2.577023951744195e-05, + "loss_iou": 0.357421875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 440603720, + "step": 4827 + }, + { + "epoch": 20.116666666666667, + "grad_norm": 3.4141074259706294, + "learning_rate": 5e-05, + "loss": 0.0286, + "num_input_tokens_seen": 440695424, + "step": 4828 + }, + { + "epoch": 20.116666666666667, + "loss": 0.02667359448969364, + "loss_ce": 1.2308016721362947e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.005340576171875, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 440695424, + "step": 4828 + }, + { + "epoch": 20.120833333333334, + "grad_norm": 4.193534404069147, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 440787128, + "step": 4829 + }, + { + "epoch": 20.120833333333334, + "loss": 0.02787083014845848, + "loss_ce": 8.280884685518686e-06, + "loss_iou": 0.2734375, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 440787128, + "step": 4829 + }, + { + "epoch": 20.125, + "grad_norm": 3.2028659600742744, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 440879012, + "step": 4830 + }, + { + "epoch": 20.125, + "loss": 0.04750242829322815, + "loss_ce": 0.00013151884195394814, + "loss_iou": 0.21484375, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 440879012, + "step": 4830 + }, + { + "epoch": 20.129166666666666, + "grad_norm": 2.8369034892888827, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 440969644, + "step": 4831 + }, + { + "epoch": 20.129166666666666, + "loss": 0.03952815756201744, + "loss_ce": 2.641428125116363e-07, + "loss_iou": 0.20703125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 440969644, + "step": 4831 + }, + { + "epoch": 20.133333333333333, + "grad_norm": 2.778861090319647, + "learning_rate": 5e-05, + "loss": 0.0658, + "num_input_tokens_seen": 441060208, + "step": 4832 + }, + { + "epoch": 20.133333333333333, + "loss": 0.04722682759165764, + "loss_ce": 8.743133435018535e-07, + "loss_iou": 0.33203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 441060208, + "step": 4832 + }, + { + "epoch": 20.1375, + "grad_norm": 2.7716139239658313, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 441151684, + "step": 4833 + }, + { + "epoch": 20.1375, + "loss": 0.03260520473122597, + "loss_ce": 4.800813712790841e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 441151684, + "step": 4833 + }, + { + "epoch": 20.141666666666666, + "grad_norm": 2.242768216362516, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 441242584, + "step": 4834 + }, + { + "epoch": 20.141666666666666, + "loss": 0.0353640615940094, + "loss_ce": 1.8199129954155069e-06, + "loss_iou": 0.29296875, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 441242584, + "step": 4834 + }, + { + "epoch": 20.145833333333332, + "grad_norm": 3.375832829792383, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 441333960, + "step": 4835 + }, + { + "epoch": 20.145833333333332, + "loss": 0.041843000799417496, + "loss_ce": 3.401458116059075e-06, + "loss_iou": 0.29296875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 441333960, + "step": 4835 + }, + { + "epoch": 20.15, + "grad_norm": 2.932190921396203, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 441425004, + "step": 4836 + }, + { + "epoch": 20.15, + "loss": 0.06082218885421753, + "loss_ce": 6.564836212419323e-07, + "loss_iou": 0.31640625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 441425004, + "step": 4836 + }, + { + "epoch": 20.154166666666665, + "grad_norm": 3.0778923197176384, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 441515492, + "step": 4837 + }, + { + "epoch": 20.154166666666665, + "loss": 0.05241571366786957, + "loss_ce": 1.772940549926716e-06, + "loss_iou": 0.1337890625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 441515492, + "step": 4837 + }, + { + "epoch": 20.158333333333335, + "grad_norm": 3.1140495590124795, + "learning_rate": 5e-05, + "loss": 0.1191, + "num_input_tokens_seen": 441607092, + "step": 4838 + }, + { + "epoch": 20.158333333333335, + "loss": 0.1707739531993866, + "loss_ce": 1.284774589294102e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 441607092, + "step": 4838 + }, + { + "epoch": 20.1625, + "grad_norm": 1.9461023033729858, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 441699296, + "step": 4839 + }, + { + "epoch": 20.1625, + "loss": 0.04207409918308258, + "loss_ce": 5.619662260869518e-06, + "loss_iou": 0.255859375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 441699296, + "step": 4839 + }, + { + "epoch": 20.166666666666668, + "grad_norm": 2.0869868661141022, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 441790672, + "step": 4840 + }, + { + "epoch": 20.166666666666668, + "loss": 0.04363371431827545, + "loss_ce": 1.2024182751702028e-06, + "loss_iou": 0.32421875, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 441790672, + "step": 4840 + }, + { + "epoch": 20.170833333333334, + "grad_norm": 2.788154441624537, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 441881792, + "step": 4841 + }, + { + "epoch": 20.170833333333334, + "loss": 0.05143975839018822, + "loss_ce": 2.3802135729056317e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 441881792, + "step": 4841 + }, + { + "epoch": 20.175, + "grad_norm": 3.551655592733579, + "learning_rate": 5e-05, + "loss": 0.0284, + "num_input_tokens_seen": 441972952, + "step": 4842 + }, + { + "epoch": 20.175, + "loss": 0.03263666480779648, + "loss_ce": 1.3373867659538519e-05, + "loss_iou": 0.37890625, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 441972952, + "step": 4842 + }, + { + "epoch": 20.179166666666667, + "grad_norm": 3.1591844264872857, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 442064504, + "step": 4843 + }, + { + "epoch": 20.179166666666667, + "loss": 0.04957715421915054, + "loss_ce": 0.0007795470883138478, + "loss_iou": 0.30859375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 442064504, + "step": 4843 + }, + { + "epoch": 20.183333333333334, + "grad_norm": 2.433716379371601, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 442156304, + "step": 4844 + }, + { + "epoch": 20.183333333333334, + "loss": 0.08409621566534042, + "loss_ce": 5.0806596846086904e-05, + "loss_iou": 0.232421875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 442156304, + "step": 4844 + }, + { + "epoch": 20.1875, + "grad_norm": 3.0001206937570175, + "learning_rate": 5e-05, + "loss": 0.0236, + "num_input_tokens_seen": 442247652, + "step": 4845 + }, + { + "epoch": 20.1875, + "loss": 0.02707594260573387, + "loss_ce": 2.211073115176987e-05, + "loss_iou": 0.16796875, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 442247652, + "step": 4845 + }, + { + "epoch": 20.191666666666666, + "grad_norm": 4.004260269877188, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 442339016, + "step": 4846 + }, + { + "epoch": 20.191666666666666, + "loss": 0.07790812849998474, + "loss_ce": 2.7273301384411752e-05, + "loss_iou": 0.2890625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 442339016, + "step": 4846 + }, + { + "epoch": 20.195833333333333, + "grad_norm": 2.666777725899104, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 442430576, + "step": 4847 + }, + { + "epoch": 20.195833333333333, + "loss": 0.037611234933137894, + "loss_ce": 1.3578915059042629e-05, + "loss_iou": 0.326171875, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 442430576, + "step": 4847 + }, + { + "epoch": 20.2, + "grad_norm": 1.149837717392595, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 442521864, + "step": 4848 + }, + { + "epoch": 20.2, + "loss": 0.03994525223970413, + "loss_ce": 5.372582108975621e-06, + "loss_iou": 0.154296875, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 442521864, + "step": 4848 + }, + { + "epoch": 20.204166666666666, + "grad_norm": 2.481003143027893, + "learning_rate": 5e-05, + "loss": 0.0487, + "num_input_tokens_seen": 442613304, + "step": 4849 + }, + { + "epoch": 20.204166666666666, + "loss": 0.03446324169635773, + "loss_ce": 6.993237184360623e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 442613304, + "step": 4849 + }, + { + "epoch": 20.208333333333332, + "grad_norm": 1.0981239862495074, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 442704856, + "step": 4850 + }, + { + "epoch": 20.208333333333332, + "loss": 0.034536510705947876, + "loss_ce": 1.3499163287633564e-05, + "loss_iou": 0.30078125, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 442704856, + "step": 4850 + }, + { + "epoch": 20.2125, + "grad_norm": 1.8155814850134073, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 442796228, + "step": 4851 + }, + { + "epoch": 20.2125, + "loss": 0.03786277025938034, + "loss_ce": 5.716243322240189e-06, + "loss_iou": 0.2080078125, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 442796228, + "step": 4851 + }, + { + "epoch": 20.216666666666665, + "grad_norm": 1.242135071037316, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 442888332, + "step": 4852 + }, + { + "epoch": 20.216666666666665, + "loss": 0.05964813753962517, + "loss_ce": 1.5276299336619559e-06, + "loss_iou": 0.1953125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 442888332, + "step": 4852 + }, + { + "epoch": 20.220833333333335, + "grad_norm": 0.8166360003808425, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 442979476, + "step": 4853 + }, + { + "epoch": 20.220833333333335, + "loss": 0.0613655149936676, + "loss_ce": 1.755387529556174e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 442979476, + "step": 4853 + }, + { + "epoch": 20.225, + "grad_norm": 3.3815310838351103, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 443070852, + "step": 4854 + }, + { + "epoch": 20.225, + "loss": 0.041042834520339966, + "loss_ce": 1.1952846762142144e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 443070852, + "step": 4854 + }, + { + "epoch": 20.229166666666668, + "grad_norm": 0.6287886096996755, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 443162152, + "step": 4855 + }, + { + "epoch": 20.229166666666668, + "loss": 0.02782684564590454, + "loss_ce": 2.4423698050668463e-06, + "loss_iou": 0.189453125, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 443162152, + "step": 4855 + }, + { + "epoch": 20.233333333333334, + "grad_norm": 1.2988693869129335, + "learning_rate": 5e-05, + "loss": 0.0243, + "num_input_tokens_seen": 443253400, + "step": 4856 + }, + { + "epoch": 20.233333333333334, + "loss": 0.029308704659342766, + "loss_ce": 1.9458499082247727e-05, + "loss_iou": 0.25390625, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 443253400, + "step": 4856 + }, + { + "epoch": 20.2375, + "grad_norm": 1.509705294300433, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 443344636, + "step": 4857 + }, + { + "epoch": 20.2375, + "loss": 0.023133575916290283, + "loss_ce": 0.00012332136975601315, + "loss_iou": 0.322265625, + "loss_num": 0.004608154296875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 443344636, + "step": 4857 + }, + { + "epoch": 20.241666666666667, + "grad_norm": 1.2625375682664126, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 443435428, + "step": 4858 + }, + { + "epoch": 20.241666666666667, + "loss": 0.061081625521183014, + "loss_ce": 6.900830840095296e-07, + "loss_iou": 0.1630859375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 443435428, + "step": 4858 + }, + { + "epoch": 20.245833333333334, + "grad_norm": 3.390200960559453, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 443527028, + "step": 4859 + }, + { + "epoch": 20.245833333333334, + "loss": 0.05636825039982796, + "loss_ce": 7.857749005779624e-05, + "loss_iou": 0.265625, + "loss_num": 0.01123046875, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 443527028, + "step": 4859 + }, + { + "epoch": 20.25, + "grad_norm": 2.4615376000436946, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 443618344, + "step": 4860 + }, + { + "epoch": 20.25, + "loss": 0.029989613220095634, + "loss_ce": 3.661078881123103e-05, + "loss_iou": 0.232421875, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 443618344, + "step": 4860 + }, + { + "epoch": 20.254166666666666, + "grad_norm": 3.6234847469098526, + "learning_rate": 5e-05, + "loss": 0.0424, + "num_input_tokens_seen": 443709564, + "step": 4861 + }, + { + "epoch": 20.254166666666666, + "loss": 0.03184705972671509, + "loss_ce": 1.9690830868057674e-06, + "loss_iou": 0.29296875, + "loss_num": 0.006378173828125, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 443709564, + "step": 4861 + }, + { + "epoch": 20.258333333333333, + "grad_norm": 3.113489820406098, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 443801068, + "step": 4862 + }, + { + "epoch": 20.258333333333333, + "loss": 0.06761139631271362, + "loss_ce": 7.60020557208918e-05, + "loss_iou": 0.47265625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 443801068, + "step": 4862 + }, + { + "epoch": 20.2625, + "grad_norm": 2.4742643245609606, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 443892516, + "step": 4863 + }, + { + "epoch": 20.2625, + "loss": 0.03174731880426407, + "loss_ce": 9.034883987624198e-06, + "loss_iou": 0.26171875, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 443892516, + "step": 4863 + }, + { + "epoch": 20.266666666666666, + "grad_norm": 3.330624777742401, + "learning_rate": 5e-05, + "loss": 0.0302, + "num_input_tokens_seen": 443983416, + "step": 4864 + }, + { + "epoch": 20.266666666666666, + "loss": 0.02159692347049713, + "loss_ce": 5.7377765187993646e-06, + "loss_iou": 0.1748046875, + "loss_num": 0.004302978515625, + "loss_xval": 0.0216064453125, + "num_input_tokens_seen": 443983416, + "step": 4864 + }, + { + "epoch": 20.270833333333332, + "grad_norm": 3.2220605320530704, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 444074932, + "step": 4865 + }, + { + "epoch": 20.270833333333332, + "loss": 0.044381555169820786, + "loss_ce": 8.995681127998978e-06, + "loss_iou": 0.302734375, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 444074932, + "step": 4865 + }, + { + "epoch": 20.275, + "grad_norm": 16.589366367423974, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 444166308, + "step": 4866 + }, + { + "epoch": 20.275, + "loss": 0.0509551465511322, + "loss_ce": 6.050134288670961e-06, + "loss_iou": 0.197265625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 444166308, + "step": 4866 + }, + { + "epoch": 20.279166666666665, + "grad_norm": 3.115461945601382, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 444257108, + "step": 4867 + }, + { + "epoch": 20.279166666666665, + "loss": 0.04429937154054642, + "loss_ce": 3.362210190971382e-05, + "loss_iou": 0.2890625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 444257108, + "step": 4867 + }, + { + "epoch": 20.283333333333335, + "grad_norm": 5.941316697519509, + "learning_rate": 5e-05, + "loss": 0.0826, + "num_input_tokens_seen": 444348248, + "step": 4868 + }, + { + "epoch": 20.283333333333335, + "loss": 0.11602663993835449, + "loss_ce": 0.00027346183196641505, + "loss_iou": 0.333984375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 444348248, + "step": 4868 + }, + { + "epoch": 20.2875, + "grad_norm": 3.2158858891955595, + "learning_rate": 5e-05, + "loss": 0.1098, + "num_input_tokens_seen": 444439784, + "step": 4869 + }, + { + "epoch": 20.2875, + "loss": 0.11932092905044556, + "loss_ce": 5.822531602461822e-05, + "loss_iou": 0.10400390625, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 444439784, + "step": 4869 + }, + { + "epoch": 20.291666666666668, + "grad_norm": 2.2748669444729335, + "learning_rate": 5e-05, + "loss": 0.0546, + "num_input_tokens_seen": 444531712, + "step": 4870 + }, + { + "epoch": 20.291666666666668, + "loss": 0.038855671882629395, + "loss_ce": 6.020005093887448e-05, + "loss_iou": 0.171875, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 444531712, + "step": 4870 + }, + { + "epoch": 20.295833333333334, + "grad_norm": 2.600265464747739, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 444622984, + "step": 4871 + }, + { + "epoch": 20.295833333333334, + "loss": 0.0485546737909317, + "loss_ce": 1.2067093848600052e-06, + "loss_iou": 0.2890625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 444622984, + "step": 4871 + }, + { + "epoch": 20.3, + "grad_norm": 5.2037408658004045, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 444714240, + "step": 4872 + }, + { + "epoch": 20.3, + "loss": 0.039145130664110184, + "loss_ce": 6.336808837659191e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 444714240, + "step": 4872 + }, + { + "epoch": 20.304166666666667, + "grad_norm": 2.7752019833056107, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 444805300, + "step": 4873 + }, + { + "epoch": 20.304166666666667, + "loss": 0.04133889824151993, + "loss_ce": 2.8373524401104078e-06, + "loss_iou": 0.30078125, + "loss_num": 0.00830078125, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 444805300, + "step": 4873 + }, + { + "epoch": 20.308333333333334, + "grad_norm": 1.7882737756201468, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 444896408, + "step": 4874 + }, + { + "epoch": 20.308333333333334, + "loss": 0.03891061991453171, + "loss_ce": 7.065909812808968e-07, + "loss_iou": 0.259765625, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 444896408, + "step": 4874 + }, + { + "epoch": 20.3125, + "grad_norm": 1.231073243583653, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 444987964, + "step": 4875 + }, + { + "epoch": 20.3125, + "loss": 0.03511609882116318, + "loss_ce": 0.00024976313579827547, + "loss_iou": 0.236328125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 444987964, + "step": 4875 + }, + { + "epoch": 20.316666666666666, + "grad_norm": 2.4147512166844023, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 445079640, + "step": 4876 + }, + { + "epoch": 20.316666666666666, + "loss": 0.0664483904838562, + "loss_ce": 5.740137567045167e-05, + "loss_iou": 0.29296875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 445079640, + "step": 4876 + }, + { + "epoch": 20.320833333333333, + "grad_norm": 2.1022623574818824, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 445171000, + "step": 4877 + }, + { + "epoch": 20.320833333333333, + "loss": 0.045300401747226715, + "loss_ce": 0.00010387108341092244, + "loss_iou": 0.1357421875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 445171000, + "step": 4877 + }, + { + "epoch": 20.325, + "grad_norm": 2.3199989305266855, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 445262104, + "step": 4878 + }, + { + "epoch": 20.325, + "loss": 0.025294212624430656, + "loss_ce": 5.617594433715567e-05, + "loss_iou": 0.224609375, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 445262104, + "step": 4878 + }, + { + "epoch": 20.329166666666666, + "grad_norm": 2.8002295035815514, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 445353480, + "step": 4879 + }, + { + "epoch": 20.329166666666666, + "loss": 0.07884392142295837, + "loss_ce": 1.7560005289851688e-06, + "loss_iou": 0.28125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 445353480, + "step": 4879 + }, + { + "epoch": 20.333333333333332, + "grad_norm": 2.754852265998824, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 445444500, + "step": 4880 + }, + { + "epoch": 20.333333333333332, + "loss": 0.1288609653711319, + "loss_ce": 4.901310148852644e-07, + "loss_iou": 0.201171875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 445444500, + "step": 4880 + }, + { + "epoch": 20.3375, + "grad_norm": 3.1200199540546754, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 445535764, + "step": 4881 + }, + { + "epoch": 20.3375, + "loss": 0.05894544720649719, + "loss_ce": 7.442573064508906e-07, + "loss_iou": 0.1845703125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 445535764, + "step": 4881 + }, + { + "epoch": 20.341666666666665, + "grad_norm": 3.1098845961447483, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 445627000, + "step": 4882 + }, + { + "epoch": 20.341666666666665, + "loss": 0.03754313290119171, + "loss_ce": 6.510799721581861e-06, + "loss_iou": 0.33203125, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 445627000, + "step": 4882 + }, + { + "epoch": 20.345833333333335, + "grad_norm": 3.304830489925366, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 445718184, + "step": 4883 + }, + { + "epoch": 20.345833333333335, + "loss": 0.03720206022262573, + "loss_ce": 1.1303675364615628e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 445718184, + "step": 4883 + }, + { + "epoch": 20.35, + "grad_norm": 3.000987544750155, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 445809972, + "step": 4884 + }, + { + "epoch": 20.35, + "loss": 0.057484131306409836, + "loss_ce": 1.9530423742253333e-05, + "loss_iou": 0.318359375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 445809972, + "step": 4884 + }, + { + "epoch": 20.354166666666668, + "grad_norm": 2.469429866613967, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 445901728, + "step": 4885 + }, + { + "epoch": 20.354166666666668, + "loss": 0.03661388158798218, + "loss_ce": 4.1551777485437924e-07, + "loss_iou": 0.2041015625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 445901728, + "step": 4885 + }, + { + "epoch": 20.358333333333334, + "grad_norm": 2.100770310341459, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 445992932, + "step": 4886 + }, + { + "epoch": 20.358333333333334, + "loss": 0.06200437992811203, + "loss_ce": 7.92302489571739e-06, + "loss_iou": 0.1533203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 445992932, + "step": 4886 + }, + { + "epoch": 20.3625, + "grad_norm": 2.0535116291736144, + "learning_rate": 5e-05, + "loss": 0.0255, + "num_input_tokens_seen": 446084960, + "step": 4887 + }, + { + "epoch": 20.3625, + "loss": 0.030836593359708786, + "loss_ce": 7.487383845727891e-05, + "loss_iou": 0.25, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 446084960, + "step": 4887 + }, + { + "epoch": 20.366666666666667, + "grad_norm": 7.862201651792061, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 446176152, + "step": 4888 + }, + { + "epoch": 20.366666666666667, + "loss": 0.029422171413898468, + "loss_ce": 0.00011766894749598578, + "loss_iou": 0.173828125, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 446176152, + "step": 4888 + }, + { + "epoch": 20.370833333333334, + "grad_norm": 2.2691565397775983, + "learning_rate": 5e-05, + "loss": 0.0578, + "num_input_tokens_seen": 446266984, + "step": 4889 + }, + { + "epoch": 20.370833333333334, + "loss": 0.058382242918014526, + "loss_ce": 2.115804136337829e-06, + "loss_iou": 0.26171875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 446266984, + "step": 4889 + }, + { + "epoch": 20.375, + "grad_norm": 1.362806615033498, + "learning_rate": 5e-05, + "loss": 0.0261, + "num_input_tokens_seen": 446358556, + "step": 4890 + }, + { + "epoch": 20.375, + "loss": 0.021336067467927933, + "loss_ce": 4.280506345821777e-06, + "loss_iou": 0.2001953125, + "loss_num": 0.0042724609375, + "loss_xval": 0.0213623046875, + "num_input_tokens_seen": 446358556, + "step": 4890 + }, + { + "epoch": 20.379166666666666, + "grad_norm": 1.362143346364944, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 446448380, + "step": 4891 + }, + { + "epoch": 20.379166666666666, + "loss": 0.04467766731977463, + "loss_ce": 2.2821543097961694e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 446448380, + "step": 4891 + }, + { + "epoch": 20.383333333333333, + "grad_norm": 2.8894799690511053, + "learning_rate": 5e-05, + "loss": 0.0336, + "num_input_tokens_seen": 446539984, + "step": 4892 + }, + { + "epoch": 20.383333333333333, + "loss": 0.03168143704533577, + "loss_ce": 1.9449596948106773e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 446539984, + "step": 4892 + }, + { + "epoch": 20.3875, + "grad_norm": 2.1546245449489243, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 446631024, + "step": 4893 + }, + { + "epoch": 20.3875, + "loss": 0.04168850928544998, + "loss_ce": 1.6754056559875607e-05, + "loss_iou": 0.259765625, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 446631024, + "step": 4893 + }, + { + "epoch": 20.391666666666666, + "grad_norm": 3.214993700974819, + "learning_rate": 5e-05, + "loss": 0.0318, + "num_input_tokens_seen": 446722444, + "step": 4894 + }, + { + "epoch": 20.391666666666666, + "loss": 0.03269556164741516, + "loss_ce": 2.649427915457636e-05, + "loss_iou": 0.14453125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 446722444, + "step": 4894 + }, + { + "epoch": 20.395833333333332, + "grad_norm": 8.2722110275818, + "learning_rate": 5e-05, + "loss": 0.117, + "num_input_tokens_seen": 446813668, + "step": 4895 + }, + { + "epoch": 20.395833333333332, + "loss": 0.06616764515638351, + "loss_ce": 5.53373229195131e-06, + "loss_iou": 0.26953125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 446813668, + "step": 4895 + }, + { + "epoch": 20.4, + "grad_norm": 2.4259130877628534, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 446904808, + "step": 4896 + }, + { + "epoch": 20.4, + "loss": 0.05022679269313812, + "loss_ce": 1.0118440513906535e-05, + "loss_iou": 0.173828125, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 446904808, + "step": 4896 + }, + { + "epoch": 20.404166666666665, + "grad_norm": 2.8399090176153585, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 446996608, + "step": 4897 + }, + { + "epoch": 20.404166666666665, + "loss": 0.030106237158179283, + "loss_ce": 8.274846550193615e-06, + "loss_iou": 0.271484375, + "loss_num": 0.006011962890625, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 446996608, + "step": 4897 + }, + { + "epoch": 20.408333333333335, + "grad_norm": 1.588252686102434, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 447088364, + "step": 4898 + }, + { + "epoch": 20.408333333333335, + "loss": 0.049656666815280914, + "loss_ce": 7.517551239288878e-07, + "loss_iou": 0.2119140625, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 447088364, + "step": 4898 + }, + { + "epoch": 20.4125, + "grad_norm": 0.9741283001699321, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 447179584, + "step": 4899 + }, + { + "epoch": 20.4125, + "loss": 0.04484070837497711, + "loss_ce": 2.757896481853095e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 447179584, + "step": 4899 + }, + { + "epoch": 20.416666666666668, + "grad_norm": 1.3770220316016173, + "learning_rate": 5e-05, + "loss": 0.025, + "num_input_tokens_seen": 447270912, + "step": 4900 + }, + { + "epoch": 20.416666666666668, + "loss": 0.023782670497894287, + "loss_ce": 1.846855525400315e-06, + "loss_iou": 0.224609375, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 447270912, + "step": 4900 + }, + { + "epoch": 20.420833333333334, + "grad_norm": 2.3947178776460167, + "learning_rate": 5e-05, + "loss": 0.0279, + "num_input_tokens_seen": 447361732, + "step": 4901 + }, + { + "epoch": 20.420833333333334, + "loss": 0.0212043859064579, + "loss_ce": 9.92965215118602e-06, + "loss_iou": 0.1328125, + "loss_num": 0.004241943359375, + "loss_xval": 0.021240234375, + "num_input_tokens_seen": 447361732, + "step": 4901 + }, + { + "epoch": 20.425, + "grad_norm": 2.006551586773389, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 447453548, + "step": 4902 + }, + { + "epoch": 20.425, + "loss": 0.05505795404314995, + "loss_ce": 5.0019931222777814e-05, + "loss_iou": 0.1005859375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 447453548, + "step": 4902 + }, + { + "epoch": 20.429166666666667, + "grad_norm": 2.1123937763145793, + "learning_rate": 5e-05, + "loss": 0.0294, + "num_input_tokens_seen": 447544444, + "step": 4903 + }, + { + "epoch": 20.429166666666667, + "loss": 0.03331262990832329, + "loss_ce": 2.6916898150375346e-06, + "loss_iou": 0.24609375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 447544444, + "step": 4903 + }, + { + "epoch": 20.433333333333334, + "grad_norm": 3.218991848840505, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 447635164, + "step": 4904 + }, + { + "epoch": 20.433333333333334, + "loss": 0.02320890873670578, + "loss_ce": 2.9073927976241976e-07, + "loss_iou": 0.27734375, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 447635164, + "step": 4904 + }, + { + "epoch": 20.4375, + "grad_norm": 3.050650709598406, + "learning_rate": 5e-05, + "loss": 0.0802, + "num_input_tokens_seen": 447726428, + "step": 4905 + }, + { + "epoch": 20.4375, + "loss": 0.05380012094974518, + "loss_ce": 1.289098599954741e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 447726428, + "step": 4905 + }, + { + "epoch": 20.441666666666666, + "grad_norm": 3.6080904941032017, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 447817312, + "step": 4906 + }, + { + "epoch": 20.441666666666666, + "loss": 0.05204792320728302, + "loss_ce": 1.9120729177757312e-07, + "loss_iou": 0.140625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 447817312, + "step": 4906 + }, + { + "epoch": 20.445833333333333, + "grad_norm": 0.8562594459524338, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 447908384, + "step": 4907 + }, + { + "epoch": 20.445833333333333, + "loss": 0.04393097758293152, + "loss_ce": 9.249480399375898e-07, + "loss_iou": 0.05517578125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 447908384, + "step": 4907 + }, + { + "epoch": 20.45, + "grad_norm": 1.9636652938937202, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 447999656, + "step": 4908 + }, + { + "epoch": 20.45, + "loss": 0.0840383991599083, + "loss_ce": 0.003967905882745981, + "loss_iou": 0.1875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 447999656, + "step": 4908 + }, + { + "epoch": 20.454166666666666, + "grad_norm": 1.8850154368498628, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 448090716, + "step": 4909 + }, + { + "epoch": 20.454166666666666, + "loss": 0.034120649099349976, + "loss_ce": 2.000324002437992e-06, + "loss_iou": 0.2138671875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 448090716, + "step": 4909 + }, + { + "epoch": 20.458333333333332, + "grad_norm": 2.574376969400775, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 448182416, + "step": 4910 + }, + { + "epoch": 20.458333333333332, + "loss": 0.05781601369380951, + "loss_ce": 4.6435079070761276e-07, + "loss_iou": 0.3203125, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 448182416, + "step": 4910 + }, + { + "epoch": 20.4625, + "grad_norm": 1.8043865352238553, + "learning_rate": 5e-05, + "loss": 0.0403, + "num_input_tokens_seen": 448273596, + "step": 4911 + }, + { + "epoch": 20.4625, + "loss": 0.02100570686161518, + "loss_ce": 0.00010116630437551066, + "loss_iou": 0.1875, + "loss_num": 0.004180908203125, + "loss_xval": 0.0208740234375, + "num_input_tokens_seen": 448273596, + "step": 4911 + }, + { + "epoch": 20.466666666666665, + "grad_norm": 1.6229164781208034, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 448364676, + "step": 4912 + }, + { + "epoch": 20.466666666666665, + "loss": 0.036881882697343826, + "loss_ce": 1.3882765870221192e-06, + "loss_iou": 0.1572265625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 448364676, + "step": 4912 + }, + { + "epoch": 20.470833333333335, + "grad_norm": 4.454826770446542, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 448456108, + "step": 4913 + }, + { + "epoch": 20.470833333333335, + "loss": 0.044205501675605774, + "loss_ce": 7.885735158197349e-07, + "loss_iou": 0.26953125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 448456108, + "step": 4913 + }, + { + "epoch": 20.475, + "grad_norm": 2.6555909998390566, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 448547816, + "step": 4914 + }, + { + "epoch": 20.475, + "loss": 0.06155569851398468, + "loss_ce": 0.00032217518310062587, + "loss_iou": 0.15234375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 448547816, + "step": 4914 + }, + { + "epoch": 20.479166666666668, + "grad_norm": 1.584083152690339, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 448639440, + "step": 4915 + }, + { + "epoch": 20.479166666666668, + "loss": 0.08793891966342926, + "loss_ce": 2.5262986582674785e-06, + "loss_iou": 0.1923828125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 448639440, + "step": 4915 + }, + { + "epoch": 20.483333333333334, + "grad_norm": 2.9419067451996987, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 448731128, + "step": 4916 + }, + { + "epoch": 20.483333333333334, + "loss": 0.04757130891084671, + "loss_ce": 4.018229083158076e-05, + "loss_iou": 0.203125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 448731128, + "step": 4916 + }, + { + "epoch": 20.4875, + "grad_norm": 3.3289255011100236, + "learning_rate": 5e-05, + "loss": 0.097, + "num_input_tokens_seen": 448822136, + "step": 4917 + }, + { + "epoch": 20.4875, + "loss": 0.13442227244377136, + "loss_ce": 0.001197781995870173, + "loss_iou": 0.1640625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 448822136, + "step": 4917 + }, + { + "epoch": 20.491666666666667, + "grad_norm": 2.2184218252363013, + "learning_rate": 5e-05, + "loss": 0.0275, + "num_input_tokens_seen": 448913020, + "step": 4918 + }, + { + "epoch": 20.491666666666667, + "loss": 0.025361178442835808, + "loss_ce": 1.0709804882935714e-06, + "loss_iou": 0.18359375, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 448913020, + "step": 4918 + }, + { + "epoch": 20.495833333333334, + "grad_norm": 3.114290733634199, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 449005084, + "step": 4919 + }, + { + "epoch": 20.495833333333334, + "loss": 0.06543560326099396, + "loss_ce": 5.912777851335704e-06, + "loss_iou": 0.1845703125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 449005084, + "step": 4919 + }, + { + "epoch": 20.5, + "grad_norm": 2.268225791254527, + "learning_rate": 5e-05, + "loss": 0.0318, + "num_input_tokens_seen": 449096416, + "step": 4920 + }, + { + "epoch": 20.5, + "loss": 0.026118092238903046, + "loss_ce": 4.082125815330073e-05, + "loss_iou": 0.224609375, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 449096416, + "step": 4920 + }, + { + "epoch": 20.504166666666666, + "grad_norm": 1.6800960184311333, + "learning_rate": 5e-05, + "loss": 0.0868, + "num_input_tokens_seen": 449187816, + "step": 4921 + }, + { + "epoch": 20.504166666666666, + "loss": 0.04860733449459076, + "loss_ce": 2.3353166398010217e-05, + "loss_iou": 0.234375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 449187816, + "step": 4921 + }, + { + "epoch": 20.508333333333333, + "grad_norm": 3.0114733370373057, + "learning_rate": 5e-05, + "loss": 0.1067, + "num_input_tokens_seen": 449279200, + "step": 4922 + }, + { + "epoch": 20.508333333333333, + "loss": 0.14715611934661865, + "loss_ce": 3.693668872983835e-07, + "loss_iou": 0.12890625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 449279200, + "step": 4922 + }, + { + "epoch": 20.5125, + "grad_norm": 1.6858748187256003, + "learning_rate": 5e-05, + "loss": 0.1058, + "num_input_tokens_seen": 449370092, + "step": 4923 + }, + { + "epoch": 20.5125, + "loss": 0.05952360853552818, + "loss_ce": 6.704354746034369e-06, + "loss_iou": 0.16015625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 449370092, + "step": 4923 + }, + { + "epoch": 20.516666666666666, + "grad_norm": 2.420186816096476, + "learning_rate": 5e-05, + "loss": 0.0296, + "num_input_tokens_seen": 449461756, + "step": 4924 + }, + { + "epoch": 20.516666666666666, + "loss": 0.023171938955783844, + "loss_ce": 1.4685980431750068e-06, + "loss_iou": 0.27734375, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 449461756, + "step": 4924 + }, + { + "epoch": 20.520833333333332, + "grad_norm": 2.356011899654843, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 449553476, + "step": 4925 + }, + { + "epoch": 20.520833333333332, + "loss": 0.030006490647792816, + "loss_ce": 7.71085251471959e-06, + "loss_iou": 0.2421875, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 449553476, + "step": 4925 + }, + { + "epoch": 20.525, + "grad_norm": 2.4604421924811173, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 449644924, + "step": 4926 + }, + { + "epoch": 20.525, + "loss": 0.02052384242415428, + "loss_ce": 7.716460004303372e-07, + "loss_iou": 0.34375, + "loss_num": 0.00408935546875, + "loss_xval": 0.0205078125, + "num_input_tokens_seen": 449644924, + "step": 4926 + }, + { + "epoch": 20.529166666666665, + "grad_norm": 2.972454634959909, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 449736300, + "step": 4927 + }, + { + "epoch": 20.529166666666665, + "loss": 0.07225370407104492, + "loss_ce": 3.344216111145215e-06, + "loss_iou": 0.158203125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 449736300, + "step": 4927 + }, + { + "epoch": 20.533333333333335, + "grad_norm": 2.8204475890224407, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 449827500, + "step": 4928 + }, + { + "epoch": 20.533333333333335, + "loss": 0.10444729030132294, + "loss_ce": 8.792231938059558e-07, + "loss_iou": 0.20703125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 449827500, + "step": 4928 + }, + { + "epoch": 20.5375, + "grad_norm": 2.4035501839042865, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 449919048, + "step": 4929 + }, + { + "epoch": 20.5375, + "loss": 0.07318446040153503, + "loss_ce": 3.3055887342925416e-06, + "loss_iou": 0.1171875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 449919048, + "step": 4929 + }, + { + "epoch": 20.541666666666668, + "grad_norm": 2.1633006857346166, + "learning_rate": 5e-05, + "loss": 0.0327, + "num_input_tokens_seen": 450009940, + "step": 4930 + }, + { + "epoch": 20.541666666666668, + "loss": 0.04298504441976547, + "loss_ce": 1.0339194886910263e-06, + "loss_iou": 0.263671875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 450009940, + "step": 4930 + }, + { + "epoch": 20.545833333333334, + "grad_norm": 2.71002409160864, + "learning_rate": 5e-05, + "loss": 0.0279, + "num_input_tokens_seen": 450100912, + "step": 4931 + }, + { + "epoch": 20.545833333333334, + "loss": 0.02671864628791809, + "loss_ce": 5.075110038887942e-07, + "loss_iou": 0.1787109375, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 450100912, + "step": 4931 + }, + { + "epoch": 20.55, + "grad_norm": 4.408404110767032, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 450192432, + "step": 4932 + }, + { + "epoch": 20.55, + "loss": 0.060529936105012894, + "loss_ce": 0.00024245594977401197, + "loss_iou": 0.359375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 450192432, + "step": 4932 + }, + { + "epoch": 20.554166666666667, + "grad_norm": 2.696961014567847, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 450284088, + "step": 4933 + }, + { + "epoch": 20.554166666666667, + "loss": 0.08644923567771912, + "loss_ce": 8.188740139303263e-06, + "loss_iou": 0.30078125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 450284088, + "step": 4933 + }, + { + "epoch": 20.558333333333334, + "grad_norm": 2.7205486255151228, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 450374740, + "step": 4934 + }, + { + "epoch": 20.558333333333334, + "loss": 0.033890899270772934, + "loss_ce": 4.690532659878954e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 450374740, + "step": 4934 + }, + { + "epoch": 20.5625, + "grad_norm": 5.026116382223273, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 450466152, + "step": 4935 + }, + { + "epoch": 20.5625, + "loss": 0.046482790261507034, + "loss_ce": 4.519070898822974e-06, + "loss_iou": 0.28515625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 450466152, + "step": 4935 + }, + { + "epoch": 20.566666666666666, + "grad_norm": 2.5441081198921967, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 450557664, + "step": 4936 + }, + { + "epoch": 20.566666666666666, + "loss": 0.048398494720458984, + "loss_ce": 1.2869089914602228e-05, + "loss_iou": 0.23046875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 450557664, + "step": 4936 + }, + { + "epoch": 20.570833333333333, + "grad_norm": 1.8114682143984124, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 450649088, + "step": 4937 + }, + { + "epoch": 20.570833333333333, + "loss": 0.03278595581650734, + "loss_ce": 2.4474088604620192e-06, + "loss_iou": 0.123046875, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 450649088, + "step": 4937 + }, + { + "epoch": 20.575, + "grad_norm": 1.6078122221436446, + "learning_rate": 5e-05, + "loss": 0.0259, + "num_input_tokens_seen": 450740344, + "step": 4938 + }, + { + "epoch": 20.575, + "loss": 0.030597861856222153, + "loss_ce": 3.990992354374612e-06, + "loss_iou": 0.267578125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 450740344, + "step": 4938 + }, + { + "epoch": 20.579166666666666, + "grad_norm": 2.0393086384958923, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 450831416, + "step": 4939 + }, + { + "epoch": 20.579166666666666, + "loss": 0.05075102299451828, + "loss_ce": 2.886481524910778e-07, + "loss_iou": 0.2578125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 450831416, + "step": 4939 + }, + { + "epoch": 20.583333333333332, + "grad_norm": 3.8968025371129684, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 450922228, + "step": 4940 + }, + { + "epoch": 20.583333333333332, + "loss": 0.07820156961679459, + "loss_ce": 2.7902996180273476e-07, + "loss_iou": 0.1982421875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 450922228, + "step": 4940 + }, + { + "epoch": 20.5875, + "grad_norm": 1.5736493006126937, + "learning_rate": 5e-05, + "loss": 0.0303, + "num_input_tokens_seen": 451013644, + "step": 4941 + }, + { + "epoch": 20.5875, + "loss": 0.033364854753017426, + "loss_ce": 1.5105636066437e-06, + "loss_iou": 0.16796875, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 451013644, + "step": 4941 + }, + { + "epoch": 20.591666666666665, + "grad_norm": 2.6377710329881214, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 451104932, + "step": 4942 + }, + { + "epoch": 20.591666666666665, + "loss": 0.06481263041496277, + "loss_ce": 9.242695568900672e-07, + "loss_iou": 0.1396484375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 451104932, + "step": 4942 + }, + { + "epoch": 20.595833333333335, + "grad_norm": 2.3555642663825966, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 451196380, + "step": 4943 + }, + { + "epoch": 20.595833333333335, + "loss": 0.029668737202882767, + "loss_ce": 1.8372354588791495e-06, + "loss_iou": 0.220703125, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 451196380, + "step": 4943 + }, + { + "epoch": 20.6, + "grad_norm": 2.60332049955372, + "learning_rate": 5e-05, + "loss": 0.0327, + "num_input_tokens_seen": 451288068, + "step": 4944 + }, + { + "epoch": 20.6, + "loss": 0.03186202049255371, + "loss_ce": 1.6669213209752343e-06, + "loss_iou": 0.306640625, + "loss_num": 0.006378173828125, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 451288068, + "step": 4944 + }, + { + "epoch": 20.604166666666668, + "grad_norm": 2.567116921718554, + "learning_rate": 5e-05, + "loss": 0.0937, + "num_input_tokens_seen": 451379516, + "step": 4945 + }, + { + "epoch": 20.604166666666668, + "loss": 0.04703579470515251, + "loss_ce": 5.3982519602868706e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 451379516, + "step": 4945 + }, + { + "epoch": 20.608333333333334, + "grad_norm": 1.6160182593215768, + "learning_rate": 5e-05, + "loss": 0.0424, + "num_input_tokens_seen": 451470420, + "step": 4946 + }, + { + "epoch": 20.608333333333334, + "loss": 0.027573561295866966, + "loss_ce": 9.291766218666453e-07, + "loss_iou": 0.10595703125, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 451470420, + "step": 4946 + }, + { + "epoch": 20.6125, + "grad_norm": 1.3467015493968915, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 451561924, + "step": 4947 + }, + { + "epoch": 20.6125, + "loss": 0.04347209259867668, + "loss_ce": 0.0010602866532281041, + "loss_iou": 0.21875, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 451561924, + "step": 4947 + }, + { + "epoch": 20.616666666666667, + "grad_norm": 1.9679868243687706, + "learning_rate": 5e-05, + "loss": 0.0303, + "num_input_tokens_seen": 451653044, + "step": 4948 + }, + { + "epoch": 20.616666666666667, + "loss": 0.038005080074071884, + "loss_ce": 1.0696719982661307e-05, + "loss_iou": 0.32421875, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 451653044, + "step": 4948 + }, + { + "epoch": 20.620833333333334, + "grad_norm": 2.5169923263187264, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 451743928, + "step": 4949 + }, + { + "epoch": 20.620833333333334, + "loss": 0.027726180851459503, + "loss_ce": 9.608993423171341e-07, + "loss_iou": 0.275390625, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 451743928, + "step": 4949 + }, + { + "epoch": 20.625, + "grad_norm": 2.635246367786188, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 451834636, + "step": 4950 + }, + { + "epoch": 20.625, + "loss": 0.05243086814880371, + "loss_ce": 1.6696777720426326e-06, + "loss_iou": 0.24609375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 451834636, + "step": 4950 + }, + { + "epoch": 20.629166666666666, + "grad_norm": 4.173072825862923, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 451926068, + "step": 4951 + }, + { + "epoch": 20.629166666666666, + "loss": 0.08744490146636963, + "loss_ce": 4.417190666572424e-06, + "loss_iou": 0.162109375, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 451926068, + "step": 4951 + }, + { + "epoch": 20.633333333333333, + "grad_norm": 2.5783744439870997, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 452017932, + "step": 4952 + }, + { + "epoch": 20.633333333333333, + "loss": 0.028565822169184685, + "loss_ce": 1.3682913504453609e-06, + "loss_iou": 0.255859375, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 452017932, + "step": 4952 + }, + { + "epoch": 20.6375, + "grad_norm": 3.316342304316025, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 452109576, + "step": 4953 + }, + { + "epoch": 20.6375, + "loss": 0.030848821625113487, + "loss_ce": 3.1791919354873244e-06, + "loss_iou": 0.361328125, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 452109576, + "step": 4953 + }, + { + "epoch": 20.641666666666666, + "grad_norm": 3.1392782822852583, + "learning_rate": 5e-05, + "loss": 0.0635, + "num_input_tokens_seen": 452200544, + "step": 4954 + }, + { + "epoch": 20.641666666666666, + "loss": 0.08412261307239532, + "loss_ce": 9.096820576814935e-07, + "loss_iou": 0.287109375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 452200544, + "step": 4954 + }, + { + "epoch": 20.645833333333332, + "grad_norm": 2.492693871159871, + "learning_rate": 5e-05, + "loss": 0.0939, + "num_input_tokens_seen": 452292380, + "step": 4955 + }, + { + "epoch": 20.645833333333332, + "loss": 0.06159612163901329, + "loss_ce": 4.017566880065715e-06, + "loss_iou": 0.3125, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 452292380, + "step": 4955 + }, + { + "epoch": 20.65, + "grad_norm": 2.5843001524160107, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 452384136, + "step": 4956 + }, + { + "epoch": 20.65, + "loss": 0.08120512962341309, + "loss_ce": 1.310615425609285e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 452384136, + "step": 4956 + }, + { + "epoch": 20.654166666666665, + "grad_norm": 2.9508343488964126, + "learning_rate": 5e-05, + "loss": 0.034, + "num_input_tokens_seen": 452474504, + "step": 4957 + }, + { + "epoch": 20.654166666666665, + "loss": 0.022920362651348114, + "loss_ce": 1.6614910691714613e-06, + "loss_iou": 0.271484375, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 452474504, + "step": 4957 + }, + { + "epoch": 20.658333333333335, + "grad_norm": 3.197401309944741, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 452565788, + "step": 4958 + }, + { + "epoch": 20.658333333333335, + "loss": 0.03662900626659393, + "loss_ce": 6.894825492054224e-05, + "loss_iou": 0.369140625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 452565788, + "step": 4958 + }, + { + "epoch": 20.6625, + "grad_norm": 2.461105491930837, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 452657340, + "step": 4959 + }, + { + "epoch": 20.6625, + "loss": 0.050036199390888214, + "loss_ce": 1.0260364433634095e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 452657340, + "step": 4959 + }, + { + "epoch": 20.666666666666668, + "grad_norm": 3.42370674644056, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 452748192, + "step": 4960 + }, + { + "epoch": 20.666666666666668, + "loss": 0.06822985410690308, + "loss_ce": 7.814576747477986e-06, + "loss_iou": 0.291015625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 452748192, + "step": 4960 + }, + { + "epoch": 20.670833333333334, + "grad_norm": 31.597300727880917, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 452839420, + "step": 4961 + }, + { + "epoch": 20.670833333333334, + "loss": 0.030092181637883186, + "loss_ce": 3.236833435948938e-05, + "loss_iou": 0.20703125, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 452839420, + "step": 4961 + }, + { + "epoch": 20.675, + "grad_norm": 1.5560147417535624, + "learning_rate": 5e-05, + "loss": 0.0382, + "num_input_tokens_seen": 452930708, + "step": 4962 + }, + { + "epoch": 20.675, + "loss": 0.03558146208524704, + "loss_ce": 5.5974505812628195e-06, + "loss_iou": 0.265625, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 452930708, + "step": 4962 + }, + { + "epoch": 20.679166666666667, + "grad_norm": 1.530031099095701, + "learning_rate": 5e-05, + "loss": 0.0508, + "num_input_tokens_seen": 453022456, + "step": 4963 + }, + { + "epoch": 20.679166666666667, + "loss": 0.04042421653866768, + "loss_ce": 3.684350076582632e-06, + "loss_iou": 0.1337890625, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 453022456, + "step": 4963 + }, + { + "epoch": 20.683333333333334, + "grad_norm": 1.1910616741094349, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 453114192, + "step": 4964 + }, + { + "epoch": 20.683333333333334, + "loss": 0.045101359486579895, + "loss_ce": 4.005429673270555e-06, + "loss_iou": 0.1796875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 453114192, + "step": 4964 + }, + { + "epoch": 20.6875, + "grad_norm": 1.4244696213226584, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 453205336, + "step": 4965 + }, + { + "epoch": 20.6875, + "loss": 0.026782888919115067, + "loss_ce": 3.7131860608496936e-06, + "loss_iou": 0.25390625, + "loss_num": 0.00537109375, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 453205336, + "step": 4965 + }, + { + "epoch": 20.691666666666666, + "grad_norm": 1.3465652173814044, + "learning_rate": 5e-05, + "loss": 0.0285, + "num_input_tokens_seen": 453296424, + "step": 4966 + }, + { + "epoch": 20.691666666666666, + "loss": 0.03415077552199364, + "loss_ce": 1.6061776477727108e-06, + "loss_iou": 0.232421875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 453296424, + "step": 4966 + }, + { + "epoch": 20.695833333333333, + "grad_norm": 13.981129636039054, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 453387852, + "step": 4967 + }, + { + "epoch": 20.695833333333333, + "loss": 0.03573738783597946, + "loss_ce": 1.3047135780652752e-06, + "loss_iou": 0.1318359375, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 453387852, + "step": 4967 + }, + { + "epoch": 20.7, + "grad_norm": 3.2851894633221, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 453479236, + "step": 4968 + }, + { + "epoch": 20.7, + "loss": 0.049005232751369476, + "loss_ce": 3.215075412299484e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 453479236, + "step": 4968 + }, + { + "epoch": 20.704166666666666, + "grad_norm": 2.673697904962962, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 453570696, + "step": 4969 + }, + { + "epoch": 20.704166666666666, + "loss": 0.05169004574418068, + "loss_ce": 8.527647878509015e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 453570696, + "step": 4969 + }, + { + "epoch": 20.708333333333332, + "grad_norm": 2.1221730915444774, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 453660612, + "step": 4970 + }, + { + "epoch": 20.708333333333332, + "loss": 0.03286011517047882, + "loss_ce": 3.1290687729779165e-07, + "loss_iou": 0.2353515625, + "loss_num": 0.006561279296875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 453660612, + "step": 4970 + }, + { + "epoch": 20.7125, + "grad_norm": 2.8251891286421738, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 453751436, + "step": 4971 + }, + { + "epoch": 20.7125, + "loss": 0.049376361072063446, + "loss_ce": 1.4179516256263014e-05, + "loss_iou": 0.15625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 453751436, + "step": 4971 + }, + { + "epoch": 20.716666666666665, + "grad_norm": 2.863646490213054, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 453842632, + "step": 4972 + }, + { + "epoch": 20.716666666666665, + "loss": 0.05301649123430252, + "loss_ce": 0.00040418541175313294, + "loss_iou": 0.1572265625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 453842632, + "step": 4972 + }, + { + "epoch": 20.720833333333335, + "grad_norm": 3.3607289465878973, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 453934264, + "step": 4973 + }, + { + "epoch": 20.720833333333335, + "loss": 0.043854400515556335, + "loss_ce": 6.405851422641717e-07, + "loss_iou": 0.294921875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 453934264, + "step": 4973 + }, + { + "epoch": 20.725, + "grad_norm": 3.2709664410734023, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 454025712, + "step": 4974 + }, + { + "epoch": 20.725, + "loss": 0.030515987426042557, + "loss_ce": 0.00027306523406878114, + "loss_iou": 0.287109375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 454025712, + "step": 4974 + }, + { + "epoch": 20.729166666666668, + "grad_norm": 2.5059070516777266, + "learning_rate": 5e-05, + "loss": 0.085, + "num_input_tokens_seen": 454116892, + "step": 4975 + }, + { + "epoch": 20.729166666666668, + "loss": 0.06782197952270508, + "loss_ce": 1.1920225006178953e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 454116892, + "step": 4975 + }, + { + "epoch": 20.733333333333334, + "grad_norm": 3.253961335286682, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 454208128, + "step": 4976 + }, + { + "epoch": 20.733333333333334, + "loss": 0.0467258021235466, + "loss_ce": 3.3931551115529146e-06, + "loss_iou": 0.2197265625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 454208128, + "step": 4976 + }, + { + "epoch": 20.7375, + "grad_norm": 3.3823405746496076, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 454299648, + "step": 4977 + }, + { + "epoch": 20.7375, + "loss": 0.058784861117601395, + "loss_ce": 3.749439940747834e-07, + "loss_iou": 0.2490234375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 454299648, + "step": 4977 + }, + { + "epoch": 20.741666666666667, + "grad_norm": 2.786119531230585, + "learning_rate": 5e-05, + "loss": 0.0294, + "num_input_tokens_seen": 454390716, + "step": 4978 + }, + { + "epoch": 20.741666666666667, + "loss": 0.030614566057920456, + "loss_ce": 1.3063713595329318e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.006103515625, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 454390716, + "step": 4978 + }, + { + "epoch": 20.745833333333334, + "grad_norm": 2.2338281704667216, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 454481636, + "step": 4979 + }, + { + "epoch": 20.745833333333334, + "loss": 0.06036851927638054, + "loss_ce": 4.749166691908613e-06, + "loss_iou": 0.234375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 454481636, + "step": 4979 + }, + { + "epoch": 20.75, + "grad_norm": 2.0095958006107137, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 454572604, + "step": 4980 + }, + { + "epoch": 20.75, + "loss": 0.035050153732299805, + "loss_ce": 7.142032245610608e-07, + "loss_iou": 0.18359375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 454572604, + "step": 4980 + }, + { + "epoch": 20.754166666666666, + "grad_norm": 2.264932494350519, + "learning_rate": 5e-05, + "loss": 0.0429, + "num_input_tokens_seen": 454663844, + "step": 4981 + }, + { + "epoch": 20.754166666666666, + "loss": 0.03668418526649475, + "loss_ce": 2.054937340290053e-06, + "loss_iou": 0.271484375, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 454663844, + "step": 4981 + }, + { + "epoch": 20.758333333333333, + "grad_norm": 2.424681742010323, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 454755116, + "step": 4982 + }, + { + "epoch": 20.758333333333333, + "loss": 0.053710393607616425, + "loss_ce": 7.08766674506478e-06, + "loss_iou": 0.24609375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 454755116, + "step": 4982 + }, + { + "epoch": 20.7625, + "grad_norm": 2.746761167417316, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 454846020, + "step": 4983 + }, + { + "epoch": 20.7625, + "loss": 0.04286251217126846, + "loss_ce": 5.733260195484036e-07, + "loss_iou": 0.2314453125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 454846020, + "step": 4983 + }, + { + "epoch": 20.766666666666666, + "grad_norm": 4.3282290256004545, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 454937608, + "step": 4984 + }, + { + "epoch": 20.766666666666666, + "loss": 0.026827123016119003, + "loss_ce": 2.170055950045935e-06, + "loss_iou": 0.2734375, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 454937608, + "step": 4984 + }, + { + "epoch": 20.770833333333332, + "grad_norm": 1.8867506140848092, + "learning_rate": 5e-05, + "loss": 0.077, + "num_input_tokens_seen": 455029116, + "step": 4985 + }, + { + "epoch": 20.770833333333332, + "loss": 0.12005200982093811, + "loss_ce": 3.4892395888164174e-06, + "loss_iou": 0.189453125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 455029116, + "step": 4985 + }, + { + "epoch": 20.775, + "grad_norm": 1.8878314678749506, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 455120496, + "step": 4986 + }, + { + "epoch": 20.775, + "loss": 0.06420492380857468, + "loss_ce": 5.6972850870806724e-05, + "loss_iou": 0.138671875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 455120496, + "step": 4986 + }, + { + "epoch": 20.779166666666665, + "grad_norm": 2.5363373125866993, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 455211888, + "step": 4987 + }, + { + "epoch": 20.779166666666665, + "loss": 0.04293936491012573, + "loss_ce": 1.1320619250909658e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 455211888, + "step": 4987 + }, + { + "epoch": 20.783333333333335, + "grad_norm": 3.6959060546808886, + "learning_rate": 5e-05, + "loss": 0.0936, + "num_input_tokens_seen": 455302952, + "step": 4988 + }, + { + "epoch": 20.783333333333335, + "loss": 0.03244100511074066, + "loss_ce": 8.187475941667799e-07, + "loss_iou": 0.23828125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 455302952, + "step": 4988 + }, + { + "epoch": 20.7875, + "grad_norm": 2.2800049431943354, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 455394648, + "step": 4989 + }, + { + "epoch": 20.7875, + "loss": 0.03557218611240387, + "loss_ce": 0.00029386812821030617, + "loss_iou": 0.28125, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 455394648, + "step": 4989 + }, + { + "epoch": 20.791666666666668, + "grad_norm": 4.819173410676004, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 455486140, + "step": 4990 + }, + { + "epoch": 20.791666666666668, + "loss": 0.047681789845228195, + "loss_ce": 8.96269193617627e-05, + "loss_iou": 0.310546875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 455486140, + "step": 4990 + }, + { + "epoch": 20.795833333333334, + "grad_norm": 3.162090646689533, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 455577380, + "step": 4991 + }, + { + "epoch": 20.795833333333334, + "loss": 0.046526797115802765, + "loss_ce": 1.801136022550054e-05, + "loss_iou": 0.333984375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 455577380, + "step": 4991 + }, + { + "epoch": 20.8, + "grad_norm": 1.782414590120586, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 455669196, + "step": 4992 + }, + { + "epoch": 20.8, + "loss": 0.04718227684497833, + "loss_ce": 8.602525485912338e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 455669196, + "step": 4992 + }, + { + "epoch": 20.804166666666667, + "grad_norm": 1.6869126328682527, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 455759684, + "step": 4993 + }, + { + "epoch": 20.804166666666667, + "loss": 0.029673587530851364, + "loss_ce": 0.0011549104237928987, + "loss_iou": 0.1728515625, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 455759684, + "step": 4993 + }, + { + "epoch": 20.808333333333334, + "grad_norm": 3.5504204820461487, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 455851540, + "step": 4994 + }, + { + "epoch": 20.808333333333334, + "loss": 0.04614360257983208, + "loss_ce": 1.0263997864967678e-06, + "loss_iou": 0.3203125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 455851540, + "step": 4994 + }, + { + "epoch": 20.8125, + "grad_norm": 7.9999161852664775, + "learning_rate": 5e-05, + "loss": 0.1073, + "num_input_tokens_seen": 455943032, + "step": 4995 + }, + { + "epoch": 20.8125, + "loss": 0.16412606835365295, + "loss_ce": 0.0007807252113707364, + "loss_iou": 0.171875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 455943032, + "step": 4995 + }, + { + "epoch": 20.816666666666666, + "grad_norm": 2.344526080385181, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 456034340, + "step": 4996 + }, + { + "epoch": 20.816666666666666, + "loss": 0.06617549061775208, + "loss_ce": 5.753272034780821e-06, + "loss_iou": 0.236328125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 456034340, + "step": 4996 + }, + { + "epoch": 20.820833333333333, + "grad_norm": 0.7441476896758881, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 456125576, + "step": 4997 + }, + { + "epoch": 20.820833333333333, + "loss": 0.04812666028738022, + "loss_ce": 4.406510640819761e-07, + "loss_iou": 0.203125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 456125576, + "step": 4997 + }, + { + "epoch": 20.825, + "grad_norm": 2.994426114133977, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 456216976, + "step": 4998 + }, + { + "epoch": 20.825, + "loss": 0.03340182825922966, + "loss_ce": 3.3879524607982603e-07, + "loss_iou": 0.2265625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 456216976, + "step": 4998 + }, + { + "epoch": 20.829166666666666, + "grad_norm": 0.7822136255921042, + "learning_rate": 5e-05, + "loss": 0.0225, + "num_input_tokens_seen": 456307684, + "step": 4999 + }, + { + "epoch": 20.829166666666666, + "loss": 0.0229647196829319, + "loss_ce": 2.422164016024908e-07, + "loss_iou": 0.1669921875, + "loss_num": 0.004608154296875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 456307684, + "step": 4999 + }, + { + "epoch": 20.833333333333332, + "grad_norm": 1.1877380935922035, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 456398700, + "step": 5000 + }, + { + "epoch": 20.833333333333332, + "eval_seeclick_CIoU": 0.24451086670160294, + "eval_seeclick_GIoU": 0.21807420998811722, + "eval_seeclick_IoU": 0.3399440348148346, + "eval_seeclick_MAE_all": 0.10255898535251617, + "eval_seeclick_MAE_h": 0.08058749511837959, + "eval_seeclick_MAE_w": 0.2307920679450035, + "eval_seeclick_MAE_x_boxes": 0.22138375788927078, + "eval_seeclick_MAE_y_boxes": 0.0859718956053257, + "eval_seeclick_NUM_probability": 0.9999991357326508, + "eval_seeclick_inside_bbox": 0.5852272808551788, + "eval_seeclick_loss": 0.6117884516716003, + "eval_seeclick_loss_ce": 0.14957696199417114, + "eval_seeclick_loss_iou": 0.45806884765625, + "eval_seeclick_loss_num": 0.0897216796875, + "eval_seeclick_loss_xval": 0.4483642578125, + "eval_seeclick_runtime": 76.2635, + "eval_seeclick_samples_per_second": 0.564, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 456398700, + "step": 5000 + }, + { + "epoch": 20.833333333333332, + "eval_icons_CIoU": 0.2932829260826111, + "eval_icons_GIoU": 0.28378401696681976, + "eval_icons_IoU": 0.39218054711818695, + "eval_icons_MAE_all": 0.07791866362094879, + "eval_icons_MAE_h": 0.1548103168606758, + "eval_icons_MAE_w": 0.1250578574836254, + "eval_icons_MAE_x_boxes": 0.12813802063465118, + "eval_icons_MAE_y_boxes": 0.1542813554406166, + "eval_icons_NUM_probability": 0.9999994933605194, + "eval_icons_inside_bbox": 0.5503472238779068, + "eval_icons_loss": 0.3675766885280609, + "eval_icons_loss_ce": 0.003656390472315252, + "eval_icons_loss_iou": 0.23919677734375, + "eval_icons_loss_num": 0.07466888427734375, + "eval_icons_loss_xval": 0.37335205078125, + "eval_icons_runtime": 87.8661, + "eval_icons_samples_per_second": 0.569, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 456398700, + "step": 5000 + }, + { + "epoch": 20.833333333333332, + "eval_screenspot_CIoU": 0.35886751115322113, + "eval_screenspot_GIoU": 0.3485796203215917, + "eval_screenspot_IoU": 0.4398085872332255, + "eval_screenspot_MAE_all": 0.10286615292231242, + "eval_screenspot_MAE_h": 0.09417568395535152, + "eval_screenspot_MAE_w": 0.22546063363552094, + "eval_screenspot_MAE_x_boxes": 0.18992613007624945, + "eval_screenspot_MAE_y_boxes": 0.09073736766974132, + "eval_screenspot_NUM_probability": 0.9999987681706747, + "eval_screenspot_inside_bbox": 0.7012499968210856, + "eval_screenspot_loss": 0.5169656276702881, + "eval_screenspot_loss_ce": 0.005585619326060017, + "eval_screenspot_loss_iou": 0.3651123046875, + "eval_screenspot_loss_num": 0.10569254557291667, + "eval_screenspot_loss_xval": 0.5284016927083334, + "eval_screenspot_runtime": 149.1336, + "eval_screenspot_samples_per_second": 0.597, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 456398700, + "step": 5000 + }, + { + "epoch": 20.833333333333332, + "eval_compot_CIoU": 0.4774431884288788, + "eval_compot_GIoU": 0.4672915041446686, + "eval_compot_IoU": 0.5645192861557007, + "eval_compot_MAE_all": 0.0588533990085125, + "eval_compot_MAE_h": 0.06242892146110535, + "eval_compot_MAE_w": 0.16087764501571655, + "eval_compot_MAE_x_boxes": 0.16103634238243103, + "eval_compot_MAE_y_boxes": 0.061904361471533775, + "eval_compot_NUM_probability": 0.9999991953372955, + "eval_compot_inside_bbox": 0.7361111044883728, + "eval_compot_loss": 0.3558048605918884, + "eval_compot_loss_ce": 0.0663297027349472, + "eval_compot_loss_iou": 0.25738525390625, + "eval_compot_loss_num": 0.05814361572265625, + "eval_compot_loss_xval": 0.2906646728515625, + "eval_compot_runtime": 96.7957, + "eval_compot_samples_per_second": 0.517, + "eval_compot_steps_per_second": 0.021, + "num_input_tokens_seen": 456398700, + "step": 5000 + }, + { + "epoch": 20.833333333333332, + "loss": 0.38143765926361084, + "loss_ce": 0.06625210493803024, + "loss_iou": 0.26171875, + "loss_num": 0.06298828125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 456398700, + "step": 5000 + }, + { + "epoch": 20.8375, + "grad_norm": 3.337976958031263, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 456490176, + "step": 5001 + }, + { + "epoch": 20.8375, + "loss": 0.0710010752081871, + "loss_ce": 0.00035288077197037637, + "loss_iou": 0.197265625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 456490176, + "step": 5001 + }, + { + "epoch": 20.841666666666665, + "grad_norm": 1.9477035834448888, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 456581212, + "step": 5002 + }, + { + "epoch": 20.841666666666665, + "loss": 0.03873321786522865, + "loss_ce": 6.408430181181757e-06, + "loss_iou": 0.11328125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 456581212, + "step": 5002 + }, + { + "epoch": 20.845833333333335, + "grad_norm": 2.132809389200927, + "learning_rate": 5e-05, + "loss": 0.0859, + "num_input_tokens_seen": 456671592, + "step": 5003 + }, + { + "epoch": 20.845833333333335, + "loss": 0.09218017011880875, + "loss_ce": 1.8222680182589102e-06, + "loss_iou": 0.3671875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 456671592, + "step": 5003 + }, + { + "epoch": 20.85, + "grad_norm": 4.614820135793629, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 456762728, + "step": 5004 + }, + { + "epoch": 20.85, + "loss": 0.09170796722173691, + "loss_ce": 2.6483596684556687e-06, + "loss_iou": 0.1796875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 456762728, + "step": 5004 + }, + { + "epoch": 20.854166666666668, + "grad_norm": 2.787794613467919, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 456853680, + "step": 5005 + }, + { + "epoch": 20.854166666666668, + "loss": 0.04777923598885536, + "loss_ce": 3.964900315622799e-06, + "loss_iou": 0.236328125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 456853680, + "step": 5005 + }, + { + "epoch": 20.858333333333334, + "grad_norm": 2.606264263599698, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 456945360, + "step": 5006 + }, + { + "epoch": 20.858333333333334, + "loss": 0.06281374394893646, + "loss_ce": 1.6193531337194145e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 456945360, + "step": 5006 + }, + { + "epoch": 20.8625, + "grad_norm": 3.1024920269881573, + "learning_rate": 5e-05, + "loss": 0.0997, + "num_input_tokens_seen": 457035500, + "step": 5007 + }, + { + "epoch": 20.8625, + "loss": 0.0986967384815216, + "loss_ce": 7.156594801926985e-05, + "loss_iou": 0.333984375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 457035500, + "step": 5007 + }, + { + "epoch": 20.866666666666667, + "grad_norm": 2.668374767398001, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 457126620, + "step": 5008 + }, + { + "epoch": 20.866666666666667, + "loss": 0.052392516285181046, + "loss_ce": 1.4628599274146836e-06, + "loss_iou": 0.16796875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 457126620, + "step": 5008 + }, + { + "epoch": 20.870833333333334, + "grad_norm": 2.0467538160029886, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 457217796, + "step": 5009 + }, + { + "epoch": 20.870833333333334, + "loss": 0.03557741269469261, + "loss_ce": 1.5464353282368393e-06, + "loss_iou": 0.2109375, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 457217796, + "step": 5009 + }, + { + "epoch": 20.875, + "grad_norm": 3.191668867345092, + "learning_rate": 5e-05, + "loss": 0.079, + "num_input_tokens_seen": 457309084, + "step": 5010 + }, + { + "epoch": 20.875, + "loss": 0.10242909938097, + "loss_ce": 6.703396024931862e-07, + "loss_iou": 0.27734375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 457309084, + "step": 5010 + }, + { + "epoch": 20.879166666666666, + "grad_norm": 0.4186359609007298, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 457400456, + "step": 5011 + }, + { + "epoch": 20.879166666666666, + "loss": 0.024034079164266586, + "loss_ce": 1.4864708646200597e-06, + "loss_iou": 0.302734375, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 457400456, + "step": 5011 + }, + { + "epoch": 20.883333333333333, + "grad_norm": 1.1425865029006985, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 457492068, + "step": 5012 + }, + { + "epoch": 20.883333333333333, + "loss": 0.05428203195333481, + "loss_ce": 0.0007999764638952911, + "loss_iou": 0.2265625, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 457492068, + "step": 5012 + }, + { + "epoch": 20.8875, + "grad_norm": 1.4859386496117717, + "learning_rate": 5e-05, + "loss": 0.0343, + "num_input_tokens_seen": 457583168, + "step": 5013 + }, + { + "epoch": 20.8875, + "loss": 0.046580176800489426, + "loss_ce": 1.7980943084694445e-05, + "loss_iou": 0.25390625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 457583168, + "step": 5013 + }, + { + "epoch": 20.891666666666666, + "grad_norm": 4.079518351007812, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 457674756, + "step": 5014 + }, + { + "epoch": 20.891666666666666, + "loss": 0.06736816465854645, + "loss_ce": 6.158369956210663e-07, + "loss_iou": 0.2431640625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 457674756, + "step": 5014 + }, + { + "epoch": 20.895833333333332, + "grad_norm": 4.058994239972123, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 457765868, + "step": 5015 + }, + { + "epoch": 20.895833333333332, + "loss": 0.030060499906539917, + "loss_ce": 6.862018153697136e-07, + "loss_iou": 0.28515625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 457765868, + "step": 5015 + }, + { + "epoch": 20.9, + "grad_norm": 2.2914838035650793, + "learning_rate": 5e-05, + "loss": 0.0659, + "num_input_tokens_seen": 457857512, + "step": 5016 + }, + { + "epoch": 20.9, + "loss": 0.05039317160844803, + "loss_ce": 1.0222338460152969e-06, + "loss_iou": 0.228515625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 457857512, + "step": 5016 + }, + { + "epoch": 20.904166666666665, + "grad_norm": 3.7616964007946807, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 457949028, + "step": 5017 + }, + { + "epoch": 20.904166666666665, + "loss": 0.028687015175819397, + "loss_ce": 4.915837052976713e-07, + "loss_iou": 0.236328125, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 457949028, + "step": 5017 + }, + { + "epoch": 20.908333333333335, + "grad_norm": 1.4106076483959105, + "learning_rate": 5e-05, + "loss": 0.0278, + "num_input_tokens_seen": 458040516, + "step": 5018 + }, + { + "epoch": 20.908333333333335, + "loss": 0.02542763389647007, + "loss_ce": 2.1749872757936828e-05, + "loss_iou": 0.154296875, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 458040516, + "step": 5018 + }, + { + "epoch": 20.9125, + "grad_norm": 1.5423874174158072, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 458131972, + "step": 5019 + }, + { + "epoch": 20.9125, + "loss": 0.04575492814183235, + "loss_ce": 1.4497619531539385e-06, + "loss_iou": 0.1552734375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 458131972, + "step": 5019 + }, + { + "epoch": 20.916666666666668, + "grad_norm": 2.4457580219008053, + "learning_rate": 5e-05, + "loss": 0.0309, + "num_input_tokens_seen": 458223872, + "step": 5020 + }, + { + "epoch": 20.916666666666668, + "loss": 0.03433071821928024, + "loss_ce": 2.2569920474779792e-06, + "loss_iou": 0.201171875, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 458223872, + "step": 5020 + }, + { + "epoch": 20.920833333333334, + "grad_norm": 2.6501025146001673, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 458315016, + "step": 5021 + }, + { + "epoch": 20.920833333333334, + "loss": 0.061783723533153534, + "loss_ce": 8.834414302327787e-07, + "loss_iou": 0.33984375, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 458315016, + "step": 5021 + }, + { + "epoch": 20.925, + "grad_norm": 1.8261893391247939, + "learning_rate": 5e-05, + "loss": 0.025, + "num_input_tokens_seen": 458406876, + "step": 5022 + }, + { + "epoch": 20.925, + "loss": 0.020394135266542435, + "loss_ce": 7.634452003912884e-07, + "loss_iou": 0.1337890625, + "loss_num": 0.00408935546875, + "loss_xval": 0.0203857421875, + "num_input_tokens_seen": 458406876, + "step": 5022 + }, + { + "epoch": 20.929166666666667, + "grad_norm": 7.652682655986564, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 458497860, + "step": 5023 + }, + { + "epoch": 20.929166666666667, + "loss": 0.03658346086740494, + "loss_ce": 5.112319172440039e-07, + "loss_iou": 0.25, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 458497860, + "step": 5023 + }, + { + "epoch": 20.933333333333334, + "grad_norm": 3.372785638348631, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 458588648, + "step": 5024 + }, + { + "epoch": 20.933333333333334, + "loss": 0.07515182346105576, + "loss_ce": 2.2860726858198177e-06, + "loss_iou": 0.28125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 458588648, + "step": 5024 + }, + { + "epoch": 20.9375, + "grad_norm": 2.4428530832099793, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 458680288, + "step": 5025 + }, + { + "epoch": 20.9375, + "loss": 0.036588169634342194, + "loss_ce": 0.0004935072502121329, + "loss_iou": 0.1796875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 458680288, + "step": 5025 + }, + { + "epoch": 20.941666666666666, + "grad_norm": 16.422718461859585, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 458771808, + "step": 5026 + }, + { + "epoch": 20.941666666666666, + "loss": 0.062179841101169586, + "loss_ce": 2.767868636510684e-07, + "loss_iou": 0.337890625, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 458771808, + "step": 5026 + }, + { + "epoch": 20.945833333333333, + "grad_norm": 2.947345254036747, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 458862592, + "step": 5027 + }, + { + "epoch": 20.945833333333333, + "loss": 0.051636189222335815, + "loss_ce": 4.4753275574294094e-07, + "loss_iou": 0.244140625, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 458862592, + "step": 5027 + }, + { + "epoch": 20.95, + "grad_norm": 3.3874837066315058, + "learning_rate": 5e-05, + "loss": 0.0847, + "num_input_tokens_seen": 458954308, + "step": 5028 + }, + { + "epoch": 20.95, + "loss": 0.034767866134643555, + "loss_ce": 8.345932656084187e-06, + "loss_iou": 0.3515625, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 458954308, + "step": 5028 + }, + { + "epoch": 20.954166666666666, + "grad_norm": 1.9194942216754223, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 459045720, + "step": 5029 + }, + { + "epoch": 20.954166666666666, + "loss": 0.06594738364219666, + "loss_ce": 1.4157611985865515e-05, + "loss_iou": 0.208984375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 459045720, + "step": 5029 + }, + { + "epoch": 20.958333333333332, + "grad_norm": 1.9020720939002147, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 459136588, + "step": 5030 + }, + { + "epoch": 20.958333333333332, + "loss": 0.035919640213251114, + "loss_ce": 4.5112381030776305e-07, + "loss_iou": 0.1982421875, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 459136588, + "step": 5030 + }, + { + "epoch": 20.9625, + "grad_norm": 1.6016189140048587, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 459227684, + "step": 5031 + }, + { + "epoch": 20.9625, + "loss": 0.036752112209796906, + "loss_ce": 2.4207462047343142e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 459227684, + "step": 5031 + }, + { + "epoch": 20.966666666666665, + "grad_norm": 1.9618492446367335, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 459318620, + "step": 5032 + }, + { + "epoch": 20.966666666666665, + "loss": 0.05786576122045517, + "loss_ce": 4.430605713423574e-06, + "loss_iou": 0.177734375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 459318620, + "step": 5032 + }, + { + "epoch": 20.970833333333335, + "grad_norm": 0.5597310898126626, + "learning_rate": 5e-05, + "loss": 0.0456, + "num_input_tokens_seen": 459409972, + "step": 5033 + }, + { + "epoch": 20.970833333333335, + "loss": 0.04477032646536827, + "loss_ce": 1.0376323871241766e-06, + "loss_iou": 0.125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 459409972, + "step": 5033 + }, + { + "epoch": 20.975, + "grad_norm": 4.598239030189397, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 459500968, + "step": 5034 + }, + { + "epoch": 20.975, + "loss": 0.042814694344997406, + "loss_ce": 6.1625769376405515e-06, + "loss_iou": 0.1669921875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 459500968, + "step": 5034 + }, + { + "epoch": 20.979166666666668, + "grad_norm": 4.326593137145183, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 459592724, + "step": 5035 + }, + { + "epoch": 20.979166666666668, + "loss": 0.056089065968990326, + "loss_ce": 6.64217586745508e-05, + "loss_iou": 0.1953125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 459592724, + "step": 5035 + }, + { + "epoch": 20.983333333333334, + "grad_norm": 1.715973771888479, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 459684760, + "step": 5036 + }, + { + "epoch": 20.983333333333334, + "loss": 0.06677746027708054, + "loss_ce": 4.996963070880156e-06, + "loss_iou": 0.2421875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 459684760, + "step": 5036 + }, + { + "epoch": 20.9875, + "grad_norm": 0.9801665764126386, + "learning_rate": 5e-05, + "loss": 0.0231, + "num_input_tokens_seen": 459775892, + "step": 5037 + }, + { + "epoch": 20.9875, + "loss": 0.02707877941429615, + "loss_ce": 0.00038352797855623066, + "loss_iou": 0.028076171875, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 459775892, + "step": 5037 + }, + { + "epoch": 20.991666666666667, + "grad_norm": 1.6502273748838339, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 459867180, + "step": 5038 + }, + { + "epoch": 20.991666666666667, + "loss": 0.04678413271903992, + "loss_ce": 6.85528220856213e-07, + "loss_iou": 0.25, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 459867180, + "step": 5038 + }, + { + "epoch": 20.995833333333334, + "grad_norm": 13.8492837561597, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 459958980, + "step": 5039 + }, + { + "epoch": 20.995833333333334, + "loss": 0.03167743235826492, + "loss_ce": 1.8604114870868216e-07, + "loss_iou": 0.193359375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 459958980, + "step": 5039 + }, + { + "epoch": 21.0, + "grad_norm": 2.997662396936798, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 460050472, + "step": 5040 + }, + { + "epoch": 21.0, + "loss": 0.051897190511226654, + "loss_ce": 2.0506997771008173e-06, + "loss_iou": 0.23828125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 460050472, + "step": 5040 + }, + { + "epoch": 21.004166666666666, + "grad_norm": 2.6900583583643867, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 460142104, + "step": 5041 + }, + { + "epoch": 21.004166666666666, + "loss": 0.032296109944581985, + "loss_ce": 5.4287651437334716e-05, + "loss_iou": 0.26953125, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 460142104, + "step": 5041 + }, + { + "epoch": 21.008333333333333, + "grad_norm": 6.367181829234212, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 460232776, + "step": 5042 + }, + { + "epoch": 21.008333333333333, + "loss": 0.023956449702382088, + "loss_ce": 1.5054619950660708e-07, + "loss_iou": 0.244140625, + "loss_num": 0.004791259765625, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 460232776, + "step": 5042 + }, + { + "epoch": 21.0125, + "grad_norm": 2.7300525317927953, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 460324156, + "step": 5043 + }, + { + "epoch": 21.0125, + "loss": 0.04496552795171738, + "loss_ce": 2.0766037778230384e-05, + "loss_iou": 0.2578125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 460324156, + "step": 5043 + }, + { + "epoch": 21.016666666666666, + "grad_norm": 3.6869204388176096, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 460415312, + "step": 5044 + }, + { + "epoch": 21.016666666666666, + "loss": 0.036822736263275146, + "loss_ce": 3.2774910323496442e-06, + "loss_iou": 0.158203125, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 460415312, + "step": 5044 + }, + { + "epoch": 21.020833333333332, + "grad_norm": 2.5416651150304377, + "learning_rate": 5e-05, + "loss": 0.0953, + "num_input_tokens_seen": 460506688, + "step": 5045 + }, + { + "epoch": 21.020833333333332, + "loss": 0.13226720690727234, + "loss_ce": 4.033006462123012e-06, + "loss_iou": 0.384765625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 460506688, + "step": 5045 + }, + { + "epoch": 21.025, + "grad_norm": 1.3640919934626132, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 460597880, + "step": 5046 + }, + { + "epoch": 21.025, + "loss": 0.026312153786420822, + "loss_ce": 6.000811481499113e-06, + "loss_iou": 0.007080078125, + "loss_num": 0.0052490234375, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 460597880, + "step": 5046 + }, + { + "epoch": 21.029166666666665, + "grad_norm": 2.111172573092908, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 460688988, + "step": 5047 + }, + { + "epoch": 21.029166666666665, + "loss": 0.031969036906957626, + "loss_ce": 1.8739243614618317e-06, + "loss_iou": 0.1328125, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 460688988, + "step": 5047 + }, + { + "epoch": 21.033333333333335, + "grad_norm": 2.483429770162464, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 460780876, + "step": 5048 + }, + { + "epoch": 21.033333333333335, + "loss": 0.038086287677288055, + "loss_ce": 1.560991586302407e-05, + "loss_iou": 0.271484375, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 460780876, + "step": 5048 + }, + { + "epoch": 21.0375, + "grad_norm": 2.667579986903645, + "learning_rate": 5e-05, + "loss": 0.0318, + "num_input_tokens_seen": 460872044, + "step": 5049 + }, + { + "epoch": 21.0375, + "loss": 0.028536254540085793, + "loss_ce": 3.283719706814736e-05, + "loss_iou": 0.173828125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 460872044, + "step": 5049 + }, + { + "epoch": 21.041666666666668, + "grad_norm": 2.1337628588389506, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 460963572, + "step": 5050 + }, + { + "epoch": 21.041666666666668, + "loss": 0.02425198256969452, + "loss_ce": 0.00021938963618595153, + "loss_iou": 0.158203125, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 460963572, + "step": 5050 + }, + { + "epoch": 21.045833333333334, + "grad_norm": 1.825125178904793, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 461054480, + "step": 5051 + }, + { + "epoch": 21.045833333333334, + "loss": 0.05502419173717499, + "loss_ce": 1.0019826959251077e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 461054480, + "step": 5051 + }, + { + "epoch": 21.05, + "grad_norm": 3.647261849290983, + "learning_rate": 5e-05, + "loss": 0.0316, + "num_input_tokens_seen": 461145168, + "step": 5052 + }, + { + "epoch": 21.05, + "loss": 0.038705211132764816, + "loss_ce": 2.4182521883631125e-05, + "loss_iou": 0.26171875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 461145168, + "step": 5052 + }, + { + "epoch": 21.054166666666667, + "grad_norm": 1.985815327131711, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 461236644, + "step": 5053 + }, + { + "epoch": 21.054166666666667, + "loss": 0.0394623801112175, + "loss_ce": 3.151773398712976e-06, + "loss_iou": 0.265625, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 461236644, + "step": 5053 + }, + { + "epoch": 21.058333333333334, + "grad_norm": 2.5777082297701255, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 461327936, + "step": 5054 + }, + { + "epoch": 21.058333333333334, + "loss": 0.03418642282485962, + "loss_ce": 6.014150858391076e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 461327936, + "step": 5054 + }, + { + "epoch": 21.0625, + "grad_norm": 2.1285702336933796, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 461418936, + "step": 5055 + }, + { + "epoch": 21.0625, + "loss": 0.0940098986029625, + "loss_ce": 4.946746230416466e-07, + "loss_iou": 0.2578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 461418936, + "step": 5055 + }, + { + "epoch": 21.066666666666666, + "grad_norm": 2.5402293879248803, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 461509892, + "step": 5056 + }, + { + "epoch": 21.066666666666666, + "loss": 0.09463571012020111, + "loss_ce": 7.034442432996002e-07, + "loss_iou": 0.1826171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 461509892, + "step": 5056 + }, + { + "epoch": 21.070833333333333, + "grad_norm": 3.144230652175914, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 461601260, + "step": 5057 + }, + { + "epoch": 21.070833333333333, + "loss": 0.0379909984767437, + "loss_ce": 4.2440492507012095e-06, + "loss_iou": 0.375, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 461601260, + "step": 5057 + }, + { + "epoch": 21.075, + "grad_norm": 3.121256612516521, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 461692564, + "step": 5058 + }, + { + "epoch": 21.075, + "loss": 0.04023890942335129, + "loss_ce": 1.4835345609753858e-06, + "loss_iou": 0.3359375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 461692564, + "step": 5058 + }, + { + "epoch": 21.079166666666666, + "grad_norm": 2.355240613160966, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 461784752, + "step": 5059 + }, + { + "epoch": 21.079166666666666, + "loss": 0.024186529219150543, + "loss_ce": 1.348738805972971e-06, + "loss_iou": 0.224609375, + "loss_num": 0.004852294921875, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 461784752, + "step": 5059 + }, + { + "epoch": 21.083333333333332, + "grad_norm": 2.8721899574505234, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 461874012, + "step": 5060 + }, + { + "epoch": 21.083333333333332, + "loss": 0.02969268709421158, + "loss_ce": 4.485932004172355e-05, + "loss_iou": 0.3359375, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 461874012, + "step": 5060 + }, + { + "epoch": 21.0875, + "grad_norm": 2.34567439799414, + "learning_rate": 5e-05, + "loss": 0.0345, + "num_input_tokens_seen": 461965424, + "step": 5061 + }, + { + "epoch": 21.0875, + "loss": 0.03739079833030701, + "loss_ce": 6.762803877791157e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 461965424, + "step": 5061 + }, + { + "epoch": 21.091666666666665, + "grad_norm": 2.147127909839684, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 462057056, + "step": 5062 + }, + { + "epoch": 21.091666666666665, + "loss": 0.030382130295038223, + "loss_ce": 1.7140035197371617e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 462057056, + "step": 5062 + }, + { + "epoch": 21.095833333333335, + "grad_norm": 2.5408218475613866, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 462148068, + "step": 5063 + }, + { + "epoch": 21.095833333333335, + "loss": 0.03337986022233963, + "loss_ce": 5.466506991069764e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 462148068, + "step": 5063 + }, + { + "epoch": 21.1, + "grad_norm": 3.4630009665204855, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 462239468, + "step": 5064 + }, + { + "epoch": 21.1, + "loss": 0.04306018725037575, + "loss_ce": 7.514630851801485e-06, + "loss_iou": 0.283203125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 462239468, + "step": 5064 + }, + { + "epoch": 21.104166666666668, + "grad_norm": 2.7646771813700397, + "learning_rate": 5e-05, + "loss": 0.0316, + "num_input_tokens_seen": 462329652, + "step": 5065 + }, + { + "epoch": 21.104166666666668, + "loss": 0.03456904739141464, + "loss_ce": 2.5871258912957273e-07, + "loss_iou": 0.2041015625, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 462329652, + "step": 5065 + }, + { + "epoch": 21.108333333333334, + "grad_norm": 2.55066057684379, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 462421144, + "step": 5066 + }, + { + "epoch": 21.108333333333334, + "loss": 0.029943151399493217, + "loss_ce": 5.405986030382337e-06, + "loss_iou": 0.267578125, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 462421144, + "step": 5066 + }, + { + "epoch": 21.1125, + "grad_norm": 2.8795739152285855, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 462512932, + "step": 5067 + }, + { + "epoch": 21.1125, + "loss": 0.0422324612736702, + "loss_ce": 1.1392266060283873e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 462512932, + "step": 5067 + }, + { + "epoch": 21.116666666666667, + "grad_norm": 2.163544161580569, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 462603004, + "step": 5068 + }, + { + "epoch": 21.116666666666667, + "loss": 0.030031124129891396, + "loss_ce": 1.8264663594891317e-06, + "loss_iou": 0.185546875, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 462603004, + "step": 5068 + }, + { + "epoch": 21.120833333333334, + "grad_norm": 1.964811021290909, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 462694484, + "step": 5069 + }, + { + "epoch": 21.120833333333334, + "loss": 0.022997736930847168, + "loss_ce": 2.7417595447332133e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.004608154296875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 462694484, + "step": 5069 + }, + { + "epoch": 21.125, + "grad_norm": 1.3945575648462891, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 462785588, + "step": 5070 + }, + { + "epoch": 21.125, + "loss": 0.036909572780132294, + "loss_ce": 9.011285874294117e-05, + "loss_iou": 0.30859375, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 462785588, + "step": 5070 + }, + { + "epoch": 21.129166666666666, + "grad_norm": 0.7354333195966727, + "learning_rate": 5e-05, + "loss": 0.0295, + "num_input_tokens_seen": 462877428, + "step": 5071 + }, + { + "epoch": 21.129166666666666, + "loss": 0.023640796542167664, + "loss_ce": 2.7821044568554498e-05, + "loss_iou": 0.2578125, + "loss_num": 0.004730224609375, + "loss_xval": 0.0235595703125, + "num_input_tokens_seen": 462877428, + "step": 5071 + }, + { + "epoch": 21.133333333333333, + "grad_norm": 2.465280101647072, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 462969388, + "step": 5072 + }, + { + "epoch": 21.133333333333333, + "loss": 0.02642808109521866, + "loss_ce": 0.00011429937876528129, + "loss_iou": 0.162109375, + "loss_num": 0.0052490234375, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 462969388, + "step": 5072 + }, + { + "epoch": 21.1375, + "grad_norm": 1.3225705648155508, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 463061024, + "step": 5073 + }, + { + "epoch": 21.1375, + "loss": 0.052613116800785065, + "loss_ce": 8.112248224279028e-07, + "loss_iou": 0.1376953125, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 463061024, + "step": 5073 + }, + { + "epoch": 21.141666666666666, + "grad_norm": 4.000357639254447, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 463152004, + "step": 5074 + }, + { + "epoch": 21.141666666666666, + "loss": 0.02655082568526268, + "loss_ce": 5.327280518940825e-07, + "loss_iou": 0.2421875, + "loss_num": 0.00531005859375, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 463152004, + "step": 5074 + }, + { + "epoch": 21.145833333333332, + "grad_norm": 3.28702933174913, + "learning_rate": 5e-05, + "loss": 0.086, + "num_input_tokens_seen": 463243256, + "step": 5075 + }, + { + "epoch": 21.145833333333332, + "loss": 0.13034138083457947, + "loss_ce": 8.039830845518736e-07, + "loss_iou": 0.26953125, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 463243256, + "step": 5075 + }, + { + "epoch": 21.15, + "grad_norm": 5.610082202684336, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 463334320, + "step": 5076 + }, + { + "epoch": 21.15, + "loss": 0.07361648231744766, + "loss_ce": 4.5481851884687785e-07, + "loss_iou": 0.2890625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 463334320, + "step": 5076 + }, + { + "epoch": 21.154166666666665, + "grad_norm": 4.048968495288943, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 463426408, + "step": 5077 + }, + { + "epoch": 21.154166666666665, + "loss": 0.039019372314214706, + "loss_ce": 4.842506314162165e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 463426408, + "step": 5077 + }, + { + "epoch": 21.158333333333335, + "grad_norm": 2.9163708565496, + "learning_rate": 5e-05, + "loss": 0.0575, + "num_input_tokens_seen": 463516684, + "step": 5078 + }, + { + "epoch": 21.158333333333335, + "loss": 0.07347186654806137, + "loss_ce": 8.00481871010561e-07, + "loss_iou": 0.1669921875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 463516684, + "step": 5078 + }, + { + "epoch": 21.1625, + "grad_norm": 2.568020065248772, + "learning_rate": 5e-05, + "loss": 0.0374, + "num_input_tokens_seen": 463607868, + "step": 5079 + }, + { + "epoch": 21.1625, + "loss": 0.03308640792965889, + "loss_ce": 5.353571395971812e-06, + "loss_iou": 0.2431640625, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 463607868, + "step": 5079 + }, + { + "epoch": 21.166666666666668, + "grad_norm": 2.019850801933071, + "learning_rate": 5e-05, + "loss": 0.0295, + "num_input_tokens_seen": 463699692, + "step": 5080 + }, + { + "epoch": 21.166666666666668, + "loss": 0.023806005716323853, + "loss_ce": 2.295150807185564e-06, + "loss_iou": 0.05126953125, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 463699692, + "step": 5080 + }, + { + "epoch": 21.170833333333334, + "grad_norm": 2.1500917435027826, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 463791052, + "step": 5081 + }, + { + "epoch": 21.170833333333334, + "loss": 0.039952442049980164, + "loss_ce": 4.93404695589561e-06, + "loss_iou": 0.23046875, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 463791052, + "step": 5081 + }, + { + "epoch": 21.175, + "grad_norm": 2.976406847478439, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 463882336, + "step": 5082 + }, + { + "epoch": 21.175, + "loss": 0.0614861361682415, + "loss_ce": 8.455392048745125e-07, + "loss_iou": 0.20703125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 463882336, + "step": 5082 + }, + { + "epoch": 21.179166666666667, + "grad_norm": 3.8415896939854655, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 463973180, + "step": 5083 + }, + { + "epoch": 21.179166666666667, + "loss": 0.034776244312524796, + "loss_ce": 1.4637113281423808e-06, + "loss_iou": 0.3515625, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 463973180, + "step": 5083 + }, + { + "epoch": 21.183333333333334, + "grad_norm": 3.0208003017066174, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 464064584, + "step": 5084 + }, + { + "epoch": 21.183333333333334, + "loss": 0.06564892083406448, + "loss_ce": 3.6127748899161816e-05, + "loss_iou": 0.265625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 464064584, + "step": 5084 + }, + { + "epoch": 21.1875, + "grad_norm": 3.241014229091648, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 464156084, + "step": 5085 + }, + { + "epoch": 21.1875, + "loss": 0.04939756914973259, + "loss_ce": 0.0018359236419200897, + "loss_iou": 0.216796875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 464156084, + "step": 5085 + }, + { + "epoch": 21.191666666666666, + "grad_norm": 3.468386060828746, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 464246820, + "step": 5086 + }, + { + "epoch": 21.191666666666666, + "loss": 0.05452055484056473, + "loss_ce": 9.02461238183605e-07, + "loss_iou": 0.240234375, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 464246820, + "step": 5086 + }, + { + "epoch": 21.195833333333333, + "grad_norm": 3.8644889622630334, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 464337828, + "step": 5087 + }, + { + "epoch": 21.195833333333333, + "loss": 0.031357407569885254, + "loss_ce": 5.93802496950957e-07, + "loss_iou": 0.171875, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 464337828, + "step": 5087 + }, + { + "epoch": 21.2, + "grad_norm": 3.08540439088849, + "learning_rate": 5e-05, + "loss": 0.072, + "num_input_tokens_seen": 464429808, + "step": 5088 + }, + { + "epoch": 21.2, + "loss": 0.08263568580150604, + "loss_ce": 1.715917846922821e-06, + "loss_iou": 0.193359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 464429808, + "step": 5088 + }, + { + "epoch": 21.204166666666666, + "grad_norm": 2.1076324500298864, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 464521652, + "step": 5089 + }, + { + "epoch": 21.204166666666666, + "loss": 0.024250905960798264, + "loss_ce": 4.6885875235602725e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 464521652, + "step": 5089 + }, + { + "epoch": 21.208333333333332, + "grad_norm": 2.1363318465816445, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 464613240, + "step": 5090 + }, + { + "epoch": 21.208333333333332, + "loss": 0.023094555363059044, + "loss_ce": 1.56367605086416e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.004608154296875, + "loss_xval": 0.0230712890625, + "num_input_tokens_seen": 464613240, + "step": 5090 + }, + { + "epoch": 21.2125, + "grad_norm": 1.4078491064513639, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 464704340, + "step": 5091 + }, + { + "epoch": 21.2125, + "loss": 0.07828094810247421, + "loss_ce": 3.3591459214221686e-06, + "loss_iou": 0.185546875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 464704340, + "step": 5091 + }, + { + "epoch": 21.216666666666665, + "grad_norm": 0.7291619373870915, + "learning_rate": 5e-05, + "loss": 0.0176, + "num_input_tokens_seen": 464795532, + "step": 5092 + }, + { + "epoch": 21.216666666666665, + "loss": 0.014080582186579704, + "loss_ce": 0.00014930842735338956, + "loss_iou": 0.181640625, + "loss_num": 0.0027923583984375, + "loss_xval": 0.013916015625, + "num_input_tokens_seen": 464795532, + "step": 5092 + }, + { + "epoch": 21.220833333333335, + "grad_norm": 1.0392975169344065, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 464885692, + "step": 5093 + }, + { + "epoch": 21.220833333333335, + "loss": 0.14798246324062347, + "loss_ce": 0.00018581724725663662, + "loss_iou": 0.1298828125, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 464885692, + "step": 5093 + }, + { + "epoch": 21.225, + "grad_norm": 1.6224562431284506, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 464977256, + "step": 5094 + }, + { + "epoch": 21.225, + "loss": 0.06149175018072128, + "loss_ce": 6.458715688495431e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 464977256, + "step": 5094 + }, + { + "epoch": 21.229166666666668, + "grad_norm": 1.2327032327636886, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 465068456, + "step": 5095 + }, + { + "epoch": 21.229166666666668, + "loss": 0.04151744768023491, + "loss_ce": 2.880004103644751e-05, + "loss_iou": 0.1708984375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 465068456, + "step": 5095 + }, + { + "epoch": 21.233333333333334, + "grad_norm": 0.8941970332041576, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 465160128, + "step": 5096 + }, + { + "epoch": 21.233333333333334, + "loss": 0.03685451298952103, + "loss_ce": 0.00037075072759762406, + "loss_iou": 0.16796875, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 465160128, + "step": 5096 + }, + { + "epoch": 21.2375, + "grad_norm": 0.6661944717074832, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 465250648, + "step": 5097 + }, + { + "epoch": 21.2375, + "loss": 0.016508635133504868, + "loss_ce": 3.295803981018253e-05, + "loss_iou": 0.25, + "loss_num": 0.0032958984375, + "loss_xval": 0.0164794921875, + "num_input_tokens_seen": 465250648, + "step": 5097 + }, + { + "epoch": 21.241666666666667, + "grad_norm": 0.8383924992028557, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 465342480, + "step": 5098 + }, + { + "epoch": 21.241666666666667, + "loss": 0.023276329040527344, + "loss_ce": 4.482225267565809e-05, + "loss_iou": 0.203125, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 465342480, + "step": 5098 + }, + { + "epoch": 21.245833333333334, + "grad_norm": 1.5745019835901888, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 465434396, + "step": 5099 + }, + { + "epoch": 21.245833333333334, + "loss": 0.04514005407691002, + "loss_ce": 4.554187398753129e-06, + "loss_iou": 0.1728515625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 465434396, + "step": 5099 + }, + { + "epoch": 21.25, + "grad_norm": 6.255873712161377, + "learning_rate": 5e-05, + "loss": 0.0669, + "num_input_tokens_seen": 465525368, + "step": 5100 + }, + { + "epoch": 21.25, + "loss": 0.10162433236837387, + "loss_ce": 7.985177603586635e-07, + "loss_iou": 0.2431640625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 465525368, + "step": 5100 + }, + { + "epoch": 21.254166666666666, + "grad_norm": 1.161366941581007, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 465616504, + "step": 5101 + }, + { + "epoch": 21.254166666666666, + "loss": 0.0856485515832901, + "loss_ce": 9.648006198403891e-07, + "loss_iou": 0.265625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 465616504, + "step": 5101 + }, + { + "epoch": 21.258333333333333, + "grad_norm": 1.1708937248271916, + "learning_rate": 5e-05, + "loss": 0.0253, + "num_input_tokens_seen": 465708368, + "step": 5102 + }, + { + "epoch": 21.258333333333333, + "loss": 0.02742135338485241, + "loss_ce": 6.234511965885758e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 465708368, + "step": 5102 + }, + { + "epoch": 21.2625, + "grad_norm": 4.447723485840417, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 465799968, + "step": 5103 + }, + { + "epoch": 21.2625, + "loss": 0.07481688261032104, + "loss_ce": 3.0401115509448573e-06, + "loss_iou": 0.2734375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 465799968, + "step": 5103 + }, + { + "epoch": 21.266666666666666, + "grad_norm": 1.3703411906643668, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 465890928, + "step": 5104 + }, + { + "epoch": 21.266666666666666, + "loss": 0.05223878473043442, + "loss_ce": 3.2282886763823626e-07, + "loss_iou": 0.18359375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 465890928, + "step": 5104 + }, + { + "epoch": 21.270833333333332, + "grad_norm": 2.769525755927495, + "learning_rate": 5e-05, + "loss": 0.0284, + "num_input_tokens_seen": 465981804, + "step": 5105 + }, + { + "epoch": 21.270833333333332, + "loss": 0.03247120976448059, + "loss_ce": 5.079807010588411e-07, + "loss_iou": 0.2236328125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 465981804, + "step": 5105 + }, + { + "epoch": 21.275, + "grad_norm": 1.7096892427478472, + "learning_rate": 5e-05, + "loss": 0.0317, + "num_input_tokens_seen": 466073436, + "step": 5106 + }, + { + "epoch": 21.275, + "loss": 0.041040971875190735, + "loss_ce": 0.00025422926410101354, + "loss_iou": 0.1650390625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 466073436, + "step": 5106 + }, + { + "epoch": 21.279166666666665, + "grad_norm": 3.939910670255322, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 466164960, + "step": 5107 + }, + { + "epoch": 21.279166666666665, + "loss": 0.05779968202114105, + "loss_ce": 7.568756700493395e-05, + "loss_iou": 0.28125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 466164960, + "step": 5107 + }, + { + "epoch": 21.283333333333335, + "grad_norm": 2.9411654891285806, + "learning_rate": 5e-05, + "loss": 0.0741, + "num_input_tokens_seen": 466256052, + "step": 5108 + }, + { + "epoch": 21.283333333333335, + "loss": 0.04657086730003357, + "loss_ce": 1.0450962690811139e-06, + "loss_iou": 0.251953125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 466256052, + "step": 5108 + }, + { + "epoch": 21.2875, + "grad_norm": 3.239933849340362, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 466347372, + "step": 5109 + }, + { + "epoch": 21.2875, + "loss": 0.042045846581459045, + "loss_ce": 2.5268442982451234e-07, + "loss_iou": 0.25390625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 466347372, + "step": 5109 + }, + { + "epoch": 21.291666666666668, + "grad_norm": 6.448067717519509, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 466438272, + "step": 5110 + }, + { + "epoch": 21.291666666666668, + "loss": 0.03699462115764618, + "loss_ce": 3.0206821975298226e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 466438272, + "step": 5110 + }, + { + "epoch": 21.295833333333334, + "grad_norm": 1.5650711524552718, + "learning_rate": 5e-05, + "loss": 0.0487, + "num_input_tokens_seen": 466529504, + "step": 5111 + }, + { + "epoch": 21.295833333333334, + "loss": 0.027462124824523926, + "loss_ce": 3.932990239263745e-06, + "loss_iou": 0.162109375, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 466529504, + "step": 5111 + }, + { + "epoch": 21.3, + "grad_norm": 3.5636719032526685, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 466621060, + "step": 5112 + }, + { + "epoch": 21.3, + "loss": 0.05067984759807587, + "loss_ce": 0.00010840390314115211, + "loss_iou": 0.275390625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 466621060, + "step": 5112 + }, + { + "epoch": 21.304166666666667, + "grad_norm": 2.7456772689630182, + "learning_rate": 5e-05, + "loss": 0.0445, + "num_input_tokens_seen": 466712584, + "step": 5113 + }, + { + "epoch": 21.304166666666667, + "loss": 0.04448344558477402, + "loss_ce": 4.072552201250801e-06, + "loss_iou": 0.2197265625, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 466712584, + "step": 5113 + }, + { + "epoch": 21.308333333333334, + "grad_norm": 4.233691763444781, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 466803620, + "step": 5114 + }, + { + "epoch": 21.308333333333334, + "loss": 0.026803674176335335, + "loss_ce": 1.6103215330076637e-06, + "loss_iou": 0.21484375, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 466803620, + "step": 5114 + }, + { + "epoch": 21.3125, + "grad_norm": 3.9309097782444935, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 466895120, + "step": 5115 + }, + { + "epoch": 21.3125, + "loss": 0.033508796244859695, + "loss_ce": 1.5754052583361045e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 466895120, + "step": 5115 + }, + { + "epoch": 21.316666666666666, + "grad_norm": 1.9957723175132391, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 466986304, + "step": 5116 + }, + { + "epoch": 21.316666666666666, + "loss": 0.034279532730579376, + "loss_ce": 8.292201528092846e-06, + "loss_iou": 0.32421875, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 466986304, + "step": 5116 + }, + { + "epoch": 21.320833333333333, + "grad_norm": 1.605300799367561, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 467077756, + "step": 5117 + }, + { + "epoch": 21.320833333333333, + "loss": 0.027537034824490547, + "loss_ce": 1.0179230230278336e-05, + "loss_iou": 0.232421875, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 467077756, + "step": 5117 + }, + { + "epoch": 21.325, + "grad_norm": 1.4889248178904293, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 467168436, + "step": 5118 + }, + { + "epoch": 21.325, + "loss": 0.06738781929016113, + "loss_ce": 0.00047039767378009856, + "loss_iou": 0.1953125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 467168436, + "step": 5118 + }, + { + "epoch": 21.329166666666666, + "grad_norm": 1.6569154713614649, + "learning_rate": 5e-05, + "loss": 0.0625, + "num_input_tokens_seen": 467260136, + "step": 5119 + }, + { + "epoch": 21.329166666666666, + "loss": 0.089745432138443, + "loss_ce": 8.497641829308122e-06, + "loss_iou": 0.20703125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 467260136, + "step": 5119 + }, + { + "epoch": 21.333333333333332, + "grad_norm": 2.7731899562486255, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 467351804, + "step": 5120 + }, + { + "epoch": 21.333333333333332, + "loss": 0.04865720868110657, + "loss_ce": 5.033303386881016e-05, + "loss_iou": 0.3671875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 467351804, + "step": 5120 + }, + { + "epoch": 21.3375, + "grad_norm": 2.712222274695321, + "learning_rate": 5e-05, + "loss": 0.0313, + "num_input_tokens_seen": 467443352, + "step": 5121 + }, + { + "epoch": 21.3375, + "loss": 0.026291847229003906, + "loss_ce": 9.549963806421147e-07, + "loss_iou": 0.265625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 467443352, + "step": 5121 + }, + { + "epoch": 21.341666666666665, + "grad_norm": 2.6302096628943454, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 467534672, + "step": 5122 + }, + { + "epoch": 21.341666666666665, + "loss": 0.06893706321716309, + "loss_ce": 5.48449042980792e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 467534672, + "step": 5122 + }, + { + "epoch": 21.345833333333335, + "grad_norm": 6.87071556194301, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 467625644, + "step": 5123 + }, + { + "epoch": 21.345833333333335, + "loss": 0.04277816414833069, + "loss_ce": 7.777657629048917e-06, + "loss_iou": 0.271484375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 467625644, + "step": 5123 + }, + { + "epoch": 21.35, + "grad_norm": 2.9986205193387017, + "learning_rate": 5e-05, + "loss": 0.0373, + "num_input_tokens_seen": 467716840, + "step": 5124 + }, + { + "epoch": 21.35, + "loss": 0.034434035420417786, + "loss_ce": 0.0005137492553330958, + "loss_iou": 0.3125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 467716840, + "step": 5124 + }, + { + "epoch": 21.354166666666668, + "grad_norm": 2.778658504696439, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 467808148, + "step": 5125 + }, + { + "epoch": 21.354166666666668, + "loss": 0.02985912188887596, + "loss_ce": 4.344867920735851e-05, + "loss_iou": 0.314453125, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 467808148, + "step": 5125 + }, + { + "epoch": 21.358333333333334, + "grad_norm": 2.6175877227407347, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 467899308, + "step": 5126 + }, + { + "epoch": 21.358333333333334, + "loss": 0.030777405947446823, + "loss_ce": 8.059006177063566e-06, + "loss_iou": 0.302734375, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 467899308, + "step": 5126 + }, + { + "epoch": 21.3625, + "grad_norm": 2.3286229772663187, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 467990460, + "step": 5127 + }, + { + "epoch": 21.3625, + "loss": 0.05989307537674904, + "loss_ce": 2.329000380996149e-06, + "loss_iou": 0.15234375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 467990460, + "step": 5127 + }, + { + "epoch": 21.366666666666667, + "grad_norm": 2.6920855529969043, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 468081116, + "step": 5128 + }, + { + "epoch": 21.366666666666667, + "loss": 0.041651129722595215, + "loss_ce": 2.2659719434159342e-06, + "loss_iou": 0.263671875, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 468081116, + "step": 5128 + }, + { + "epoch": 21.370833333333334, + "grad_norm": 0.9620627133383356, + "learning_rate": 5e-05, + "loss": 0.0224, + "num_input_tokens_seen": 468172632, + "step": 5129 + }, + { + "epoch": 21.370833333333334, + "loss": 0.02263781800866127, + "loss_ce": 1.4041656868357677e-06, + "loss_iou": 0.314453125, + "loss_num": 0.0045166015625, + "loss_xval": 0.0225830078125, + "num_input_tokens_seen": 468172632, + "step": 5129 + }, + { + "epoch": 21.375, + "grad_norm": 4.4940261797059895, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 468263924, + "step": 5130 + }, + { + "epoch": 21.375, + "loss": 0.08159472048282623, + "loss_ce": 0.00016237185627687722, + "loss_iou": 0.095703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 468263924, + "step": 5130 + }, + { + "epoch": 21.379166666666666, + "grad_norm": 2.425644312036774, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 468355576, + "step": 5131 + }, + { + "epoch": 21.379166666666666, + "loss": 0.06586514413356781, + "loss_ce": 5.811029950564262e-07, + "loss_iou": 0.203125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 468355576, + "step": 5131 + }, + { + "epoch": 21.383333333333333, + "grad_norm": 3.169692159749731, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 468446672, + "step": 5132 + }, + { + "epoch": 21.383333333333333, + "loss": 0.06761285662651062, + "loss_ce": 1.161280351880123e-06, + "loss_iou": 0.275390625, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 468446672, + "step": 5132 + }, + { + "epoch": 21.3875, + "grad_norm": 4.388810851070836, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 468537072, + "step": 5133 + }, + { + "epoch": 21.3875, + "loss": 0.033356256783008575, + "loss_ce": 5.455306109070079e-07, + "loss_iou": 0.267578125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 468537072, + "step": 5133 + }, + { + "epoch": 21.391666666666666, + "grad_norm": 2.4028206194039163, + "learning_rate": 5e-05, + "loss": 0.0721, + "num_input_tokens_seen": 468627996, + "step": 5134 + }, + { + "epoch": 21.391666666666666, + "loss": 0.04357944428920746, + "loss_ce": 3.4140907700930256e-07, + "loss_iou": 0.28515625, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 468627996, + "step": 5134 + }, + { + "epoch": 21.395833333333332, + "grad_norm": 2.5505019179703026, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 468716964, + "step": 5135 + }, + { + "epoch": 21.395833333333332, + "loss": 0.026316307485103607, + "loss_ce": 2.5413703042431735e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 468716964, + "step": 5135 + }, + { + "epoch": 21.4, + "grad_norm": 1.4135616379362372, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 468807892, + "step": 5136 + }, + { + "epoch": 21.4, + "loss": 0.05752602219581604, + "loss_ce": 0.001999289495870471, + "loss_iou": 0.212890625, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 468807892, + "step": 5136 + }, + { + "epoch": 21.404166666666665, + "grad_norm": 2.6192946966806017, + "learning_rate": 5e-05, + "loss": 0.0746, + "num_input_tokens_seen": 468898704, + "step": 5137 + }, + { + "epoch": 21.404166666666665, + "loss": 0.07630203664302826, + "loss_ce": 4.6984385448922694e-07, + "loss_iou": 0.1435546875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 468898704, + "step": 5137 + }, + { + "epoch": 21.408333333333335, + "grad_norm": 2.2379969434569134, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 468990852, + "step": 5138 + }, + { + "epoch": 21.408333333333335, + "loss": 0.06957028061151505, + "loss_ce": 0.0005471492768265307, + "loss_iou": 0.265625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 468990852, + "step": 5138 + }, + { + "epoch": 21.4125, + "grad_norm": 1.5048102658186053, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 469082548, + "step": 5139 + }, + { + "epoch": 21.4125, + "loss": 0.11329013854265213, + "loss_ce": 8.887142939784098e-06, + "loss_iou": 0.29296875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 469082548, + "step": 5139 + }, + { + "epoch": 21.416666666666668, + "grad_norm": 19.14512999510595, + "learning_rate": 5e-05, + "loss": 0.0853, + "num_input_tokens_seen": 469173712, + "step": 5140 + }, + { + "epoch": 21.416666666666668, + "loss": 0.11006104946136475, + "loss_ce": 6.806710007367656e-05, + "loss_iou": 0.26171875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 469173712, + "step": 5140 + }, + { + "epoch": 21.420833333333334, + "grad_norm": 1.0371820529257796, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 469264932, + "step": 5141 + }, + { + "epoch": 21.420833333333334, + "loss": 0.07569563388824463, + "loss_ce": 0.000256184081081301, + "loss_iou": 0.25, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 469264932, + "step": 5141 + }, + { + "epoch": 21.425, + "grad_norm": 0.8318537160887409, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 469355808, + "step": 5142 + }, + { + "epoch": 21.425, + "loss": 0.023618394508957863, + "loss_ce": 9.231413059751503e-06, + "loss_iou": 0.14453125, + "loss_num": 0.004730224609375, + "loss_xval": 0.0235595703125, + "num_input_tokens_seen": 469355808, + "step": 5142 + }, + { + "epoch": 21.429166666666667, + "grad_norm": 1.7916804024785156, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 469447440, + "step": 5143 + }, + { + "epoch": 21.429166666666667, + "loss": 0.048645682632923126, + "loss_ce": 6.652858246525284e-07, + "loss_iou": 0.17578125, + "loss_num": 0.009765625, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 469447440, + "step": 5143 + }, + { + "epoch": 21.433333333333334, + "grad_norm": 4.676872959945136, + "learning_rate": 5e-05, + "loss": 0.0404, + "num_input_tokens_seen": 469538496, + "step": 5144 + }, + { + "epoch": 21.433333333333334, + "loss": 0.03317292779684067, + "loss_ce": 3.225621298952319e-07, + "loss_iou": 0.26171875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 469538496, + "step": 5144 + }, + { + "epoch": 21.4375, + "grad_norm": 2.8883211934646775, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 469629716, + "step": 5145 + }, + { + "epoch": 21.4375, + "loss": 0.04455077648162842, + "loss_ce": 2.5629222363932058e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 469629716, + "step": 5145 + }, + { + "epoch": 21.441666666666666, + "grad_norm": 3.172159232706883, + "learning_rate": 5e-05, + "loss": 0.0716, + "num_input_tokens_seen": 469721268, + "step": 5146 + }, + { + "epoch": 21.441666666666666, + "loss": 0.07887370884418488, + "loss_ce": 2.3914546545711346e-05, + "loss_iou": 0.24609375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 469721268, + "step": 5146 + }, + { + "epoch": 21.445833333333333, + "grad_norm": 1.941963955509079, + "learning_rate": 5e-05, + "loss": 0.1117, + "num_input_tokens_seen": 469813208, + "step": 5147 + }, + { + "epoch": 21.445833333333333, + "loss": 0.14985010027885437, + "loss_ce": 8.781100405030884e-06, + "loss_iou": 0.12109375, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 469813208, + "step": 5147 + }, + { + "epoch": 21.45, + "grad_norm": 5.756794171636228, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 469903704, + "step": 5148 + }, + { + "epoch": 21.45, + "loss": 0.05248153209686279, + "loss_ce": 6.558901532116579e-06, + "loss_iou": 0.310546875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 469903704, + "step": 5148 + }, + { + "epoch": 21.454166666666666, + "grad_norm": 2.0219404994608374, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 469994624, + "step": 5149 + }, + { + "epoch": 21.454166666666666, + "loss": 0.042460788041353226, + "loss_ce": 3.2075922717922367e-06, + "loss_iou": 0.14453125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 469994624, + "step": 5149 + }, + { + "epoch": 21.458333333333332, + "grad_norm": 4.55508797806237, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 470085964, + "step": 5150 + }, + { + "epoch": 21.458333333333332, + "loss": 0.05022701993584633, + "loss_ce": 0.000483365380205214, + "loss_iou": 0.2197265625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 470085964, + "step": 5150 + }, + { + "epoch": 21.4625, + "grad_norm": 2.80946260721684, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 470177768, + "step": 5151 + }, + { + "epoch": 21.4625, + "loss": 0.03056851401925087, + "loss_ce": 5.1591487135738134e-06, + "loss_iou": 0.31640625, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 470177768, + "step": 5151 + }, + { + "epoch": 21.466666666666665, + "grad_norm": 6.489473341280429, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 470268748, + "step": 5152 + }, + { + "epoch": 21.466666666666665, + "loss": 0.07733218371868134, + "loss_ce": 6.388052042893833e-07, + "loss_iou": 0.255859375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 470268748, + "step": 5152 + }, + { + "epoch": 21.470833333333335, + "grad_norm": 2.385341172996109, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 470360844, + "step": 5153 + }, + { + "epoch": 21.470833333333335, + "loss": 0.06412360072135925, + "loss_ce": 6.167654191813199e-06, + "loss_iou": 0.294921875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 470360844, + "step": 5153 + }, + { + "epoch": 21.475, + "grad_norm": 2.7186507731781706, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 470452448, + "step": 5154 + }, + { + "epoch": 21.475, + "loss": 0.07098732888698578, + "loss_ce": 3.439177817199379e-06, + "loss_iou": 0.3359375, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 470452448, + "step": 5154 + }, + { + "epoch": 21.479166666666668, + "grad_norm": 3.7008464093250946, + "learning_rate": 5e-05, + "loss": 0.04, + "num_input_tokens_seen": 470544008, + "step": 5155 + }, + { + "epoch": 21.479166666666668, + "loss": 0.04185812547802925, + "loss_ce": 0.00233786110766232, + "loss_iou": 0.376953125, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 470544008, + "step": 5155 + }, + { + "epoch": 21.483333333333334, + "grad_norm": 2.4514042894558172, + "learning_rate": 5e-05, + "loss": 0.034, + "num_input_tokens_seen": 470635716, + "step": 5156 + }, + { + "epoch": 21.483333333333334, + "loss": 0.029755450785160065, + "loss_ce": 8.143604190991027e-07, + "loss_iou": 0.306640625, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 470635716, + "step": 5156 + }, + { + "epoch": 21.4875, + "grad_norm": 1.7616124288679285, + "learning_rate": 5e-05, + "loss": 0.0848, + "num_input_tokens_seen": 470727076, + "step": 5157 + }, + { + "epoch": 21.4875, + "loss": 0.06504976749420166, + "loss_ce": 4.733014793600887e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 470727076, + "step": 5157 + }, + { + "epoch": 21.491666666666667, + "grad_norm": 2.6863159119286926, + "learning_rate": 5e-05, + "loss": 0.0518, + "num_input_tokens_seen": 470818544, + "step": 5158 + }, + { + "epoch": 21.491666666666667, + "loss": 0.03866743668913841, + "loss_ce": 0.003724808106198907, + "loss_iou": 0.302734375, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 470818544, + "step": 5158 + }, + { + "epoch": 21.495833333333334, + "grad_norm": 3.538664901062215, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 470910092, + "step": 5159 + }, + { + "epoch": 21.495833333333334, + "loss": 0.05011036992073059, + "loss_ce": 5.07782260683598e-07, + "loss_iou": 0.29296875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 470910092, + "step": 5159 + }, + { + "epoch": 21.5, + "grad_norm": 2.51570375481264, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 471001484, + "step": 5160 + }, + { + "epoch": 21.5, + "loss": 0.06976377964019775, + "loss_ce": 5.932008662057342e-07, + "loss_iou": 0.1162109375, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 471001484, + "step": 5160 + }, + { + "epoch": 21.504166666666666, + "grad_norm": 1.4899894367983637, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 471093124, + "step": 5161 + }, + { + "epoch": 21.504166666666666, + "loss": 0.05978493392467499, + "loss_ce": 6.203370139701292e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 471093124, + "step": 5161 + }, + { + "epoch": 21.508333333333333, + "grad_norm": 2.804098382159329, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 471184200, + "step": 5162 + }, + { + "epoch": 21.508333333333333, + "loss": 0.034136831760406494, + "loss_ce": 2.918685140684829e-06, + "loss_iou": 0.3203125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 471184200, + "step": 5162 + }, + { + "epoch": 21.5125, + "grad_norm": 2.4871477512168187, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 471274320, + "step": 5163 + }, + { + "epoch": 21.5125, + "loss": 0.0459405779838562, + "loss_ce": 1.1626463674474508e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 471274320, + "step": 5163 + }, + { + "epoch": 21.516666666666666, + "grad_norm": 2.0333966820572096, + "learning_rate": 5e-05, + "loss": 0.0737, + "num_input_tokens_seen": 471365360, + "step": 5164 + }, + { + "epoch": 21.516666666666666, + "loss": 0.06339624524116516, + "loss_ce": 0.0007589179440401495, + "loss_iou": 0.3125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 471365360, + "step": 5164 + }, + { + "epoch": 21.520833333333332, + "grad_norm": 2.6889053137143564, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 471457112, + "step": 5165 + }, + { + "epoch": 21.520833333333332, + "loss": 0.051147811114788055, + "loss_ce": 3.4876194376920466e-07, + "loss_iou": 0.3125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 471457112, + "step": 5165 + }, + { + "epoch": 21.525, + "grad_norm": 4.306832416324348, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 471548696, + "step": 5166 + }, + { + "epoch": 21.525, + "loss": 0.03406589478254318, + "loss_ce": 4.4640055421041325e-06, + "loss_iou": 0.283203125, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 471548696, + "step": 5166 + }, + { + "epoch": 21.529166666666665, + "grad_norm": 3.313246129449017, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 471640760, + "step": 5167 + }, + { + "epoch": 21.529166666666665, + "loss": 0.09329698234796524, + "loss_ce": 0.00018022381118498743, + "loss_iou": 0.3203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 471640760, + "step": 5167 + }, + { + "epoch": 21.533333333333335, + "grad_norm": 2.6613088108665552, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 471731924, + "step": 5168 + }, + { + "epoch": 21.533333333333335, + "loss": 0.03726760298013687, + "loss_ce": 8.193208486773074e-05, + "loss_iou": 0.236328125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 471731924, + "step": 5168 + }, + { + "epoch": 21.5375, + "grad_norm": 6.013018668852461, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 471823292, + "step": 5169 + }, + { + "epoch": 21.5375, + "loss": 0.03623223304748535, + "loss_ce": 7.870154149713926e-06, + "loss_iou": 0.162109375, + "loss_num": 0.00726318359375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 471823292, + "step": 5169 + }, + { + "epoch": 21.541666666666668, + "grad_norm": 3.1403542077463613, + "learning_rate": 5e-05, + "loss": 0.0352, + "num_input_tokens_seen": 471914652, + "step": 5170 + }, + { + "epoch": 21.541666666666668, + "loss": 0.026538927108049393, + "loss_ce": 1.152195363829378e-05, + "loss_iou": 0.248046875, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 471914652, + "step": 5170 + }, + { + "epoch": 21.545833333333334, + "grad_norm": 2.5965410496723247, + "learning_rate": 5e-05, + "loss": 0.028, + "num_input_tokens_seen": 472005660, + "step": 5171 + }, + { + "epoch": 21.545833333333334, + "loss": 0.02297055907547474, + "loss_ce": 6.082138497731648e-06, + "loss_iou": 0.248046875, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 472005660, + "step": 5171 + }, + { + "epoch": 21.55, + "grad_norm": 2.488738984890954, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 472097340, + "step": 5172 + }, + { + "epoch": 21.55, + "loss": 0.0831460952758789, + "loss_ce": 9.540294740872923e-07, + "loss_iou": 0.29296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 472097340, + "step": 5172 + }, + { + "epoch": 21.554166666666667, + "grad_norm": 2.1226264315021828, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 472189100, + "step": 5173 + }, + { + "epoch": 21.554166666666667, + "loss": 0.13382770121097565, + "loss_ce": 3.100679168710485e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 472189100, + "step": 5173 + }, + { + "epoch": 21.558333333333334, + "grad_norm": 1.9685445410059124, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 472280176, + "step": 5174 + }, + { + "epoch": 21.558333333333334, + "loss": 0.04735033959150314, + "loss_ce": 2.315889787496417e-06, + "loss_iou": 0.2578125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 472280176, + "step": 5174 + }, + { + "epoch": 21.5625, + "grad_norm": 2.124853590179741, + "learning_rate": 5e-05, + "loss": 0.0336, + "num_input_tokens_seen": 472371624, + "step": 5175 + }, + { + "epoch": 21.5625, + "loss": 0.0341351218521595, + "loss_ce": 1.2107645943615353e-06, + "loss_iou": 0.193359375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 472371624, + "step": 5175 + }, + { + "epoch": 21.566666666666666, + "grad_norm": 7.927331329297304, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 472463452, + "step": 5176 + }, + { + "epoch": 21.566666666666666, + "loss": 0.03894485533237457, + "loss_ce": 0.00223983614705503, + "loss_iou": 0.2119140625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 472463452, + "step": 5176 + }, + { + "epoch": 21.570833333333333, + "grad_norm": 2.168471099832549, + "learning_rate": 5e-05, + "loss": 0.0331, + "num_input_tokens_seen": 472554896, + "step": 5177 + }, + { + "epoch": 21.570833333333333, + "loss": 0.034134577959775925, + "loss_ce": 5.407243952504359e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 472554896, + "step": 5177 + }, + { + "epoch": 21.575, + "grad_norm": 2.28263867475468, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 472646320, + "step": 5178 + }, + { + "epoch": 21.575, + "loss": 0.03484075516462326, + "loss_ce": 4.938349775329698e-06, + "loss_iou": 0.36328125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 472646320, + "step": 5178 + }, + { + "epoch": 21.579166666666666, + "grad_norm": 2.6364224922774007, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 472737792, + "step": 5179 + }, + { + "epoch": 21.579166666666666, + "loss": 0.04767080396413803, + "loss_ce": 2.346263499930501e-06, + "loss_iou": 0.265625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 472737792, + "step": 5179 + }, + { + "epoch": 21.583333333333332, + "grad_norm": 2.8503069499383784, + "learning_rate": 5e-05, + "loss": 0.0699, + "num_input_tokens_seen": 472829404, + "step": 5180 + }, + { + "epoch": 21.583333333333332, + "loss": 0.10118187963962555, + "loss_ce": 0.00010765744809759781, + "loss_iou": 0.2578125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 472829404, + "step": 5180 + }, + { + "epoch": 21.5875, + "grad_norm": 1.8857280247450574, + "learning_rate": 5e-05, + "loss": 0.034, + "num_input_tokens_seen": 472921276, + "step": 5181 + }, + { + "epoch": 21.5875, + "loss": 0.039288513362407684, + "loss_ce": 7.342467870330438e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0078125, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 472921276, + "step": 5181 + }, + { + "epoch": 21.591666666666665, + "grad_norm": 1.4468703705584696, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 473012032, + "step": 5182 + }, + { + "epoch": 21.591666666666665, + "loss": 0.036988455802202225, + "loss_ce": 1.1513138815644197e-06, + "loss_iou": 0.1767578125, + "loss_num": 0.00738525390625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 473012032, + "step": 5182 + }, + { + "epoch": 21.595833333333335, + "grad_norm": 1.1441987157474223, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 473103984, + "step": 5183 + }, + { + "epoch": 21.595833333333335, + "loss": 0.08971662819385529, + "loss_ce": 0.0003306442522443831, + "loss_iou": 0.28515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 473103984, + "step": 5183 + }, + { + "epoch": 21.6, + "grad_norm": 1.1025974467681459, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 473195300, + "step": 5184 + }, + { + "epoch": 21.6, + "loss": 0.04473863169550896, + "loss_ce": 3.8011618016753346e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 473195300, + "step": 5184 + }, + { + "epoch": 21.604166666666668, + "grad_norm": 1.6782143259054236, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 473286108, + "step": 5185 + }, + { + "epoch": 21.604166666666668, + "loss": 0.03215426951646805, + "loss_ce": 4.004207312391372e-06, + "loss_iou": 0.19140625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 473286108, + "step": 5185 + }, + { + "epoch": 21.608333333333334, + "grad_norm": 2.202930522774745, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 473377196, + "step": 5186 + }, + { + "epoch": 21.608333333333334, + "loss": 0.040970880538225174, + "loss_ce": 1.0322871730750194e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 473377196, + "step": 5186 + }, + { + "epoch": 21.6125, + "grad_norm": 2.8559950712554056, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 473468276, + "step": 5187 + }, + { + "epoch": 21.6125, + "loss": 0.03946885094046593, + "loss_ce": 1.992463467104244e-06, + "loss_iou": 0.1943359375, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 473468276, + "step": 5187 + }, + { + "epoch": 21.616666666666667, + "grad_norm": 3.2077730781119507, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 473559784, + "step": 5188 + }, + { + "epoch": 21.616666666666667, + "loss": 0.032352715730667114, + "loss_ce": 0.00032451938022859395, + "loss_iou": 0.216796875, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 473559784, + "step": 5188 + }, + { + "epoch": 21.620833333333334, + "grad_norm": 2.899727186482045, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 473651204, + "step": 5189 + }, + { + "epoch": 21.620833333333334, + "loss": 0.0342266820371151, + "loss_ce": 1.2193077054689638e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 473651204, + "step": 5189 + }, + { + "epoch": 21.625, + "grad_norm": 2.4169527212246447, + "learning_rate": 5e-05, + "loss": 0.0961, + "num_input_tokens_seen": 473742684, + "step": 5190 + }, + { + "epoch": 21.625, + "loss": 0.15376363694667816, + "loss_ce": 8.107833195936109e-07, + "loss_iou": 0.287109375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 473742684, + "step": 5190 + }, + { + "epoch": 21.629166666666666, + "grad_norm": 1.5503498073536341, + "learning_rate": 5e-05, + "loss": 0.0225, + "num_input_tokens_seen": 473833896, + "step": 5191 + }, + { + "epoch": 21.629166666666666, + "loss": 0.022525303065776825, + "loss_ce": 3.3305796023341827e-06, + "loss_iou": 0.216796875, + "loss_num": 0.0045166015625, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 473833896, + "step": 5191 + }, + { + "epoch": 21.633333333333333, + "grad_norm": 1.127646011377556, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 473924504, + "step": 5192 + }, + { + "epoch": 21.633333333333333, + "loss": 0.015396122820675373, + "loss_ce": 7.63364732847549e-06, + "loss_iou": 0.1796875, + "loss_num": 0.003082275390625, + "loss_xval": 0.015380859375, + "num_input_tokens_seen": 473924504, + "step": 5192 + }, + { + "epoch": 21.6375, + "grad_norm": 1.3417370517909932, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 474016108, + "step": 5193 + }, + { + "epoch": 21.6375, + "loss": 0.04691285640001297, + "loss_ce": 1.4966522030590568e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 474016108, + "step": 5193 + }, + { + "epoch": 21.641666666666666, + "grad_norm": 2.760065897453089, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 474107736, + "step": 5194 + }, + { + "epoch": 21.641666666666666, + "loss": 0.031467683613300323, + "loss_ce": 4.0595964492240455e-06, + "loss_iou": 0.29296875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 474107736, + "step": 5194 + }, + { + "epoch": 21.645833333333332, + "grad_norm": 3.8417209646728834, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 474198916, + "step": 5195 + }, + { + "epoch": 21.645833333333332, + "loss": 0.054826926440000534, + "loss_ce": 2.099684479617281e-06, + "loss_iou": 0.421875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 474198916, + "step": 5195 + }, + { + "epoch": 21.65, + "grad_norm": 2.5788512954742857, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 474290460, + "step": 5196 + }, + { + "epoch": 21.65, + "loss": 0.041484106332063675, + "loss_ce": 3.085830030613579e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 474290460, + "step": 5196 + }, + { + "epoch": 21.654166666666665, + "grad_norm": 2.7839116889670095, + "learning_rate": 5e-05, + "loss": 0.0531, + "num_input_tokens_seen": 474381736, + "step": 5197 + }, + { + "epoch": 21.654166666666665, + "loss": 0.04018375277519226, + "loss_ce": 2.2618718503508717e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 474381736, + "step": 5197 + }, + { + "epoch": 21.658333333333335, + "grad_norm": 2.394371038678764, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 474473196, + "step": 5198 + }, + { + "epoch": 21.658333333333335, + "loss": 0.025610364973545074, + "loss_ce": 6.117667908256408e-06, + "loss_iou": 0.279296875, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 474473196, + "step": 5198 + }, + { + "epoch": 21.6625, + "grad_norm": 1.5772029071222384, + "learning_rate": 5e-05, + "loss": 0.0403, + "num_input_tokens_seen": 474564788, + "step": 5199 + }, + { + "epoch": 21.6625, + "loss": 0.03406330198049545, + "loss_ce": 5.686005806637695e-06, + "loss_iou": 0.2734375, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 474564788, + "step": 5199 + }, + { + "epoch": 21.666666666666668, + "grad_norm": 2.271853023782968, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 474655668, + "step": 5200 + }, + { + "epoch": 21.666666666666668, + "loss": 0.03489246591925621, + "loss_ce": 3.245364950998919e-06, + "loss_iou": 0.263671875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 474655668, + "step": 5200 + }, + { + "epoch": 21.670833333333334, + "grad_norm": 1.1929891383052986, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 474747052, + "step": 5201 + }, + { + "epoch": 21.670833333333334, + "loss": 0.03987661376595497, + "loss_ce": 5.399557267082855e-06, + "loss_iou": 0.2421875, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 474747052, + "step": 5201 + }, + { + "epoch": 21.675, + "grad_norm": 1.3799547850853573, + "learning_rate": 5e-05, + "loss": 0.0856, + "num_input_tokens_seen": 474838368, + "step": 5202 + }, + { + "epoch": 21.675, + "loss": 0.12406538426876068, + "loss_ce": 2.6694284315453842e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 474838368, + "step": 5202 + }, + { + "epoch": 21.679166666666667, + "grad_norm": 2.284261544884045, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 474927804, + "step": 5203 + }, + { + "epoch": 21.679166666666667, + "loss": 0.06884060055017471, + "loss_ce": 8.208784493035637e-06, + "loss_iou": 0.220703125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 474927804, + "step": 5203 + }, + { + "epoch": 21.683333333333334, + "grad_norm": 1.127675302187502, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 475019008, + "step": 5204 + }, + { + "epoch": 21.683333333333334, + "loss": 0.04536312445998192, + "loss_ce": 9.02963220141828e-05, + "loss_iou": 0.3125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 475019008, + "step": 5204 + }, + { + "epoch": 21.6875, + "grad_norm": 0.5740189289806529, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 475110416, + "step": 5205 + }, + { + "epoch": 21.6875, + "loss": 0.049877770245075226, + "loss_ce": 3.493142867228016e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 475110416, + "step": 5205 + }, + { + "epoch": 21.691666666666666, + "grad_norm": 3.3850143021747465, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 475201784, + "step": 5206 + }, + { + "epoch": 21.691666666666666, + "loss": 0.054459817707538605, + "loss_ce": 8.827035344438627e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 475201784, + "step": 5206 + }, + { + "epoch": 21.695833333333333, + "grad_norm": 4.522311779324683, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 475293092, + "step": 5207 + }, + { + "epoch": 21.695833333333333, + "loss": 0.03339429944753647, + "loss_ce": 4.3943214222963434e-07, + "loss_iou": 0.26953125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 475293092, + "step": 5207 + }, + { + "epoch": 21.7, + "grad_norm": 1.5613983690941637, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 475385228, + "step": 5208 + }, + { + "epoch": 21.7, + "loss": 0.03437087684869766, + "loss_ce": 4.623156928573735e-05, + "loss_iou": 0.271484375, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 475385228, + "step": 5208 + }, + { + "epoch": 21.704166666666666, + "grad_norm": 2.2465253992130014, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 475476016, + "step": 5209 + }, + { + "epoch": 21.704166666666666, + "loss": 0.040314361453056335, + "loss_ce": 6.393887019839894e-07, + "loss_iou": 0.31640625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 475476016, + "step": 5209 + }, + { + "epoch": 21.708333333333332, + "grad_norm": 2.8540832434061927, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 475566704, + "step": 5210 + }, + { + "epoch": 21.708333333333332, + "loss": 0.037071388214826584, + "loss_ce": 1.6171455285984848e-07, + "loss_iou": 0.140625, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 475566704, + "step": 5210 + }, + { + "epoch": 21.7125, + "grad_norm": 2.0271835737644843, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 475657748, + "step": 5211 + }, + { + "epoch": 21.7125, + "loss": 0.07204379886388779, + "loss_ce": 1.4682226719742175e-05, + "loss_iou": 0.267578125, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 475657748, + "step": 5211 + }, + { + "epoch": 21.716666666666665, + "grad_norm": 2.4003885949869166, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 475748644, + "step": 5212 + }, + { + "epoch": 21.716666666666665, + "loss": 0.05843600630760193, + "loss_ce": 1.0106807167176157e-05, + "loss_iou": 0.30078125, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 475748644, + "step": 5212 + }, + { + "epoch": 21.720833333333335, + "grad_norm": 2.513923371140344, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 475838924, + "step": 5213 + }, + { + "epoch": 21.720833333333335, + "loss": 0.034256190061569214, + "loss_ce": 2.093961768423469e-07, + "loss_iou": 0.228515625, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 475838924, + "step": 5213 + }, + { + "epoch": 21.725, + "grad_norm": 3.3437388752474724, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 475930388, + "step": 5214 + }, + { + "epoch": 21.725, + "loss": 0.05761764198541641, + "loss_ce": 4.5206579102341493e-07, + "loss_iou": 0.2060546875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 475930388, + "step": 5214 + }, + { + "epoch": 21.729166666666668, + "grad_norm": 2.113248422801573, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 476021372, + "step": 5215 + }, + { + "epoch": 21.729166666666668, + "loss": 0.06953422725200653, + "loss_ce": 0.0004042856162413955, + "loss_iou": 0.1533203125, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 476021372, + "step": 5215 + }, + { + "epoch": 21.733333333333334, + "grad_norm": 1.6024094805556266, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 476112972, + "step": 5216 + }, + { + "epoch": 21.733333333333334, + "loss": 0.05392562970519066, + "loss_ce": 1.070283929038851e-06, + "loss_iou": 0.310546875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 476112972, + "step": 5216 + }, + { + "epoch": 21.7375, + "grad_norm": 1.7687476494316916, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 476204388, + "step": 5217 + }, + { + "epoch": 21.7375, + "loss": 0.022738073021173477, + "loss_ce": 2.478513351888978e-06, + "loss_iou": 0.23828125, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 476204388, + "step": 5217 + }, + { + "epoch": 21.741666666666667, + "grad_norm": 2.358736586141072, + "learning_rate": 5e-05, + "loss": 0.0426, + "num_input_tokens_seen": 476295792, + "step": 5218 + }, + { + "epoch": 21.741666666666667, + "loss": 0.060945916920900345, + "loss_ce": 1.7571270291227847e-05, + "loss_iou": 0.189453125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 476295792, + "step": 5218 + }, + { + "epoch": 21.745833333333334, + "grad_norm": 2.7285288322163357, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 476387128, + "step": 5219 + }, + { + "epoch": 21.745833333333334, + "loss": 0.02875593677163124, + "loss_ce": 1.600810901436489e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 476387128, + "step": 5219 + }, + { + "epoch": 21.75, + "grad_norm": 5.473026437297767, + "learning_rate": 5e-05, + "loss": 0.0824, + "num_input_tokens_seen": 476478060, + "step": 5220 + }, + { + "epoch": 21.75, + "loss": 0.07223550975322723, + "loss_ce": 3.9653684780205367e-07, + "loss_iou": 0.25, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 476478060, + "step": 5220 + }, + { + "epoch": 21.754166666666666, + "grad_norm": 2.6463556427619648, + "learning_rate": 5e-05, + "loss": 0.1077, + "num_input_tokens_seen": 476567608, + "step": 5221 + }, + { + "epoch": 21.754166666666666, + "loss": 0.06919453293085098, + "loss_ce": 0.009914140217006207, + "loss_iou": 0.234375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 476567608, + "step": 5221 + }, + { + "epoch": 21.758333333333333, + "grad_norm": 4.524436723885793, + "learning_rate": 5e-05, + "loss": 0.1222, + "num_input_tokens_seen": 476658892, + "step": 5222 + }, + { + "epoch": 21.758333333333333, + "loss": 0.1302042156457901, + "loss_ce": 0.0603799931704998, + "loss_iou": 0.326171875, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 476658892, + "step": 5222 + }, + { + "epoch": 21.7625, + "grad_norm": 16.378174829214576, + "learning_rate": 5e-05, + "loss": 0.3835, + "num_input_tokens_seen": 476750800, + "step": 5223 + }, + { + "epoch": 21.7625, + "loss": 0.47329944372177124, + "loss_ce": 0.29362720251083374, + "loss_iou": 0.1796875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 476750800, + "step": 5223 + }, + { + "epoch": 21.766666666666666, + "grad_norm": 12.95783818151927, + "learning_rate": 5e-05, + "loss": 1.3604, + "num_input_tokens_seen": 476841908, + "step": 5224 + }, + { + "epoch": 21.766666666666666, + "loss": 1.3603050708770752, + "loss_ce": 1.3277199268341064, + "loss_iou": 0.1806640625, + "loss_num": 0.00653076171875, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 476841908, + "step": 5224 + }, + { + "epoch": 21.770833333333332, + "grad_norm": 38.876350421328716, + "learning_rate": 5e-05, + "loss": 0.3373, + "num_input_tokens_seen": 476933316, + "step": 5225 + }, + { + "epoch": 21.770833333333332, + "loss": 0.3426739275455475, + "loss_ce": 0.26179471611976624, + "loss_iou": 0.1728515625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 476933316, + "step": 5225 + }, + { + "epoch": 21.775, + "grad_norm": 72.00053397847842, + "learning_rate": 5e-05, + "loss": 3.4274, + "num_input_tokens_seen": 477024284, + "step": 5226 + }, + { + "epoch": 21.775, + "loss": 3.5369696617126465, + "loss_ce": 3.5133261680603027, + "loss_iou": 0.140625, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 477024284, + "step": 5226 + }, + { + "epoch": 21.779166666666665, + "grad_norm": 2.655551455701337, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 477114208, + "step": 5227 + }, + { + "epoch": 21.779166666666665, + "loss": 0.06098110228776932, + "loss_ce": 0.0073006837628781796, + "loss_iou": 0.26953125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 477114208, + "step": 5227 + }, + { + "epoch": 21.783333333333335, + "grad_norm": 0.8326858351138243, + "learning_rate": 5e-05, + "loss": 0.0345, + "num_input_tokens_seen": 477205596, + "step": 5228 + }, + { + "epoch": 21.783333333333335, + "loss": 0.041718218475580215, + "loss_ce": 0.004334184341132641, + "loss_iou": 0.2421875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 477205596, + "step": 5228 + }, + { + "epoch": 21.7875, + "grad_norm": 4.0888741279726695, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 477296764, + "step": 5229 + }, + { + "epoch": 21.7875, + "loss": 0.06158842518925667, + "loss_ce": 0.000614304793998599, + "loss_iou": 0.1181640625, + "loss_num": 0.01214599609375, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 477296764, + "step": 5229 + }, + { + "epoch": 21.791666666666668, + "grad_norm": 1.460208471676983, + "learning_rate": 5e-05, + "loss": 0.04, + "num_input_tokens_seen": 477388312, + "step": 5230 + }, + { + "epoch": 21.791666666666668, + "loss": 0.03421544283628464, + "loss_ce": 0.0005087743629701436, + "loss_iou": 0.193359375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 477388312, + "step": 5230 + }, + { + "epoch": 21.795833333333334, + "grad_norm": 4.024651774496485, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 477479928, + "step": 5231 + }, + { + "epoch": 21.795833333333334, + "loss": 0.021592549979686737, + "loss_ce": 0.003411701647564769, + "loss_iou": 0.12060546875, + "loss_num": 0.003631591796875, + "loss_xval": 0.0181884765625, + "num_input_tokens_seen": 477479928, + "step": 5231 + }, + { + "epoch": 21.8, + "grad_norm": 3.102519437165367, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 477571288, + "step": 5232 + }, + { + "epoch": 21.8, + "loss": 0.046392329037189484, + "loss_ce": 0.0008143266895785928, + "loss_iou": 0.263671875, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 477571288, + "step": 5232 + }, + { + "epoch": 21.804166666666667, + "grad_norm": 2.26852830905551, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 477662128, + "step": 5233 + }, + { + "epoch": 21.804166666666667, + "loss": 0.03912534564733505, + "loss_ce": 0.0004214280634187162, + "loss_iou": 0.2578125, + "loss_num": 0.007720947265625, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 477662128, + "step": 5233 + }, + { + "epoch": 21.808333333333334, + "grad_norm": 1.2881546218242232, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 477753112, + "step": 5234 + }, + { + "epoch": 21.808333333333334, + "loss": 0.04148241505026817, + "loss_ce": 0.0005964894080534577, + "loss_iou": 0.201171875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 477753112, + "step": 5234 + }, + { + "epoch": 21.8125, + "grad_norm": 1.392795251198521, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 477844560, + "step": 5235 + }, + { + "epoch": 21.8125, + "loss": 0.03164687007665634, + "loss_ce": 0.00039305430254898965, + "loss_iou": 0.1630859375, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 477844560, + "step": 5235 + }, + { + "epoch": 21.816666666666666, + "grad_norm": 2.1300994137265494, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 477935896, + "step": 5236 + }, + { + "epoch": 21.816666666666666, + "loss": 0.03342318534851074, + "loss_ce": 0.00029635560349561274, + "loss_iou": 0.287109375, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 477935896, + "step": 5236 + }, + { + "epoch": 21.820833333333333, + "grad_norm": 4.482202232344798, + "learning_rate": 5e-05, + "loss": 0.0275, + "num_input_tokens_seen": 478026960, + "step": 5237 + }, + { + "epoch": 21.820833333333333, + "loss": 0.028818076476454735, + "loss_ce": 0.0006961278268136084, + "loss_iou": 0.25, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 478026960, + "step": 5237 + }, + { + "epoch": 21.825, + "grad_norm": 2.2595071349267837, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 478118188, + "step": 5238 + }, + { + "epoch": 21.825, + "loss": 0.026751350611448288, + "loss_ce": 0.00017816826584748924, + "loss_iou": 0.21875, + "loss_num": 0.00531005859375, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 478118188, + "step": 5238 + }, + { + "epoch": 21.829166666666666, + "grad_norm": 2.7136863235245485, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 478209636, + "step": 5239 + }, + { + "epoch": 21.829166666666666, + "loss": 0.047790057957172394, + "loss_ce": 0.00024366873549297452, + "loss_iou": 0.232421875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 478209636, + "step": 5239 + }, + { + "epoch": 21.833333333333332, + "grad_norm": 2.423196777012122, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 478300992, + "step": 5240 + }, + { + "epoch": 21.833333333333332, + "loss": 0.030891956761479378, + "loss_ce": 0.00019127382256556302, + "loss_iou": 0.23046875, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 478300992, + "step": 5240 + }, + { + "epoch": 21.8375, + "grad_norm": 2.7051208449677895, + "learning_rate": 5e-05, + "loss": 0.0293, + "num_input_tokens_seen": 478391728, + "step": 5241 + }, + { + "epoch": 21.8375, + "loss": 0.024106372147798538, + "loss_ce": 0.00021873789955861866, + "loss_iou": 0.1943359375, + "loss_num": 0.004791259765625, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 478391728, + "step": 5241 + }, + { + "epoch": 21.841666666666665, + "grad_norm": 2.582985556850991, + "learning_rate": 5e-05, + "loss": 0.0318, + "num_input_tokens_seen": 478483932, + "step": 5242 + }, + { + "epoch": 21.841666666666665, + "loss": 0.041258759796619415, + "loss_ce": 0.000525420589838177, + "loss_iou": 0.265625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 478483932, + "step": 5242 + }, + { + "epoch": 21.845833333333335, + "grad_norm": 2.8424877418042307, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 478575216, + "step": 5243 + }, + { + "epoch": 21.845833333333335, + "loss": 0.03894955664873123, + "loss_ce": 0.000184601143701002, + "loss_iou": 0.2578125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 478575216, + "step": 5243 + }, + { + "epoch": 21.85, + "grad_norm": 2.235698937848587, + "learning_rate": 5e-05, + "loss": 0.0266, + "num_input_tokens_seen": 478665472, + "step": 5244 + }, + { + "epoch": 21.85, + "loss": 0.0273615550249815, + "loss_ce": 0.00024668616242706776, + "loss_iou": 0.25390625, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 478665472, + "step": 5244 + }, + { + "epoch": 21.854166666666668, + "grad_norm": 2.869722444238756, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 478756948, + "step": 5245 + }, + { + "epoch": 21.854166666666668, + "loss": 0.09617424011230469, + "loss_ce": 0.003171928459778428, + "loss_iou": 0.255859375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 478756948, + "step": 5245 + }, + { + "epoch": 21.858333333333334, + "grad_norm": 2.883646852211987, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 478848536, + "step": 5246 + }, + { + "epoch": 21.858333333333334, + "loss": 0.06147945672273636, + "loss_ce": 0.0002077911631204188, + "loss_iou": 0.26953125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 478848536, + "step": 5246 + }, + { + "epoch": 21.8625, + "grad_norm": 2.4193694224975797, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 478940084, + "step": 5247 + }, + { + "epoch": 21.8625, + "loss": 0.05228853598237038, + "loss_ce": 0.008594990707933903, + "loss_iou": 0.189453125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 478940084, + "step": 5247 + }, + { + "epoch": 21.866666666666667, + "grad_norm": 1.2736474236894773, + "learning_rate": 5e-05, + "loss": 0.03, + "num_input_tokens_seen": 479031040, + "step": 5248 + }, + { + "epoch": 21.866666666666667, + "loss": 0.03317740187048912, + "loss_ce": 0.0001726409827824682, + "loss_iou": 0.134765625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 479031040, + "step": 5248 + }, + { + "epoch": 21.870833333333334, + "grad_norm": 1.7156603280658627, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 479122160, + "step": 5249 + }, + { + "epoch": 21.870833333333334, + "loss": 0.049606695771217346, + "loss_ce": 0.00019873742712661624, + "loss_iou": 0.236328125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 479122160, + "step": 5249 + }, + { + "epoch": 21.875, + "grad_norm": 2.9339600092716287, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 479213764, + "step": 5250 + }, + { + "epoch": 21.875, + "eval_seeclick_CIoU": 0.22049980238080025, + "eval_seeclick_GIoU": 0.21388709545135498, + "eval_seeclick_IoU": 0.32379044592380524, + "eval_seeclick_MAE_all": 0.10728560388088226, + "eval_seeclick_MAE_h": 0.07920366153120995, + "eval_seeclick_MAE_w": 0.25277023017406464, + "eval_seeclick_MAE_x_boxes": 0.22806067764759064, + "eval_seeclick_MAE_y_boxes": 0.08363592252135277, + "eval_seeclick_NUM_probability": 0.9999969601631165, + "eval_seeclick_inside_bbox": 0.5866477340459824, + "eval_seeclick_loss": 0.582019031047821, + "eval_seeclick_loss_ce": 0.09686426445841789, + "eval_seeclick_loss_iou": 0.4454345703125, + "eval_seeclick_loss_num": 0.0921783447265625, + "eval_seeclick_loss_xval": 0.4608154296875, + "eval_seeclick_runtime": 76.6074, + "eval_seeclick_samples_per_second": 0.561, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 479213764, + "step": 5250 + }, + { + "epoch": 21.875, + "eval_icons_CIoU": 0.2262982428073883, + "eval_icons_GIoU": 0.15895729139447212, + "eval_icons_IoU": 0.3449597507715225, + "eval_icons_MAE_all": 0.09179431200027466, + "eval_icons_MAE_h": 0.1736046001315117, + "eval_icons_MAE_w": 0.16428311169147491, + "eval_icons_MAE_x_boxes": 0.16489966958761215, + "eval_icons_MAE_y_boxes": 0.17304036766290665, + "eval_icons_NUM_probability": 0.9999924004077911, + "eval_icons_inside_bbox": 0.4878472238779068, + "eval_icons_loss": 0.45651158690452576, + "eval_icons_loss_ce": 0.008816860150545835, + "eval_icons_loss_iou": 0.177001953125, + "eval_icons_loss_num": 0.093536376953125, + "eval_icons_loss_xval": 0.4674072265625, + "eval_icons_runtime": 86.8027, + "eval_icons_samples_per_second": 0.576, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 479213764, + "step": 5250 + }, + { + "epoch": 21.875, + "eval_screenspot_CIoU": 0.3330252965291341, + "eval_screenspot_GIoU": 0.3239004115263621, + "eval_screenspot_IoU": 0.4180097281932831, + "eval_screenspot_MAE_all": 0.09967080752054851, + "eval_screenspot_MAE_h": 0.09926832715670268, + "eval_screenspot_MAE_w": 0.21363018453121185, + "eval_screenspot_MAE_x_boxes": 0.1893785446882248, + "eval_screenspot_MAE_y_boxes": 0.09871842215458553, + "eval_screenspot_NUM_probability": 0.9999570647875468, + "eval_screenspot_inside_bbox": 0.6804166634877523, + "eval_screenspot_loss": 0.5017403364181519, + "eval_screenspot_loss_ce": 0.005790580064058304, + "eval_screenspot_loss_iou": 0.3467203776041667, + "eval_screenspot_loss_num": 0.10290018717447917, + "eval_screenspot_loss_xval": 0.5142008463541666, + "eval_screenspot_runtime": 149.4622, + "eval_screenspot_samples_per_second": 0.595, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 479213764, + "step": 5250 + }, + { + "epoch": 21.875, + "eval_compot_CIoU": 0.45339295268058777, + "eval_compot_GIoU": 0.45081885159015656, + "eval_compot_IoU": 0.5354850888252258, + "eval_compot_MAE_all": 0.06074054725468159, + "eval_compot_MAE_h": 0.04861530102789402, + "eval_compot_MAE_w": 0.17756661027669907, + "eval_compot_MAE_x_boxes": 0.1776425689458847, + "eval_compot_MAE_y_boxes": 0.047350864857435226, + "eval_compot_NUM_probability": 0.9999347925186157, + "eval_compot_inside_bbox": 0.7361111044883728, + "eval_compot_loss": 0.35404831171035767, + "eval_compot_loss_ce": 0.04850252345204353, + "eval_compot_loss_iou": 0.2886962890625, + "eval_compot_loss_num": 0.059711456298828125, + "eval_compot_loss_xval": 0.298583984375, + "eval_compot_runtime": 86.1966, + "eval_compot_samples_per_second": 0.58, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 479213764, + "step": 5250 + }, + { + "epoch": 21.875, + "loss": 0.35134467482566833, + "loss_ce": 0.05233343690633774, + "loss_iou": 0.31640625, + "loss_num": 0.059814453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 479213764, + "step": 5250 + }, + { + "epoch": 21.879166666666666, + "grad_norm": 4.492691268409018, + "learning_rate": 5e-05, + "loss": 0.0857, + "num_input_tokens_seen": 479304724, + "step": 5251 + }, + { + "epoch": 21.879166666666666, + "loss": 0.11784595996141434, + "loss_ce": 0.00026172742946073413, + "loss_iou": 0.294921875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 479304724, + "step": 5251 + }, + { + "epoch": 21.883333333333333, + "grad_norm": 5.380219994049467, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 479395364, + "step": 5252 + }, + { + "epoch": 21.883333333333333, + "loss": 0.04840845614671707, + "loss_ce": 0.0003127541858702898, + "loss_iou": 0.359375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 479395364, + "step": 5252 + }, + { + "epoch": 21.8875, + "grad_norm": 2.4134467975947613, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 479486420, + "step": 5253 + }, + { + "epoch": 21.8875, + "loss": 0.04481302201747894, + "loss_ce": 0.0002573596721049398, + "loss_iou": 0.23828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 479486420, + "step": 5253 + }, + { + "epoch": 21.891666666666666, + "grad_norm": 2.3544784211872374, + "learning_rate": 5e-05, + "loss": 0.0224, + "num_input_tokens_seen": 479577596, + "step": 5254 + }, + { + "epoch": 21.891666666666666, + "loss": 0.020729443058371544, + "loss_ce": 0.00013770633086096495, + "loss_iou": 0.1787109375, + "loss_num": 0.004119873046875, + "loss_xval": 0.0206298828125, + "num_input_tokens_seen": 479577596, + "step": 5254 + }, + { + "epoch": 21.895833333333332, + "grad_norm": 2.0921788964058807, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 479669088, + "step": 5255 + }, + { + "epoch": 21.895833333333332, + "loss": 0.04369882494211197, + "loss_ce": 0.0002646785578690469, + "loss_iou": 0.2080078125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 479669088, + "step": 5255 + }, + { + "epoch": 21.9, + "grad_norm": 1.389998062638131, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 479760124, + "step": 5256 + }, + { + "epoch": 21.9, + "loss": 0.027485912665724754, + "loss_ce": 0.0001269038039026782, + "loss_iou": 0.216796875, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 479760124, + "step": 5256 + }, + { + "epoch": 21.904166666666665, + "grad_norm": 1.559914252826017, + "learning_rate": 5e-05, + "loss": 0.0404, + "num_input_tokens_seen": 479851468, + "step": 5257 + }, + { + "epoch": 21.904166666666665, + "loss": 0.02916884422302246, + "loss_ce": 0.00010847946396097541, + "loss_iou": 0.26171875, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 479851468, + "step": 5257 + }, + { + "epoch": 21.908333333333335, + "grad_norm": 0.6527247533851892, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 479942660, + "step": 5258 + }, + { + "epoch": 21.908333333333335, + "loss": 0.052520204335451126, + "loss_ce": 0.00015203985094558448, + "loss_iou": 0.27734375, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 479942660, + "step": 5258 + }, + { + "epoch": 21.9125, + "grad_norm": 2.4036418366319356, + "learning_rate": 5e-05, + "loss": 0.0292, + "num_input_tokens_seen": 480033712, + "step": 5259 + }, + { + "epoch": 21.9125, + "loss": 0.026913050562143326, + "loss_ce": 8.81015439517796e-05, + "loss_iou": 0.220703125, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 480033712, + "step": 5259 + }, + { + "epoch": 21.916666666666668, + "grad_norm": 1.5052202432782815, + "learning_rate": 5e-05, + "loss": 0.0426, + "num_input_tokens_seen": 480124500, + "step": 5260 + }, + { + "epoch": 21.916666666666668, + "loss": 0.0278320275247097, + "loss_ce": 9.917754505295306e-05, + "loss_iou": 0.224609375, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 480124500, + "step": 5260 + }, + { + "epoch": 21.920833333333334, + "grad_norm": 7.039157047532622, + "learning_rate": 5e-05, + "loss": 0.0355, + "num_input_tokens_seen": 480215860, + "step": 5261 + }, + { + "epoch": 21.920833333333334, + "loss": 0.03863038867712021, + "loss_ce": 0.001780410297214985, + "loss_iou": 0.2080078125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 480215860, + "step": 5261 + }, + { + "epoch": 21.925, + "grad_norm": 3.226045222418645, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 480307224, + "step": 5262 + }, + { + "epoch": 21.925, + "loss": 0.04644927382469177, + "loss_ce": 0.0002304044901393354, + "loss_iou": 0.298828125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 480307224, + "step": 5262 + }, + { + "epoch": 21.929166666666667, + "grad_norm": 3.41641248782928, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 480398828, + "step": 5263 + }, + { + "epoch": 21.929166666666667, + "loss": 0.054157473146915436, + "loss_ce": 0.00015662264195270836, + "loss_iou": 0.265625, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 480398828, + "step": 5263 + }, + { + "epoch": 21.933333333333334, + "grad_norm": 2.2687866903164013, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 480490488, + "step": 5264 + }, + { + "epoch": 21.933333333333334, + "loss": 0.047733329236507416, + "loss_ce": 0.0015907498309388757, + "loss_iou": 0.162109375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 480490488, + "step": 5264 + }, + { + "epoch": 21.9375, + "grad_norm": 2.2036409399472876, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 480582308, + "step": 5265 + }, + { + "epoch": 21.9375, + "loss": 0.0360955074429512, + "loss_ce": 0.00023354000586550683, + "loss_iou": 0.1708984375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 480582308, + "step": 5265 + }, + { + "epoch": 21.941666666666666, + "grad_norm": 1.5157849644907473, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 480673540, + "step": 5266 + }, + { + "epoch": 21.941666666666666, + "loss": 0.02454659342765808, + "loss_ce": 0.0029325177893042564, + "loss_iou": 0.2353515625, + "loss_num": 0.00433349609375, + "loss_xval": 0.0216064453125, + "num_input_tokens_seen": 480673540, + "step": 5266 + }, + { + "epoch": 21.945833333333333, + "grad_norm": 2.283724280107787, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 480764408, + "step": 5267 + }, + { + "epoch": 21.945833333333333, + "loss": 0.05202634632587433, + "loss_ce": 0.00010068815026897937, + "loss_iou": 0.216796875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 480764408, + "step": 5267 + }, + { + "epoch": 21.95, + "grad_norm": 2.406683325736589, + "learning_rate": 5e-05, + "loss": 0.0656, + "num_input_tokens_seen": 480855996, + "step": 5268 + }, + { + "epoch": 21.95, + "loss": 0.09315572679042816, + "loss_ce": 0.0002907381858676672, + "loss_iou": 0.3515625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 480855996, + "step": 5268 + }, + { + "epoch": 21.954166666666666, + "grad_norm": 2.810274551612445, + "learning_rate": 5e-05, + "loss": 0.0376, + "num_input_tokens_seen": 480947560, + "step": 5269 + }, + { + "epoch": 21.954166666666666, + "loss": 0.05150197818875313, + "loss_ce": 0.00014089501928538084, + "loss_iou": 0.271484375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 480947560, + "step": 5269 + }, + { + "epoch": 21.958333333333332, + "grad_norm": 3.7126604800620457, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 481039152, + "step": 5270 + }, + { + "epoch": 21.958333333333332, + "loss": 0.044563956558704376, + "loss_ce": 0.00020665646297857165, + "loss_iou": 0.2734375, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 481039152, + "step": 5270 + }, + { + "epoch": 21.9625, + "grad_norm": 3.3842874869357606, + "learning_rate": 5e-05, + "loss": 0.0332, + "num_input_tokens_seen": 481130816, + "step": 5271 + }, + { + "epoch": 21.9625, + "loss": 0.03572816029191017, + "loss_ce": 0.00011414707114454359, + "loss_iou": 0.275390625, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 481130816, + "step": 5271 + }, + { + "epoch": 21.966666666666665, + "grad_norm": 2.8120471497391386, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 481222128, + "step": 5272 + }, + { + "epoch": 21.966666666666665, + "loss": 0.043471939861774445, + "loss_ce": 9.120319009525701e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 481222128, + "step": 5272 + }, + { + "epoch": 21.970833333333335, + "grad_norm": 1.8730839606126495, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 481313140, + "step": 5273 + }, + { + "epoch": 21.970833333333335, + "loss": 0.027745647355914116, + "loss_ce": 0.00014249965897761285, + "loss_iou": 0.16796875, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 481313140, + "step": 5273 + }, + { + "epoch": 21.975, + "grad_norm": 1.8566497064524343, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 481404928, + "step": 5274 + }, + { + "epoch": 21.975, + "loss": 0.05320358648896217, + "loss_ce": 0.0001945517724379897, + "loss_iou": 0.33984375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 481404928, + "step": 5274 + }, + { + "epoch": 21.979166666666668, + "grad_norm": 1.6505782038848729, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 481496404, + "step": 5275 + }, + { + "epoch": 21.979166666666668, + "loss": 0.06193459406495094, + "loss_ce": 0.0005713765858672559, + "loss_iou": 0.2099609375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 481496404, + "step": 5275 + }, + { + "epoch": 21.983333333333334, + "grad_norm": 2.0673108497867694, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 481588516, + "step": 5276 + }, + { + "epoch": 21.983333333333334, + "loss": 0.0274334903806448, + "loss_ce": 0.00366029585711658, + "loss_iou": 0.244140625, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 481588516, + "step": 5276 + }, + { + "epoch": 21.9875, + "grad_norm": 0.6926522013161461, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 481680176, + "step": 5277 + }, + { + "epoch": 21.9875, + "loss": 0.03269730508327484, + "loss_ce": 0.00021134436246939003, + "loss_iou": 0.2431640625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 481680176, + "step": 5277 + }, + { + "epoch": 21.991666666666667, + "grad_norm": 2.836791567215157, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 481771536, + "step": 5278 + }, + { + "epoch": 21.991666666666667, + "loss": 0.031540993601083755, + "loss_ce": 0.00033677060855552554, + "loss_iou": 0.201171875, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 481771536, + "step": 5278 + }, + { + "epoch": 21.995833333333334, + "grad_norm": 4.867023021726077, + "learning_rate": 5e-05, + "loss": 0.028, + "num_input_tokens_seen": 481861428, + "step": 5279 + }, + { + "epoch": 21.995833333333334, + "loss": 0.025719735771417618, + "loss_ce": 6.208197009982541e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 481861428, + "step": 5279 + }, + { + "epoch": 22.0, + "grad_norm": 1.02537337810124, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 481951132, + "step": 5280 + }, + { + "epoch": 22.0, + "loss": 0.032459720969200134, + "loss_ce": 0.0001110858574975282, + "loss_iou": 0.26171875, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 481951132, + "step": 5280 + }, + { + "epoch": 22.004166666666666, + "grad_norm": 1.0512527663811588, + "learning_rate": 5e-05, + "loss": 0.0581, + "num_input_tokens_seen": 482042020, + "step": 5281 + }, + { + "epoch": 22.004166666666666, + "loss": 0.043644338846206665, + "loss_ce": 7.286606705747545e-05, + "loss_iou": 0.189453125, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 482042020, + "step": 5281 + }, + { + "epoch": 22.008333333333333, + "grad_norm": 2.0710280576409925, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 482133376, + "step": 5282 + }, + { + "epoch": 22.008333333333333, + "loss": 0.030969956889748573, + "loss_ce": 0.0001472034491598606, + "loss_iou": 0.09912109375, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 482133376, + "step": 5282 + }, + { + "epoch": 22.0125, + "grad_norm": 2.4110739556831646, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 482224396, + "step": 5283 + }, + { + "epoch": 22.0125, + "loss": 0.041511379182338715, + "loss_ce": 9.139893518295139e-05, + "loss_iou": 0.12890625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 482224396, + "step": 5283 + }, + { + "epoch": 22.016666666666666, + "grad_norm": 1.5776653945280796, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 482315864, + "step": 5284 + }, + { + "epoch": 22.016666666666666, + "loss": 0.02530059777200222, + "loss_ce": 0.0001388565287925303, + "loss_iou": 0.1220703125, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 482315864, + "step": 5284 + }, + { + "epoch": 22.020833333333332, + "grad_norm": 2.6097619209688183, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 482408016, + "step": 5285 + }, + { + "epoch": 22.020833333333332, + "loss": 0.025224104523658752, + "loss_ce": 0.00033701941720210016, + "loss_iou": 0.2294921875, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 482408016, + "step": 5285 + }, + { + "epoch": 22.025, + "grad_norm": 2.4991762130925825, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 482499076, + "step": 5286 + }, + { + "epoch": 22.025, + "loss": 0.024774424731731415, + "loss_ce": 0.0004290279175620526, + "loss_iou": 0.294921875, + "loss_num": 0.0048828125, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 482499076, + "step": 5286 + }, + { + "epoch": 22.029166666666665, + "grad_norm": 1.711039056793413, + "learning_rate": 5e-05, + "loss": 0.0215, + "num_input_tokens_seen": 482591584, + "step": 5287 + }, + { + "epoch": 22.029166666666665, + "loss": 0.021085284650325775, + "loss_ce": 0.00011970890773227438, + "loss_iou": 0.1640625, + "loss_num": 0.004180908203125, + "loss_xval": 0.02099609375, + "num_input_tokens_seen": 482591584, + "step": 5287 + }, + { + "epoch": 22.033333333333335, + "grad_norm": 1.9039882747765806, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 482681820, + "step": 5288 + }, + { + "epoch": 22.033333333333335, + "loss": 0.023020733147859573, + "loss_ce": 0.00024699015193618834, + "loss_iou": 0.1865234375, + "loss_num": 0.004547119140625, + "loss_xval": 0.0228271484375, + "num_input_tokens_seen": 482681820, + "step": 5288 + }, + { + "epoch": 22.0375, + "grad_norm": 1.5749168952865242, + "learning_rate": 5e-05, + "loss": 0.0204, + "num_input_tokens_seen": 482773252, + "step": 5289 + }, + { + "epoch": 22.0375, + "loss": 0.0190866831690073, + "loss_ce": 0.0002344487002119422, + "loss_iou": 0.12060546875, + "loss_num": 0.0037689208984375, + "loss_xval": 0.018798828125, + "num_input_tokens_seen": 482773252, + "step": 5289 + }, + { + "epoch": 22.041666666666668, + "grad_norm": 1.716734322199081, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 482864240, + "step": 5290 + }, + { + "epoch": 22.041666666666668, + "loss": 0.13151633739471436, + "loss_ce": 0.00013816305727232248, + "loss_iou": 0.310546875, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 482864240, + "step": 5290 + }, + { + "epoch": 22.045833333333334, + "grad_norm": 3.0159664173797176, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 482955372, + "step": 5291 + }, + { + "epoch": 22.045833333333334, + "loss": 0.031092027202248573, + "loss_ce": 0.00011668415390886366, + "loss_iou": 0.2490234375, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 482955372, + "step": 5291 + }, + { + "epoch": 22.05, + "grad_norm": 4.501999040234554, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 483046876, + "step": 5292 + }, + { + "epoch": 22.05, + "loss": 0.05462396889925003, + "loss_ce": 0.00011957163223996758, + "loss_iou": 0.34765625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 483046876, + "step": 5292 + }, + { + "epoch": 22.054166666666667, + "grad_norm": 2.982610396408003, + "learning_rate": 5e-05, + "loss": 0.0703, + "num_input_tokens_seen": 483138532, + "step": 5293 + }, + { + "epoch": 22.054166666666667, + "loss": 0.050793588161468506, + "loss_ce": 0.0008363127708435059, + "loss_iou": 0.30859375, + "loss_num": 0.00994873046875, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 483138532, + "step": 5293 + }, + { + "epoch": 22.058333333333334, + "grad_norm": 2.5559913977150157, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 483229876, + "step": 5294 + }, + { + "epoch": 22.058333333333334, + "loss": 0.03200722858309746, + "loss_ce": 0.00010110236325999722, + "loss_iou": 0.296875, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 483229876, + "step": 5294 + }, + { + "epoch": 22.0625, + "grad_norm": 1.6884618816929389, + "learning_rate": 5e-05, + "loss": 0.0781, + "num_input_tokens_seen": 483321328, + "step": 5295 + }, + { + "epoch": 22.0625, + "loss": 0.0712161660194397, + "loss_ce": 7.969448051881045e-05, + "loss_iou": 0.21875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 483321328, + "step": 5295 + }, + { + "epoch": 22.066666666666666, + "grad_norm": 1.5093449135600019, + "learning_rate": 5e-05, + "loss": 0.0293, + "num_input_tokens_seen": 483412264, + "step": 5296 + }, + { + "epoch": 22.066666666666666, + "loss": 0.015982337296009064, + "loss_ce": 7.504779932787642e-05, + "loss_iou": 0.04052734375, + "loss_num": 0.0031890869140625, + "loss_xval": 0.015869140625, + "num_input_tokens_seen": 483412264, + "step": 5296 + }, + { + "epoch": 22.070833333333333, + "grad_norm": 1.485904538435143, + "learning_rate": 5e-05, + "loss": 0.0218, + "num_input_tokens_seen": 483503844, + "step": 5297 + }, + { + "epoch": 22.070833333333333, + "loss": 0.02632717601954937, + "loss_ce": 9.731962927617133e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 483503844, + "step": 5297 + }, + { + "epoch": 22.075, + "grad_norm": 0.9224941304114999, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 483594920, + "step": 5298 + }, + { + "epoch": 22.075, + "loss": 0.14659483730793, + "loss_ce": 6.469509389717132e-05, + "loss_iou": 0.1484375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 483594920, + "step": 5298 + }, + { + "epoch": 22.079166666666666, + "grad_norm": 2.282638840202788, + "learning_rate": 5e-05, + "loss": 0.0252, + "num_input_tokens_seen": 483685976, + "step": 5299 + }, + { + "epoch": 22.079166666666666, + "loss": 0.022921577095985413, + "loss_ce": 6.391090573742986e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.00457763671875, + "loss_xval": 0.0228271484375, + "num_input_tokens_seen": 483685976, + "step": 5299 + }, + { + "epoch": 22.083333333333332, + "grad_norm": 2.6521355470734473, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 483777272, + "step": 5300 + }, + { + "epoch": 22.083333333333332, + "loss": 0.046746619045734406, + "loss_ce": 8.52411612868309e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 483777272, + "step": 5300 + }, + { + "epoch": 22.0875, + "grad_norm": 2.7199186660563672, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 483868844, + "step": 5301 + }, + { + "epoch": 22.0875, + "loss": 0.059269554913043976, + "loss_ce": 0.00018752555479295552, + "loss_iou": 0.06884765625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 483868844, + "step": 5301 + }, + { + "epoch": 22.091666666666665, + "grad_norm": 6.386667306569148, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 483959484, + "step": 5302 + }, + { + "epoch": 22.091666666666665, + "loss": 0.035696543753147125, + "loss_ce": 3.675363404909149e-05, + "loss_iou": 0.193359375, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 483959484, + "step": 5302 + }, + { + "epoch": 22.095833333333335, + "grad_norm": 2.8292560563918876, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 484050572, + "step": 5303 + }, + { + "epoch": 22.095833333333335, + "loss": 0.024900998920202255, + "loss_ce": 0.0009294397314079106, + "loss_iou": 0.2470703125, + "loss_num": 0.004791259765625, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 484050572, + "step": 5303 + }, + { + "epoch": 22.1, + "grad_norm": 1.6401184329935514, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 484142108, + "step": 5304 + }, + { + "epoch": 22.1, + "loss": 0.06375754624605179, + "loss_ce": 5.2097351726843044e-05, + "loss_iou": 0.251953125, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 484142108, + "step": 5304 + }, + { + "epoch": 22.104166666666668, + "grad_norm": 1.3663876458902557, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 484233692, + "step": 5305 + }, + { + "epoch": 22.104166666666668, + "loss": 0.03514707088470459, + "loss_ce": 0.0007537580095231533, + "loss_iou": 0.12060546875, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 484233692, + "step": 5305 + }, + { + "epoch": 22.108333333333334, + "grad_norm": 2.06616645999263, + "learning_rate": 5e-05, + "loss": 0.0274, + "num_input_tokens_seen": 484323416, + "step": 5306 + }, + { + "epoch": 22.108333333333334, + "loss": 0.024102866649627686, + "loss_ce": 8.55340767884627e-05, + "loss_iou": 0.162109375, + "loss_num": 0.004791259765625, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 484323416, + "step": 5306 + }, + { + "epoch": 22.1125, + "grad_norm": 1.2662155080798614, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 484414608, + "step": 5307 + }, + { + "epoch": 22.1125, + "loss": 0.033745765686035156, + "loss_ce": 7.724766328465194e-05, + "loss_iou": 0.130859375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 484414608, + "step": 5307 + }, + { + "epoch": 22.116666666666667, + "grad_norm": 1.852131239125287, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 484506084, + "step": 5308 + }, + { + "epoch": 22.116666666666667, + "loss": 0.028312429785728455, + "loss_ce": 7.985768024809659e-05, + "loss_iou": 0.13671875, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 484506084, + "step": 5308 + }, + { + "epoch": 22.120833333333334, + "grad_norm": 1.9954084619623214, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 484597320, + "step": 5309 + }, + { + "epoch": 22.120833333333334, + "loss": 0.08587288111448288, + "loss_ce": 0.00010322533489670604, + "loss_iou": 0.126953125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 484597320, + "step": 5309 + }, + { + "epoch": 22.125, + "grad_norm": 12.733430054841147, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 484688572, + "step": 5310 + }, + { + "epoch": 22.125, + "loss": 0.06032126024365425, + "loss_ce": 4.141611134400591e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 484688572, + "step": 5310 + }, + { + "epoch": 22.129166666666666, + "grad_norm": 1.793970054204257, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 484780296, + "step": 5311 + }, + { + "epoch": 22.129166666666666, + "loss": 0.02716052532196045, + "loss_ce": 0.0001372115802951157, + "loss_iou": 0.1328125, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 484780296, + "step": 5311 + }, + { + "epoch": 22.133333333333333, + "grad_norm": 2.2711533570209013, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 484872084, + "step": 5312 + }, + { + "epoch": 22.133333333333333, + "loss": 0.07867635786533356, + "loss_ce": 0.0001851472770795226, + "loss_iou": 0.291015625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 484872084, + "step": 5312 + }, + { + "epoch": 22.1375, + "grad_norm": 2.4990669253833677, + "learning_rate": 5e-05, + "loss": 0.0233, + "num_input_tokens_seen": 484963592, + "step": 5313 + }, + { + "epoch": 22.1375, + "loss": 0.026571379974484444, + "loss_ce": 0.00012789816537406296, + "loss_iou": 0.236328125, + "loss_num": 0.005279541015625, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 484963592, + "step": 5313 + }, + { + "epoch": 22.141666666666666, + "grad_norm": 7.084602588469983, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 485055388, + "step": 5314 + }, + { + "epoch": 22.141666666666666, + "loss": 0.04131896421313286, + "loss_ce": 0.0003033394750673324, + "loss_iou": 0.337890625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 485055388, + "step": 5314 + }, + { + "epoch": 22.145833333333332, + "grad_norm": 2.734101352423799, + "learning_rate": 5e-05, + "loss": 0.0262, + "num_input_tokens_seen": 485146524, + "step": 5315 + }, + { + "epoch": 22.145833333333332, + "loss": 0.025176800787448883, + "loss_ce": 7.60941329644993e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0050048828125, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 485146524, + "step": 5315 + }, + { + "epoch": 22.15, + "grad_norm": 3.0314632006075444, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 485237700, + "step": 5316 + }, + { + "epoch": 22.15, + "loss": 0.05775919556617737, + "loss_ce": 0.0045517971739172935, + "loss_iou": 0.27734375, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 485237700, + "step": 5316 + }, + { + "epoch": 22.154166666666665, + "grad_norm": 2.742873805341264, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 485328580, + "step": 5317 + }, + { + "epoch": 22.154166666666665, + "loss": 0.038473501801490784, + "loss_ce": 5.186857742955908e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 485328580, + "step": 5317 + }, + { + "epoch": 22.158333333333335, + "grad_norm": 3.0947148946477605, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 485419876, + "step": 5318 + }, + { + "epoch": 22.158333333333335, + "loss": 0.054676301777362823, + "loss_ce": 0.0001185036962851882, + "loss_iou": 0.17578125, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 485419876, + "step": 5318 + }, + { + "epoch": 22.1625, + "grad_norm": 1.2289292236498561, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 485511428, + "step": 5319 + }, + { + "epoch": 22.1625, + "loss": 0.03030611202120781, + "loss_ce": 0.00012422689178492874, + "loss_iou": 0.130859375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 485511428, + "step": 5319 + }, + { + "epoch": 22.166666666666668, + "grad_norm": 0.7004978757823226, + "learning_rate": 5e-05, + "loss": 0.1065, + "num_input_tokens_seen": 485602360, + "step": 5320 + }, + { + "epoch": 22.166666666666668, + "loss": 0.05163067951798439, + "loss_ce": 0.0003230020229239017, + "loss_iou": 0.1875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 485602360, + "step": 5320 + }, + { + "epoch": 22.170833333333334, + "grad_norm": 1.0268657155267547, + "learning_rate": 5e-05, + "loss": 0.0298, + "num_input_tokens_seen": 485693464, + "step": 5321 + }, + { + "epoch": 22.170833333333334, + "loss": 0.04323270916938782, + "loss_ce": 5.0333379476796836e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 485693464, + "step": 5321 + }, + { + "epoch": 22.175, + "grad_norm": 0.5279806498577583, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 485784804, + "step": 5322 + }, + { + "epoch": 22.175, + "loss": 0.017814230173826218, + "loss_ce": 6.062957982067019e-05, + "loss_iou": 0.125, + "loss_num": 0.0035552978515625, + "loss_xval": 0.0177001953125, + "num_input_tokens_seen": 485784804, + "step": 5322 + }, + { + "epoch": 22.179166666666667, + "grad_norm": 4.115271450580411, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 485876136, + "step": 5323 + }, + { + "epoch": 22.179166666666667, + "loss": 0.023923374712467194, + "loss_ce": 5.8628491387935355e-05, + "loss_iou": 0.201171875, + "loss_num": 0.004791259765625, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 485876136, + "step": 5323 + }, + { + "epoch": 22.183333333333334, + "grad_norm": 1.1259019805751649, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 485967676, + "step": 5324 + }, + { + "epoch": 22.183333333333334, + "loss": 0.05798065662384033, + "loss_ce": 3.92239453503862e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 485967676, + "step": 5324 + }, + { + "epoch": 22.1875, + "grad_norm": 1.8439616652645208, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 486059168, + "step": 5325 + }, + { + "epoch": 22.1875, + "loss": 0.04111999273300171, + "loss_ce": 3.951697362936102e-05, + "loss_iou": 0.045166015625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 486059168, + "step": 5325 + }, + { + "epoch": 22.191666666666666, + "grad_norm": 1.4991446665938253, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 486151080, + "step": 5326 + }, + { + "epoch": 22.191666666666666, + "loss": 0.05851975455880165, + "loss_ce": 0.00019684791914187372, + "loss_iou": 0.1357421875, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 486151080, + "step": 5326 + }, + { + "epoch": 22.195833333333333, + "grad_norm": 12.291938403856095, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 486242284, + "step": 5327 + }, + { + "epoch": 22.195833333333333, + "loss": 0.0813756138086319, + "loss_ce": 4.627187809091993e-05, + "loss_iou": 0.248046875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 486242284, + "step": 5327 + }, + { + "epoch": 22.2, + "grad_norm": 1.4562714436129898, + "learning_rate": 5e-05, + "loss": 0.0304, + "num_input_tokens_seen": 486334084, + "step": 5328 + }, + { + "epoch": 22.2, + "loss": 0.02499345690011978, + "loss_ce": 8.348390110768378e-05, + "loss_iou": 0.2265625, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 486334084, + "step": 5328 + }, + { + "epoch": 22.204166666666666, + "grad_norm": 1.3089633299319143, + "learning_rate": 5e-05, + "loss": 0.0292, + "num_input_tokens_seen": 486426152, + "step": 5329 + }, + { + "epoch": 22.204166666666666, + "loss": 0.016233263537287712, + "loss_ce": 0.00012379758118186146, + "loss_iou": 0.205078125, + "loss_num": 0.0032196044921875, + "loss_xval": 0.01611328125, + "num_input_tokens_seen": 486426152, + "step": 5329 + }, + { + "epoch": 22.208333333333332, + "grad_norm": 2.426740851370266, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 486517208, + "step": 5330 + }, + { + "epoch": 22.208333333333332, + "loss": 0.05923628434538841, + "loss_ce": 5.5069765949156135e-05, + "loss_iou": 0.1484375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 486517208, + "step": 5330 + }, + { + "epoch": 22.2125, + "grad_norm": 3.210597140596656, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 486609036, + "step": 5331 + }, + { + "epoch": 22.2125, + "loss": 0.054034460335969925, + "loss_ce": 0.002108802553266287, + "loss_iou": 0.171875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 486609036, + "step": 5331 + }, + { + "epoch": 22.216666666666665, + "grad_norm": 2.111383574118387, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 486700124, + "step": 5332 + }, + { + "epoch": 22.216666666666665, + "loss": 0.07857811450958252, + "loss_ce": 5.637863068841398e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 486700124, + "step": 5332 + }, + { + "epoch": 22.220833333333335, + "grad_norm": 2.978790690703692, + "learning_rate": 5e-05, + "loss": 0.037, + "num_input_tokens_seen": 486791124, + "step": 5333 + }, + { + "epoch": 22.220833333333335, + "loss": 0.024580243974924088, + "loss_ce": 4.4111726310802624e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 486791124, + "step": 5333 + }, + { + "epoch": 22.225, + "grad_norm": 4.959953751790821, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 486882660, + "step": 5334 + }, + { + "epoch": 22.225, + "loss": 0.054794326424598694, + "loss_ce": 7.630858453921974e-05, + "loss_iou": 0.25390625, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 486882660, + "step": 5334 + }, + { + "epoch": 22.229166666666668, + "grad_norm": 1.4963218782355763, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 486973948, + "step": 5335 + }, + { + "epoch": 22.229166666666668, + "loss": 0.04119858145713806, + "loss_ce": 5.3257987019605935e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 486973948, + "step": 5335 + }, + { + "epoch": 22.233333333333334, + "grad_norm": 4.227896015406954, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 487065208, + "step": 5336 + }, + { + "epoch": 22.233333333333334, + "loss": 0.04797931760549545, + "loss_ce": 0.0002650836540851742, + "loss_iou": 0.275390625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 487065208, + "step": 5336 + }, + { + "epoch": 22.2375, + "grad_norm": 2.962284188425735, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 487156896, + "step": 5337 + }, + { + "epoch": 22.2375, + "loss": 0.04217856377363205, + "loss_ce": 6.430511712096632e-05, + "loss_iou": 0.158203125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 487156896, + "step": 5337 + }, + { + "epoch": 22.241666666666667, + "grad_norm": 2.3401394567648683, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 487248404, + "step": 5338 + }, + { + "epoch": 22.241666666666667, + "loss": 0.029043810442090034, + "loss_ce": 2.9222243028925732e-05, + "loss_iou": 0.16796875, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 487248404, + "step": 5338 + }, + { + "epoch": 22.245833333333334, + "grad_norm": 1.8049075424901455, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 487339940, + "step": 5339 + }, + { + "epoch": 22.245833333333334, + "loss": 0.030754217877984047, + "loss_ce": 3.827477485174313e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 487339940, + "step": 5339 + }, + { + "epoch": 22.25, + "grad_norm": 1.8560570430837169, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 487430952, + "step": 5340 + }, + { + "epoch": 22.25, + "loss": 0.04377035051584244, + "loss_ce": 2.3403106752084568e-05, + "loss_iou": 0.265625, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 487430952, + "step": 5340 + }, + { + "epoch": 22.254166666666666, + "grad_norm": 2.220258487143648, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 487521912, + "step": 5341 + }, + { + "epoch": 22.254166666666666, + "loss": 0.03143194317817688, + "loss_ce": 2.9357390303630382e-05, + "loss_iou": 0.31640625, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 487521912, + "step": 5341 + }, + { + "epoch": 22.258333333333333, + "grad_norm": 2.406366107649946, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 487612848, + "step": 5342 + }, + { + "epoch": 22.258333333333333, + "loss": 0.10091523826122284, + "loss_ce": 3.9381353417411447e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 487612848, + "step": 5342 + }, + { + "epoch": 22.2625, + "grad_norm": 2.050396271138617, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 487704392, + "step": 5343 + }, + { + "epoch": 22.2625, + "loss": 0.02968679741024971, + "loss_ce": 0.0001762998872436583, + "loss_iou": 0.1787109375, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 487704392, + "step": 5343 + }, + { + "epoch": 22.266666666666666, + "grad_norm": 1.3513413531259062, + "learning_rate": 5e-05, + "loss": 0.0734, + "num_input_tokens_seen": 487795284, + "step": 5344 + }, + { + "epoch": 22.266666666666666, + "loss": 0.057062018662691116, + "loss_ce": 0.00023829156998544931, + "loss_iou": 0.267578125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 487795284, + "step": 5344 + }, + { + "epoch": 22.270833333333332, + "grad_norm": 2.863963273524733, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 487886308, + "step": 5345 + }, + { + "epoch": 22.270833333333332, + "loss": 0.05691341683268547, + "loss_ce": 2.864954512915574e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 487886308, + "step": 5345 + }, + { + "epoch": 22.275, + "grad_norm": 1.9127195605234466, + "learning_rate": 5e-05, + "loss": 0.0256, + "num_input_tokens_seen": 487978188, + "step": 5346 + }, + { + "epoch": 22.275, + "loss": 0.02111220732331276, + "loss_ce": 6.270705489441752e-05, + "loss_iou": 0.2265625, + "loss_num": 0.00421142578125, + "loss_xval": 0.02099609375, + "num_input_tokens_seen": 487978188, + "step": 5346 + }, + { + "epoch": 22.279166666666665, + "grad_norm": 2.39834707809313, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 488068596, + "step": 5347 + }, + { + "epoch": 22.279166666666665, + "loss": 0.03273743391036987, + "loss_ce": 6.836632383055985e-05, + "loss_iou": 0.2578125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 488068596, + "step": 5347 + }, + { + "epoch": 22.283333333333335, + "grad_norm": 3.0382686069871174, + "learning_rate": 5e-05, + "loss": 0.0321, + "num_input_tokens_seen": 488160336, + "step": 5348 + }, + { + "epoch": 22.283333333333335, + "loss": 0.04009832814335823, + "loss_ce": 5.926272933720611e-05, + "loss_iou": 0.25390625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 488160336, + "step": 5348 + }, + { + "epoch": 22.2875, + "grad_norm": 3.3178942224571095, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 488252200, + "step": 5349 + }, + { + "epoch": 22.2875, + "loss": 0.04742604121565819, + "loss_ce": 0.0003603041695896536, + "loss_iou": 0.240234375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 488252200, + "step": 5349 + }, + { + "epoch": 22.291666666666668, + "grad_norm": 3.3946229074438197, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 488343676, + "step": 5350 + }, + { + "epoch": 22.291666666666668, + "loss": 0.040834080427885056, + "loss_ce": 0.00010837269655894488, + "loss_iou": 0.3125, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 488343676, + "step": 5350 + }, + { + "epoch": 22.295833333333334, + "grad_norm": 3.3166911799541503, + "learning_rate": 5e-05, + "loss": 0.0722, + "num_input_tokens_seen": 488434972, + "step": 5351 + }, + { + "epoch": 22.295833333333334, + "loss": 0.10436109453439713, + "loss_ce": 9.779349784366786e-05, + "loss_iou": 0.203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 488434972, + "step": 5351 + }, + { + "epoch": 22.3, + "grad_norm": 3.277188400787632, + "learning_rate": 5e-05, + "loss": 0.0294, + "num_input_tokens_seen": 488527088, + "step": 5352 + }, + { + "epoch": 22.3, + "loss": 0.03265918046236038, + "loss_ce": 2.063011925201863e-05, + "loss_iou": 0.3046875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 488527088, + "step": 5352 + }, + { + "epoch": 22.304166666666667, + "grad_norm": 3.244957587048781, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 488618212, + "step": 5353 + }, + { + "epoch": 22.304166666666667, + "loss": 0.03455101326107979, + "loss_ce": 8.140966383507475e-05, + "loss_iou": 0.212890625, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 488618212, + "step": 5353 + }, + { + "epoch": 22.308333333333334, + "grad_norm": 2.133454868668326, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 488709552, + "step": 5354 + }, + { + "epoch": 22.308333333333334, + "loss": 0.027545509859919548, + "loss_ce": 0.001201209961436689, + "loss_iou": 0.03173828125, + "loss_num": 0.005279541015625, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 488709552, + "step": 5354 + }, + { + "epoch": 22.3125, + "grad_norm": 2.270121558074414, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 488800540, + "step": 5355 + }, + { + "epoch": 22.3125, + "loss": 0.02278338000178337, + "loss_ce": 0.00011644795449683443, + "loss_iou": 0.1669921875, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 488800540, + "step": 5355 + }, + { + "epoch": 22.316666666666666, + "grad_norm": 3.5211375058871357, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 488892136, + "step": 5356 + }, + { + "epoch": 22.316666666666666, + "loss": 0.08479742705821991, + "loss_ce": 0.00018743482360150665, + "loss_iou": 0.298828125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 488892136, + "step": 5356 + }, + { + "epoch": 22.320833333333333, + "grad_norm": 2.5951780334016963, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 488983396, + "step": 5357 + }, + { + "epoch": 22.320833333333333, + "loss": 0.030574705451726913, + "loss_ce": 3.42391176673118e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 488983396, + "step": 5357 + }, + { + "epoch": 22.325, + "grad_norm": 2.2765668696194092, + "learning_rate": 5e-05, + "loss": 0.0894, + "num_input_tokens_seen": 489075212, + "step": 5358 + }, + { + "epoch": 22.325, + "loss": 0.11566958576440811, + "loss_ce": 0.004524565767496824, + "loss_iou": 0.06494140625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 489075212, + "step": 5358 + }, + { + "epoch": 22.329166666666666, + "grad_norm": 2.426511575607964, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 489166424, + "step": 5359 + }, + { + "epoch": 22.329166666666666, + "loss": 0.022829484194517136, + "loss_ce": 2.522457180020865e-05, + "loss_iou": 0.189453125, + "loss_num": 0.004547119140625, + "loss_xval": 0.0228271484375, + "num_input_tokens_seen": 489166424, + "step": 5359 + }, + { + "epoch": 22.333333333333332, + "grad_norm": 2.384897569870611, + "learning_rate": 5e-05, + "loss": 0.0243, + "num_input_tokens_seen": 489258044, + "step": 5360 + }, + { + "epoch": 22.333333333333332, + "loss": 0.018354296684265137, + "loss_ce": 2.086133827106096e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.003662109375, + "loss_xval": 0.018310546875, + "num_input_tokens_seen": 489258044, + "step": 5360 + }, + { + "epoch": 22.3375, + "grad_norm": 3.4264892694382643, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 489349168, + "step": 5361 + }, + { + "epoch": 22.3375, + "loss": 0.034981995820999146, + "loss_ce": 0.00010040501365438104, + "loss_iou": 0.271484375, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 489349168, + "step": 5361 + }, + { + "epoch": 22.341666666666665, + "grad_norm": 35.093081007506875, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 489440432, + "step": 5362 + }, + { + "epoch": 22.341666666666665, + "loss": 0.0685751810669899, + "loss_ce": 1.7436836060369387e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 489440432, + "step": 5362 + }, + { + "epoch": 22.345833333333335, + "grad_norm": 2.7972034028546755, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 489531972, + "step": 5363 + }, + { + "epoch": 22.345833333333335, + "loss": 0.042357951402664185, + "loss_ce": 0.002158672781661153, + "loss_iou": 0.173828125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 489531972, + "step": 5363 + }, + { + "epoch": 22.35, + "grad_norm": 3.494472550638683, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 489623632, + "step": 5364 + }, + { + "epoch": 22.35, + "loss": 0.0324832946062088, + "loss_ce": 2.78517218248453e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 489623632, + "step": 5364 + }, + { + "epoch": 22.354166666666668, + "grad_norm": 4.362105699287279, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 489714560, + "step": 5365 + }, + { + "epoch": 22.354166666666668, + "loss": 0.043050382286310196, + "loss_ce": 3.585560625651851e-05, + "loss_iou": 0.240234375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 489714560, + "step": 5365 + }, + { + "epoch": 22.358333333333334, + "grad_norm": 2.8929406516760863, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 489805968, + "step": 5366 + }, + { + "epoch": 22.358333333333334, + "loss": 0.035020582377910614, + "loss_ce": 1.691956822469365e-05, + "loss_iou": 0.23828125, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 489805968, + "step": 5366 + }, + { + "epoch": 22.3625, + "grad_norm": 3.0977638120639153, + "learning_rate": 5e-05, + "loss": 0.0456, + "num_input_tokens_seen": 489897024, + "step": 5367 + }, + { + "epoch": 22.3625, + "loss": 0.02139360085129738, + "loss_ce": 3.8926802517380565e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0042724609375, + "loss_xval": 0.0213623046875, + "num_input_tokens_seen": 489897024, + "step": 5367 + }, + { + "epoch": 22.366666666666667, + "grad_norm": 1.3422920759104453, + "learning_rate": 5e-05, + "loss": 0.0302, + "num_input_tokens_seen": 489988420, + "step": 5368 + }, + { + "epoch": 22.366666666666667, + "loss": 0.03387787193059921, + "loss_ce": 3.388000914128497e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 489988420, + "step": 5368 + }, + { + "epoch": 22.370833333333334, + "grad_norm": 1.0428391477939085, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 490079600, + "step": 5369 + }, + { + "epoch": 22.370833333333334, + "loss": 0.07174451649188995, + "loss_ce": 4.346769128460437e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 490079600, + "step": 5369 + }, + { + "epoch": 22.375, + "grad_norm": 1.0664256751003562, + "learning_rate": 5e-05, + "loss": 0.0257, + "num_input_tokens_seen": 490171280, + "step": 5370 + }, + { + "epoch": 22.375, + "loss": 0.025925656780600548, + "loss_ce": 0.000985165941528976, + "loss_iou": 0.2373046875, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 490171280, + "step": 5370 + }, + { + "epoch": 22.379166666666666, + "grad_norm": 1.5563494410398024, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 490262924, + "step": 5371 + }, + { + "epoch": 22.379166666666666, + "loss": 0.034245528280735016, + "loss_ce": 6.583852518815547e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 490262924, + "step": 5371 + }, + { + "epoch": 22.383333333333333, + "grad_norm": 2.5664351197816786, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 490354332, + "step": 5372 + }, + { + "epoch": 22.383333333333333, + "loss": 0.033131957054138184, + "loss_ce": 2.8015738280373625e-05, + "loss_iou": 0.26953125, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 490354332, + "step": 5372 + }, + { + "epoch": 22.3875, + "grad_norm": 1.916318718588456, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 490445200, + "step": 5373 + }, + { + "epoch": 22.3875, + "loss": 0.025140874087810516, + "loss_ce": 7.068323611747473e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 490445200, + "step": 5373 + }, + { + "epoch": 22.391666666666666, + "grad_norm": 3.1213070784766765, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 490536484, + "step": 5374 + }, + { + "epoch": 22.391666666666666, + "loss": 0.037068236619234085, + "loss_ce": 0.00018774466298054904, + "loss_iou": 0.298828125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 490536484, + "step": 5374 + }, + { + "epoch": 22.395833333333332, + "grad_norm": 2.529762377102089, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 490628100, + "step": 5375 + }, + { + "epoch": 22.395833333333332, + "loss": 0.03373111039400101, + "loss_ce": 7.02215766068548e-05, + "loss_iou": 0.1484375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 490628100, + "step": 5375 + }, + { + "epoch": 22.4, + "grad_norm": 19.242992938124434, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 490719064, + "step": 5376 + }, + { + "epoch": 22.4, + "loss": 0.03175321966409683, + "loss_ce": 0.00010648959869286045, + "loss_iou": 0.154296875, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 490719064, + "step": 5376 + }, + { + "epoch": 22.404166666666665, + "grad_norm": 2.3804535970551663, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 490809884, + "step": 5377 + }, + { + "epoch": 22.404166666666665, + "loss": 0.02702101692557335, + "loss_ce": 8.544004231225699e-05, + "loss_iou": 0.169921875, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 490809884, + "step": 5377 + }, + { + "epoch": 22.408333333333335, + "grad_norm": 2.6108490382580496, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 490901520, + "step": 5378 + }, + { + "epoch": 22.408333333333335, + "loss": 0.031930726021528244, + "loss_ce": 8.563094161218032e-05, + "loss_iou": 0.294921875, + "loss_num": 0.006378173828125, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 490901520, + "step": 5378 + }, + { + "epoch": 22.4125, + "grad_norm": 2.5894466488834946, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 490992648, + "step": 5379 + }, + { + "epoch": 22.4125, + "loss": 0.0888570249080658, + "loss_ce": 0.0005544152809306979, + "loss_iou": 0.228515625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 490992648, + "step": 5379 + }, + { + "epoch": 22.416666666666668, + "grad_norm": 2.78634108448832, + "learning_rate": 5e-05, + "loss": 0.0662, + "num_input_tokens_seen": 491083320, + "step": 5380 + }, + { + "epoch": 22.416666666666668, + "loss": 0.08157380670309067, + "loss_ce": 6.13602896919474e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 491083320, + "step": 5380 + }, + { + "epoch": 22.420833333333334, + "grad_norm": 1.6569327710068393, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 491174316, + "step": 5381 + }, + { + "epoch": 22.420833333333334, + "loss": 0.019204962998628616, + "loss_ce": 7.044081576168537e-05, + "loss_iou": 0.080078125, + "loss_num": 0.0038299560546875, + "loss_xval": 0.0191650390625, + "num_input_tokens_seen": 491174316, + "step": 5381 + }, + { + "epoch": 22.425, + "grad_norm": 1.7105774926903508, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 491265744, + "step": 5382 + }, + { + "epoch": 22.425, + "loss": 0.04312212020158768, + "loss_ce": 3.1299583497457206e-05, + "loss_iou": 0.27734375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 491265744, + "step": 5382 + }, + { + "epoch": 22.429166666666667, + "grad_norm": 1.7403705972722234, + "learning_rate": 5e-05, + "loss": 0.0698, + "num_input_tokens_seen": 491354352, + "step": 5383 + }, + { + "epoch": 22.429166666666667, + "loss": 0.05720948055386543, + "loss_ce": 1.9535385945346206e-05, + "loss_iou": 0.2421875, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 491354352, + "step": 5383 + }, + { + "epoch": 22.433333333333334, + "grad_norm": 2.00698516413751, + "learning_rate": 5e-05, + "loss": 0.0886, + "num_input_tokens_seen": 491445484, + "step": 5384 + }, + { + "epoch": 22.433333333333334, + "loss": 0.14696118235588074, + "loss_ce": 3.430706055951305e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 491445484, + "step": 5384 + }, + { + "epoch": 22.4375, + "grad_norm": 1.4529458763998768, + "learning_rate": 5e-05, + "loss": 0.0322, + "num_input_tokens_seen": 491536328, + "step": 5385 + }, + { + "epoch": 22.4375, + "loss": 0.03684793412685394, + "loss_ce": 1.3217177183832973e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 491536328, + "step": 5385 + }, + { + "epoch": 22.441666666666666, + "grad_norm": 2.2941956647046395, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 491627740, + "step": 5386 + }, + { + "epoch": 22.441666666666666, + "loss": 0.052660562098026276, + "loss_ce": 0.00010929418203886598, + "loss_iou": 0.2158203125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 491627740, + "step": 5386 + }, + { + "epoch": 22.445833333333333, + "grad_norm": 2.2595644852220897, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 491718420, + "step": 5387 + }, + { + "epoch": 22.445833333333333, + "loss": 0.020403403788805008, + "loss_ce": 8.632548997411504e-05, + "loss_iou": 0.107421875, + "loss_num": 0.004058837890625, + "loss_xval": 0.020263671875, + "num_input_tokens_seen": 491718420, + "step": 5387 + }, + { + "epoch": 22.45, + "grad_norm": 2.665623036596892, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 491809752, + "step": 5388 + }, + { + "epoch": 22.45, + "loss": 0.03691437840461731, + "loss_ce": 3.388620825717226e-05, + "loss_iou": 0.27734375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 491809752, + "step": 5388 + }, + { + "epoch": 22.454166666666666, + "grad_norm": 1.7622978237673568, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 491901876, + "step": 5389 + }, + { + "epoch": 22.454166666666666, + "loss": 0.03598965331912041, + "loss_ce": 0.00012005640019197017, + "loss_iou": 0.1787109375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 491901876, + "step": 5389 + }, + { + "epoch": 22.458333333333332, + "grad_norm": 0.7490084016911978, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 491993576, + "step": 5390 + }, + { + "epoch": 22.458333333333332, + "loss": 0.05927729979157448, + "loss_ce": 6.5567423007451e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 491993576, + "step": 5390 + }, + { + "epoch": 22.4625, + "grad_norm": 0.47811007001813804, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 492085012, + "step": 5391 + }, + { + "epoch": 22.4625, + "loss": 0.02164047211408615, + "loss_ce": 2.6398767658974975e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.00433349609375, + "loss_xval": 0.0216064453125, + "num_input_tokens_seen": 492085012, + "step": 5391 + }, + { + "epoch": 22.466666666666665, + "grad_norm": 0.7928415294582661, + "learning_rate": 5e-05, + "loss": 0.0207, + "num_input_tokens_seen": 492176244, + "step": 5392 + }, + { + "epoch": 22.466666666666665, + "loss": 0.022790245711803436, + "loss_ce": 2.7946376576437615e-05, + "loss_iou": 0.201171875, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 492176244, + "step": 5392 + }, + { + "epoch": 22.470833333333335, + "grad_norm": 1.4785762565522547, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 492267676, + "step": 5393 + }, + { + "epoch": 22.470833333333335, + "loss": 0.06214084103703499, + "loss_ce": 2.2309055566438474e-05, + "loss_iou": 0.28515625, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 492267676, + "step": 5393 + }, + { + "epoch": 22.475, + "grad_norm": 2.0332821119261713, + "learning_rate": 5e-05, + "loss": 0.0243, + "num_input_tokens_seen": 492359032, + "step": 5394 + }, + { + "epoch": 22.475, + "loss": 0.02958713099360466, + "loss_ce": 1.5598343452438712e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 492359032, + "step": 5394 + }, + { + "epoch": 22.479166666666668, + "grad_norm": 3.236142234487662, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 492450032, + "step": 5395 + }, + { + "epoch": 22.479166666666668, + "loss": 0.02547776699066162, + "loss_ce": 4.136636562179774e-05, + "loss_iou": 0.333984375, + "loss_num": 0.005096435546875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 492450032, + "step": 5395 + }, + { + "epoch": 22.483333333333334, + "grad_norm": 3.099703672782828, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 492539916, + "step": 5396 + }, + { + "epoch": 22.483333333333334, + "loss": 0.03732884302735329, + "loss_ce": 2.1101570382597856e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 492539916, + "step": 5396 + }, + { + "epoch": 22.4875, + "grad_norm": 3.3406444811373293, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 492631208, + "step": 5397 + }, + { + "epoch": 22.4875, + "loss": 0.07744846493005753, + "loss_ce": 9.403174044564366e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 492631208, + "step": 5397 + }, + { + "epoch": 22.491666666666667, + "grad_norm": 2.500264706930889, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 492722220, + "step": 5398 + }, + { + "epoch": 22.491666666666667, + "loss": 0.03508803993463516, + "loss_ce": 0.0004963674582540989, + "loss_iou": 0.1357421875, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 492722220, + "step": 5398 + }, + { + "epoch": 22.495833333333334, + "grad_norm": 2.923014487782051, + "learning_rate": 5e-05, + "loss": 0.0809, + "num_input_tokens_seen": 492813116, + "step": 5399 + }, + { + "epoch": 22.495833333333334, + "loss": 0.0822400376200676, + "loss_ce": 5.620271258521825e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 492813116, + "step": 5399 + }, + { + "epoch": 22.5, + "grad_norm": 1.9591952052807597, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 492904024, + "step": 5400 + }, + { + "epoch": 22.5, + "loss": 0.05392465740442276, + "loss_ce": 4.587280272971839e-05, + "loss_iou": 0.24609375, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 492904024, + "step": 5400 + }, + { + "epoch": 22.504166666666666, + "grad_norm": 2.1966628956720307, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 492996044, + "step": 5401 + }, + { + "epoch": 22.504166666666666, + "loss": 0.0637560486793518, + "loss_ce": 6.586193921975791e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 492996044, + "step": 5401 + }, + { + "epoch": 22.508333333333333, + "grad_norm": 1.8024880556166163, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 493087680, + "step": 5402 + }, + { + "epoch": 22.508333333333333, + "loss": 0.021951181814074516, + "loss_ce": 0.0002074065268971026, + "loss_iou": 0.1396484375, + "loss_num": 0.00433349609375, + "loss_xval": 0.021728515625, + "num_input_tokens_seen": 493087680, + "step": 5402 + }, + { + "epoch": 22.5125, + "grad_norm": 2.6832177202867578, + "learning_rate": 5e-05, + "loss": 0.0234, + "num_input_tokens_seen": 493179756, + "step": 5403 + }, + { + "epoch": 22.5125, + "loss": 0.018739566206932068, + "loss_ce": 1.703085217741318e-05, + "loss_iou": 0.248046875, + "loss_num": 0.0037384033203125, + "loss_xval": 0.0186767578125, + "num_input_tokens_seen": 493179756, + "step": 5403 + }, + { + "epoch": 22.516666666666666, + "grad_norm": 11.651569876024217, + "learning_rate": 5e-05, + "loss": 0.0643, + "num_input_tokens_seen": 493271120, + "step": 5404 + }, + { + "epoch": 22.516666666666666, + "loss": 0.025263365358114243, + "loss_ce": 7.873350114095956e-05, + "loss_iou": 0.275390625, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 493271120, + "step": 5404 + }, + { + "epoch": 22.520833333333332, + "grad_norm": 1.6422518445865064, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 493361644, + "step": 5405 + }, + { + "epoch": 22.520833333333332, + "loss": 0.024423548951745033, + "loss_ce": 3.237510100007057e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 493361644, + "step": 5405 + }, + { + "epoch": 22.525, + "grad_norm": 2.780010890769678, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 493453124, + "step": 5406 + }, + { + "epoch": 22.525, + "loss": 0.11650041490793228, + "loss_ce": 2.2444699425250292e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 493453124, + "step": 5406 + }, + { + "epoch": 22.529166666666665, + "grad_norm": 1.7272088537396273, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 493544756, + "step": 5407 + }, + { + "epoch": 22.529166666666665, + "loss": 0.05183388292789459, + "loss_ce": 4.5554290409199893e-05, + "loss_iou": 0.11669921875, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 493544756, + "step": 5407 + }, + { + "epoch": 22.533333333333335, + "grad_norm": 2.4838237798667437, + "learning_rate": 5e-05, + "loss": 0.0204, + "num_input_tokens_seen": 493635696, + "step": 5408 + }, + { + "epoch": 22.533333333333335, + "loss": 0.01650792546570301, + "loss_ce": 5.8951460232492536e-05, + "loss_iou": 0.171875, + "loss_num": 0.0032958984375, + "loss_xval": 0.0164794921875, + "num_input_tokens_seen": 493635696, + "step": 5408 + }, + { + "epoch": 22.5375, + "grad_norm": 2.680248921270045, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 493727152, + "step": 5409 + }, + { + "epoch": 22.5375, + "loss": 0.043390579521656036, + "loss_ce": 5.5617347243241966e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 493727152, + "step": 5409 + }, + { + "epoch": 22.541666666666668, + "grad_norm": 3.1224713410805434, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 493818056, + "step": 5410 + }, + { + "epoch": 22.541666666666668, + "loss": 0.034009747207164764, + "loss_ce": 4.368008376332e-05, + "loss_iou": 0.109375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 493818056, + "step": 5410 + }, + { + "epoch": 22.545833333333334, + "grad_norm": 1.9609990516597686, + "learning_rate": 5e-05, + "loss": 0.0247, + "num_input_tokens_seen": 493908748, + "step": 5411 + }, + { + "epoch": 22.545833333333334, + "loss": 0.028770219534635544, + "loss_ce": 3.792079951381311e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 493908748, + "step": 5411 + }, + { + "epoch": 22.55, + "grad_norm": 2.1888017020400845, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 493999912, + "step": 5412 + }, + { + "epoch": 22.55, + "loss": 0.04256013035774231, + "loss_ce": 0.0007510512950830162, + "loss_iou": 0.271484375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 493999912, + "step": 5412 + }, + { + "epoch": 22.554166666666667, + "grad_norm": 2.142711327252004, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 494091008, + "step": 5413 + }, + { + "epoch": 22.554166666666667, + "loss": 0.10330962389707565, + "loss_ce": 0.000221242691623047, + "loss_iou": 0.26953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 494091008, + "step": 5413 + }, + { + "epoch": 22.558333333333334, + "grad_norm": 2.894368719760623, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 494182104, + "step": 5414 + }, + { + "epoch": 22.558333333333334, + "loss": 0.02766202762722969, + "loss_ce": 0.0001428009127266705, + "loss_iou": 0.248046875, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 494182104, + "step": 5414 + }, + { + "epoch": 22.5625, + "grad_norm": 2.7283296438572817, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 494273712, + "step": 5415 + }, + { + "epoch": 22.5625, + "loss": 0.05128024145960808, + "loss_ce": 0.00020907694124616683, + "loss_iou": 0.19140625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 494273712, + "step": 5415 + }, + { + "epoch": 22.566666666666666, + "grad_norm": 3.605747533364917, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 494364492, + "step": 5416 + }, + { + "epoch": 22.566666666666666, + "loss": 0.046604275703430176, + "loss_ce": 1.9191516912542284e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 494364492, + "step": 5416 + }, + { + "epoch": 22.570833333333333, + "grad_norm": 2.559940130277489, + "learning_rate": 5e-05, + "loss": 0.0548, + "num_input_tokens_seen": 494455692, + "step": 5417 + }, + { + "epoch": 22.570833333333333, + "loss": 0.0370146818459034, + "loss_ce": 1.2118907761760056e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 494455692, + "step": 5417 + }, + { + "epoch": 22.575, + "grad_norm": 1.9876258889721863, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 494546932, + "step": 5418 + }, + { + "epoch": 22.575, + "loss": 0.043613698333501816, + "loss_ce": 0.00012615090236067772, + "loss_iou": 0.0634765625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 494546932, + "step": 5418 + }, + { + "epoch": 22.579166666666666, + "grad_norm": 2.014097885386239, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 494638788, + "step": 5419 + }, + { + "epoch": 22.579166666666666, + "loss": 0.049494411796331406, + "loss_ce": 0.00042214401764795184, + "loss_iou": 0.265625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 494638788, + "step": 5419 + }, + { + "epoch": 22.583333333333332, + "grad_norm": 1.3184404900712061, + "learning_rate": 5e-05, + "loss": 0.0292, + "num_input_tokens_seen": 494730208, + "step": 5420 + }, + { + "epoch": 22.583333333333332, + "loss": 0.028579875826835632, + "loss_ce": 2.3050801246427e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 494730208, + "step": 5420 + }, + { + "epoch": 22.5875, + "grad_norm": 4.326848124366721, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 494821976, + "step": 5421 + }, + { + "epoch": 22.5875, + "loss": 0.04439885914325714, + "loss_ce": 0.00027044268790632486, + "loss_iou": 0.3125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 494821976, + "step": 5421 + }, + { + "epoch": 22.591666666666665, + "grad_norm": 1.9149965433139318, + "learning_rate": 5e-05, + "loss": 0.028, + "num_input_tokens_seen": 494913644, + "step": 5422 + }, + { + "epoch": 22.591666666666665, + "loss": 0.03406630456447601, + "loss_ce": 3.1575760658597574e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 494913644, + "step": 5422 + }, + { + "epoch": 22.595833333333335, + "grad_norm": 2.4941618530637837, + "learning_rate": 5e-05, + "loss": 0.0676, + "num_input_tokens_seen": 495005188, + "step": 5423 + }, + { + "epoch": 22.595833333333335, + "loss": 0.018450569361448288, + "loss_ce": 0.00010950414434773847, + "loss_iou": 0.19921875, + "loss_num": 0.003662109375, + "loss_xval": 0.018310546875, + "num_input_tokens_seen": 495005188, + "step": 5423 + }, + { + "epoch": 22.6, + "grad_norm": 3.29433751743764, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 495096512, + "step": 5424 + }, + { + "epoch": 22.6, + "loss": 0.05725552886724472, + "loss_ce": 3.507136716507375e-05, + "loss_iou": 0.28125, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 495096512, + "step": 5424 + }, + { + "epoch": 22.604166666666668, + "grad_norm": 2.8359036528199524, + "learning_rate": 5e-05, + "loss": 0.0667, + "num_input_tokens_seen": 495187732, + "step": 5425 + }, + { + "epoch": 22.604166666666668, + "loss": 0.07588600367307663, + "loss_ce": 1.166831498267129e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 495187732, + "step": 5425 + }, + { + "epoch": 22.608333333333334, + "grad_norm": 1.358567931342101, + "learning_rate": 5e-05, + "loss": 0.0261, + "num_input_tokens_seen": 495279620, + "step": 5426 + }, + { + "epoch": 22.608333333333334, + "loss": 0.01989165134727955, + "loss_ce": 2.47081188717857e-05, + "loss_iou": 0.171875, + "loss_num": 0.00396728515625, + "loss_xval": 0.0198974609375, + "num_input_tokens_seen": 495279620, + "step": 5426 + }, + { + "epoch": 22.6125, + "grad_norm": 1.2792343501441592, + "learning_rate": 5e-05, + "loss": 0.0432, + "num_input_tokens_seen": 495371628, + "step": 5427 + }, + { + "epoch": 22.6125, + "loss": 0.016980396583676338, + "loss_ce": 5.839836740051396e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.003387451171875, + "loss_xval": 0.0169677734375, + "num_input_tokens_seen": 495371628, + "step": 5427 + }, + { + "epoch": 22.616666666666667, + "grad_norm": 14.962869241023144, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 495462292, + "step": 5428 + }, + { + "epoch": 22.616666666666667, + "loss": 0.03393097221851349, + "loss_ce": 1.0683897926355712e-05, + "loss_iou": 0.296875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 495462292, + "step": 5428 + }, + { + "epoch": 22.620833333333334, + "grad_norm": 1.7903859967395788, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 495553636, + "step": 5429 + }, + { + "epoch": 22.620833333333334, + "loss": 0.06721779704093933, + "loss_ce": 0.0002851189929060638, + "loss_iou": 0.283203125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 495553636, + "step": 5429 + }, + { + "epoch": 22.625, + "grad_norm": 2.733708686422691, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 495645176, + "step": 5430 + }, + { + "epoch": 22.625, + "loss": 0.03249605372548103, + "loss_ce": 2.1535624910029583e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 495645176, + "step": 5430 + }, + { + "epoch": 22.629166666666666, + "grad_norm": 3.2757873666002766, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 495736032, + "step": 5431 + }, + { + "epoch": 22.629166666666666, + "loss": 0.07405970990657806, + "loss_ce": 0.0018932658713310957, + "loss_iou": 0.3046875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 495736032, + "step": 5431 + }, + { + "epoch": 22.633333333333333, + "grad_norm": 3.369393966718524, + "learning_rate": 5e-05, + "loss": 0.0293, + "num_input_tokens_seen": 495827532, + "step": 5432 + }, + { + "epoch": 22.633333333333333, + "loss": 0.023361414670944214, + "loss_ce": 5.3613319323631003e-05, + "loss_iou": 0.2890625, + "loss_num": 0.004669189453125, + "loss_xval": 0.0233154296875, + "num_input_tokens_seen": 495827532, + "step": 5432 + }, + { + "epoch": 22.6375, + "grad_norm": 2.362708376690048, + "learning_rate": 5e-05, + "loss": 0.0265, + "num_input_tokens_seen": 495918936, + "step": 5433 + }, + { + "epoch": 22.6375, + "loss": 0.023834653198719025, + "loss_ce": 0.00024456530809402466, + "loss_iou": 0.2373046875, + "loss_num": 0.00469970703125, + "loss_xval": 0.0235595703125, + "num_input_tokens_seen": 495918936, + "step": 5433 + }, + { + "epoch": 22.641666666666666, + "grad_norm": 2.785947410744313, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 496010332, + "step": 5434 + }, + { + "epoch": 22.641666666666666, + "loss": 0.03664480894804001, + "loss_ce": 0.00014578511763829738, + "loss_iou": 0.212890625, + "loss_num": 0.007293701171875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 496010332, + "step": 5434 + }, + { + "epoch": 22.645833333333332, + "grad_norm": 4.351682632944168, + "learning_rate": 5e-05, + "loss": 0.1063, + "num_input_tokens_seen": 496101500, + "step": 5435 + }, + { + "epoch": 22.645833333333332, + "loss": 0.17546984553337097, + "loss_ce": 0.00040574927697889507, + "loss_iou": 0.44921875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 496101500, + "step": 5435 + }, + { + "epoch": 22.65, + "grad_norm": 3.1939984607540053, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 496193140, + "step": 5436 + }, + { + "epoch": 22.65, + "loss": 0.026585184037685394, + "loss_ce": 6.540679169120267e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 496193140, + "step": 5436 + }, + { + "epoch": 22.654166666666665, + "grad_norm": 3.436034185560342, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 496284496, + "step": 5437 + }, + { + "epoch": 22.654166666666665, + "loss": 0.04690772294998169, + "loss_ce": 0.0001090200967155397, + "loss_iou": 0.265625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 496284496, + "step": 5437 + }, + { + "epoch": 22.658333333333335, + "grad_norm": 1.6985662007924813, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 496375820, + "step": 5438 + }, + { + "epoch": 22.658333333333335, + "loss": 0.03478825092315674, + "loss_ce": 0.00031864611082710326, + "loss_iou": 0.255859375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 496375820, + "step": 5438 + }, + { + "epoch": 22.6625, + "grad_norm": 2.257397167145903, + "learning_rate": 5e-05, + "loss": 0.0423, + "num_input_tokens_seen": 496466412, + "step": 5439 + }, + { + "epoch": 22.6625, + "loss": 0.05808237940073013, + "loss_ce": 0.00018290436128154397, + "loss_iou": 0.201171875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 496466412, + "step": 5439 + }, + { + "epoch": 22.666666666666668, + "grad_norm": 3.766741952029807, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 496557896, + "step": 5440 + }, + { + "epoch": 22.666666666666668, + "loss": 0.026158884167671204, + "loss_ce": 2.0578299881890416e-05, + "loss_iou": 0.224609375, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 496557896, + "step": 5440 + }, + { + "epoch": 22.670833333333334, + "grad_norm": 2.7989902007061924, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 496649280, + "step": 5441 + }, + { + "epoch": 22.670833333333334, + "loss": 0.06078026816248894, + "loss_ce": 1.9770006474573165e-05, + "loss_iou": 0.1953125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 496649280, + "step": 5441 + }, + { + "epoch": 22.675, + "grad_norm": 3.8692101114858515, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 496739608, + "step": 5442 + }, + { + "epoch": 22.675, + "loss": 0.0622529461979866, + "loss_ce": 1.2350255929050036e-05, + "loss_iou": 0.173828125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 496739608, + "step": 5442 + }, + { + "epoch": 22.679166666666667, + "grad_norm": 2.8602886566309023, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 496831084, + "step": 5443 + }, + { + "epoch": 22.679166666666667, + "loss": 0.039982639253139496, + "loss_ce": 1.987188807106577e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 496831084, + "step": 5443 + }, + { + "epoch": 22.683333333333334, + "grad_norm": 2.6429848934326396, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 496922348, + "step": 5444 + }, + { + "epoch": 22.683333333333334, + "loss": 0.029625367373228073, + "loss_ce": 0.00011868414003401995, + "loss_iou": 0.1435546875, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 496922348, + "step": 5444 + }, + { + "epoch": 22.6875, + "grad_norm": 5.833692611461288, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 497013648, + "step": 5445 + }, + { + "epoch": 22.6875, + "loss": 0.04049752280116081, + "loss_ce": 1.595700450707227e-05, + "loss_iou": 0.177734375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 497013648, + "step": 5445 + }, + { + "epoch": 22.691666666666666, + "grad_norm": 2.1422200155879714, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 497104764, + "step": 5446 + }, + { + "epoch": 22.691666666666666, + "loss": 0.04196276515722275, + "loss_ce": 3.923796975868754e-05, + "loss_iou": 0.2109375, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 497104764, + "step": 5446 + }, + { + "epoch": 22.695833333333333, + "grad_norm": 3.225344975091235, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 497196636, + "step": 5447 + }, + { + "epoch": 22.695833333333333, + "loss": 0.06967813521623611, + "loss_ce": 4.46484045824036e-05, + "loss_iou": 0.306640625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 497196636, + "step": 5447 + }, + { + "epoch": 22.7, + "grad_norm": 4.874570569091514, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 497288316, + "step": 5448 + }, + { + "epoch": 22.7, + "loss": 0.05220876634120941, + "loss_ce": 0.00010000343172578141, + "loss_iou": 0.234375, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 497288316, + "step": 5448 + }, + { + "epoch": 22.704166666666666, + "grad_norm": 3.17848314051083, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 497379588, + "step": 5449 + }, + { + "epoch": 22.704166666666666, + "loss": 0.024372564628720284, + "loss_ce": 1.9538067135727033e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 497379588, + "step": 5449 + }, + { + "epoch": 22.708333333333332, + "grad_norm": 2.602008060990133, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 497471692, + "step": 5450 + }, + { + "epoch": 22.708333333333332, + "loss": 0.03950629383325577, + "loss_ce": 0.003465034067630768, + "loss_iou": 0.294921875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 497471692, + "step": 5450 + }, + { + "epoch": 22.7125, + "grad_norm": 1.9733341846682342, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 497562564, + "step": 5451 + }, + { + "epoch": 22.7125, + "loss": 0.08485489338636398, + "loss_ce": 0.00010757323616417125, + "loss_iou": 0.298828125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 497562564, + "step": 5451 + }, + { + "epoch": 22.716666666666665, + "grad_norm": 1.796458844908592, + "learning_rate": 5e-05, + "loss": 0.0774, + "num_input_tokens_seen": 497653716, + "step": 5452 + }, + { + "epoch": 22.716666666666665, + "loss": 0.03222234547138214, + "loss_ce": 4.155963324592449e-05, + "loss_iou": 0.0625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 497653716, + "step": 5452 + }, + { + "epoch": 22.720833333333335, + "grad_norm": 0.7352762056029919, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 497744864, + "step": 5453 + }, + { + "epoch": 22.720833333333335, + "loss": 0.03118591569364071, + "loss_ce": 1.2207925465190783e-05, + "loss_iou": 0.24609375, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 497744864, + "step": 5453 + }, + { + "epoch": 22.725, + "grad_norm": 2.196564472678189, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 497836280, + "step": 5454 + }, + { + "epoch": 22.725, + "loss": 0.061884164810180664, + "loss_ce": 1.740168227115646e-05, + "loss_iou": 0.119140625, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 497836280, + "step": 5454 + }, + { + "epoch": 22.729166666666668, + "grad_norm": 1.600302678177142, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 497927552, + "step": 5455 + }, + { + "epoch": 22.729166666666668, + "loss": 0.037744879722595215, + "loss_ce": 0.0005744679365307093, + "loss_iou": 0.1650390625, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 497927552, + "step": 5455 + }, + { + "epoch": 22.733333333333334, + "grad_norm": 1.6767414487852184, + "learning_rate": 5e-05, + "loss": 0.0352, + "num_input_tokens_seen": 498018864, + "step": 5456 + }, + { + "epoch": 22.733333333333334, + "loss": 0.0258328877389431, + "loss_ce": 0.00012182634236523882, + "loss_iou": 0.265625, + "loss_num": 0.005126953125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 498018864, + "step": 5456 + }, + { + "epoch": 22.7375, + "grad_norm": 2.4109369563480625, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 498110780, + "step": 5457 + }, + { + "epoch": 22.7375, + "loss": 0.0353752076625824, + "loss_ce": 8.162675658240914e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 498110780, + "step": 5457 + }, + { + "epoch": 22.741666666666667, + "grad_norm": 3.127469878487144, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 498202140, + "step": 5458 + }, + { + "epoch": 22.741666666666667, + "loss": 0.03448343276977539, + "loss_ce": 2.9086375434417278e-05, + "loss_iou": 0.1669921875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 498202140, + "step": 5458 + }, + { + "epoch": 22.745833333333334, + "grad_norm": 3.0979169922431016, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 498293228, + "step": 5459 + }, + { + "epoch": 22.745833333333334, + "loss": 0.054097265005111694, + "loss_ce": 0.0001269291969947517, + "loss_iou": 0.2314453125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 498293228, + "step": 5459 + }, + { + "epoch": 22.75, + "grad_norm": 2.7281658817110155, + "learning_rate": 5e-05, + "loss": 0.0295, + "num_input_tokens_seen": 498384192, + "step": 5460 + }, + { + "epoch": 22.75, + "loss": 0.024137474596500397, + "loss_ce": 5.91044663451612e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 498384192, + "step": 5460 + }, + { + "epoch": 22.754166666666666, + "grad_norm": 2.6444292311403528, + "learning_rate": 5e-05, + "loss": 0.0264, + "num_input_tokens_seen": 498475908, + "step": 5461 + }, + { + "epoch": 22.754166666666666, + "loss": 0.029545985162258148, + "loss_ce": 5.8375371736474335e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 498475908, + "step": 5461 + }, + { + "epoch": 22.758333333333333, + "grad_norm": 2.908463530849759, + "learning_rate": 5e-05, + "loss": 0.0317, + "num_input_tokens_seen": 498565328, + "step": 5462 + }, + { + "epoch": 22.758333333333333, + "loss": 0.03385067731142044, + "loss_ce": 0.00017453177133575082, + "loss_iou": 0.1767578125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 498565328, + "step": 5462 + }, + { + "epoch": 22.7625, + "grad_norm": 3.6003888672228377, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 498657020, + "step": 5463 + }, + { + "epoch": 22.7625, + "loss": 0.01993653178215027, + "loss_ce": 1.9998524294351228e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00396728515625, + "loss_xval": 0.0198974609375, + "num_input_tokens_seen": 498657020, + "step": 5463 + }, + { + "epoch": 22.766666666666666, + "grad_norm": 2.367609533052053, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 498748380, + "step": 5464 + }, + { + "epoch": 22.766666666666666, + "loss": 0.025592446327209473, + "loss_ce": 1.8714170437306166e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 498748380, + "step": 5464 + }, + { + "epoch": 22.770833333333332, + "grad_norm": 3.0676588689448865, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 498839220, + "step": 5465 + }, + { + "epoch": 22.770833333333332, + "loss": 0.028899289667606354, + "loss_ce": 4.4918717321706936e-05, + "loss_iou": 0.21484375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 498839220, + "step": 5465 + }, + { + "epoch": 22.775, + "grad_norm": 2.8282670386731663, + "learning_rate": 5e-05, + "loss": 0.0846, + "num_input_tokens_seen": 498930532, + "step": 5466 + }, + { + "epoch": 22.775, + "loss": 0.13063108921051025, + "loss_ce": 1.2047556992911268e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 498930532, + "step": 5466 + }, + { + "epoch": 22.779166666666665, + "grad_norm": 2.358372792627603, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 499021668, + "step": 5467 + }, + { + "epoch": 22.779166666666665, + "loss": 0.037430137395858765, + "loss_ce": 3.08443013636861e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 499021668, + "step": 5467 + }, + { + "epoch": 22.783333333333335, + "grad_norm": 6.374577377660233, + "learning_rate": 5e-05, + "loss": 0.0778, + "num_input_tokens_seen": 499112924, + "step": 5468 + }, + { + "epoch": 22.783333333333335, + "loss": 0.03872350603342056, + "loss_ce": 1.19601600090391e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 499112924, + "step": 5468 + }, + { + "epoch": 22.7875, + "grad_norm": 4.431361490860381, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 499204276, + "step": 5469 + }, + { + "epoch": 22.7875, + "loss": 0.04221169278025627, + "loss_ce": 0.00010506440594326705, + "loss_iou": 0.412109375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 499204276, + "step": 5469 + }, + { + "epoch": 22.791666666666668, + "grad_norm": 2.181301579207962, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 499295268, + "step": 5470 + }, + { + "epoch": 22.791666666666668, + "loss": 0.02486196532845497, + "loss_ce": 2.06578042707406e-05, + "loss_iou": 0.203125, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 499295268, + "step": 5470 + }, + { + "epoch": 22.795833333333334, + "grad_norm": 2.5528680243042245, + "learning_rate": 5e-05, + "loss": 0.0579, + "num_input_tokens_seen": 499386908, + "step": 5471 + }, + { + "epoch": 22.795833333333334, + "loss": 0.029936883598566055, + "loss_ce": 3.728695082827471e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 499386908, + "step": 5471 + }, + { + "epoch": 22.8, + "grad_norm": 4.452297157296129, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 499478288, + "step": 5472 + }, + { + "epoch": 22.8, + "loss": 0.026759404689073563, + "loss_ce": 5.652549589285627e-05, + "loss_iou": 0.255859375, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 499478288, + "step": 5472 + }, + { + "epoch": 22.804166666666667, + "grad_norm": 2.9466131421578656, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 499569332, + "step": 5473 + }, + { + "epoch": 22.804166666666667, + "loss": 0.027132410556077957, + "loss_ce": 1.7542834029882215e-05, + "loss_iou": 0.2109375, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 499569332, + "step": 5473 + }, + { + "epoch": 22.808333333333334, + "grad_norm": 7.186883142939526, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 499661064, + "step": 5474 + }, + { + "epoch": 22.808333333333334, + "loss": 0.061519306153059006, + "loss_ce": 0.0020329162944108248, + "loss_iou": 0.16015625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 499661064, + "step": 5474 + }, + { + "epoch": 22.8125, + "grad_norm": 3.554601374181729, + "learning_rate": 5e-05, + "loss": 0.0612, + "num_input_tokens_seen": 499752312, + "step": 5475 + }, + { + "epoch": 22.8125, + "loss": 0.057638224214315414, + "loss_ce": 1.3407001461018808e-05, + "loss_iou": 0.1953125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 499752312, + "step": 5475 + }, + { + "epoch": 22.816666666666666, + "grad_norm": 2.7402453833752336, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 499842672, + "step": 5476 + }, + { + "epoch": 22.816666666666666, + "loss": 0.03763018548488617, + "loss_ce": 1.7269090676563792e-05, + "loss_iou": 0.181640625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 499842672, + "step": 5476 + }, + { + "epoch": 22.820833333333333, + "grad_norm": 2.1660452232601135, + "learning_rate": 5e-05, + "loss": 0.0274, + "num_input_tokens_seen": 499933896, + "step": 5477 + }, + { + "epoch": 22.820833333333333, + "loss": 0.028365662321448326, + "loss_ce": 0.00019030747353099287, + "loss_iou": 0.322265625, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 499933896, + "step": 5477 + }, + { + "epoch": 22.825, + "grad_norm": 5.217329977893872, + "learning_rate": 5e-05, + "loss": 0.029, + "num_input_tokens_seen": 500025440, + "step": 5478 + }, + { + "epoch": 22.825, + "loss": 0.0354175791144371, + "loss_ce": 0.0007496136240661144, + "loss_iou": 0.2490234375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 500025440, + "step": 5478 + }, + { + "epoch": 22.829166666666666, + "grad_norm": 2.940064067788056, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 500116292, + "step": 5479 + }, + { + "epoch": 22.829166666666666, + "loss": 0.08425501734018326, + "loss_ce": 5.702173075405881e-05, + "loss_iou": 0.326171875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 500116292, + "step": 5479 + }, + { + "epoch": 22.833333333333332, + "grad_norm": 4.7275692494247, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 500208452, + "step": 5480 + }, + { + "epoch": 22.833333333333332, + "loss": 0.028766117990016937, + "loss_ce": 4.907785842078738e-05, + "loss_iou": 0.236328125, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 500208452, + "step": 5480 + }, + { + "epoch": 22.8375, + "grad_norm": 2.5102813220794524, + "learning_rate": 5e-05, + "loss": 0.034, + "num_input_tokens_seen": 500300040, + "step": 5481 + }, + { + "epoch": 22.8375, + "loss": 0.0304593276232481, + "loss_ce": 1.0413465133751743e-05, + "loss_iou": 0.279296875, + "loss_num": 0.006103515625, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 500300040, + "step": 5481 + }, + { + "epoch": 22.841666666666665, + "grad_norm": 3.260438483373206, + "learning_rate": 5e-05, + "loss": 0.0604, + "num_input_tokens_seen": 500390616, + "step": 5482 + }, + { + "epoch": 22.841666666666665, + "loss": 0.04531024396419525, + "loss_ce": 6.793491775169969e-05, + "loss_iou": 0.28515625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 500390616, + "step": 5482 + }, + { + "epoch": 22.845833333333335, + "grad_norm": 3.018756694811929, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 500482756, + "step": 5483 + }, + { + "epoch": 22.845833333333335, + "loss": 0.040407925844192505, + "loss_ce": 4.080204962519929e-05, + "loss_iou": 0.240234375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 500482756, + "step": 5483 + }, + { + "epoch": 22.85, + "grad_norm": 2.6317817181670455, + "learning_rate": 5e-05, + "loss": 0.026, + "num_input_tokens_seen": 500573928, + "step": 5484 + }, + { + "epoch": 22.85, + "loss": 0.03063778206706047, + "loss_ce": 1.3393741028266959e-05, + "loss_iou": 0.1953125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 500573928, + "step": 5484 + }, + { + "epoch": 22.854166666666668, + "grad_norm": 1.9321251904243877, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 500665412, + "step": 5485 + }, + { + "epoch": 22.854166666666668, + "loss": 0.03834788501262665, + "loss_ce": 1.3989140825287905e-05, + "loss_iou": 0.162109375, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 500665412, + "step": 5485 + }, + { + "epoch": 22.858333333333334, + "grad_norm": 1.399606320446629, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 500756168, + "step": 5486 + }, + { + "epoch": 22.858333333333334, + "loss": 0.06183755397796631, + "loss_ce": 8.943742614064831e-06, + "loss_iou": 0.298828125, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 500756168, + "step": 5486 + }, + { + "epoch": 22.8625, + "grad_norm": 0.778141119459895, + "learning_rate": 5e-05, + "loss": 0.0352, + "num_input_tokens_seen": 500847932, + "step": 5487 + }, + { + "epoch": 22.8625, + "loss": 0.05563715845346451, + "loss_ce": 0.00026873595197685063, + "loss_iou": 0.248046875, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 500847932, + "step": 5487 + }, + { + "epoch": 22.866666666666667, + "grad_norm": 0.732235143616492, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 500939360, + "step": 5488 + }, + { + "epoch": 22.866666666666667, + "loss": 0.02937071956694126, + "loss_ce": 3.569724867702462e-05, + "loss_iou": 0.30078125, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 500939360, + "step": 5488 + }, + { + "epoch": 22.870833333333334, + "grad_norm": 1.4242854197731527, + "learning_rate": 5e-05, + "loss": 0.0457, + "num_input_tokens_seen": 501031012, + "step": 5489 + }, + { + "epoch": 22.870833333333334, + "loss": 0.0672885924577713, + "loss_ce": 1.2588001482072286e-05, + "loss_iou": 0.197265625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 501031012, + "step": 5489 + }, + { + "epoch": 22.875, + "grad_norm": 1.4123170340242057, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 501122108, + "step": 5490 + }, + { + "epoch": 22.875, + "loss": 0.02706168405711651, + "loss_ce": 1.548104228277225e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 501122108, + "step": 5490 + }, + { + "epoch": 22.879166666666666, + "grad_norm": 1.9084277753171575, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 501213440, + "step": 5491 + }, + { + "epoch": 22.879166666666666, + "loss": 0.06372037529945374, + "loss_ce": 8.359698404092342e-05, + "loss_iou": 0.28125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 501213440, + "step": 5491 + }, + { + "epoch": 22.883333333333333, + "grad_norm": 3.015035456018019, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 501304532, + "step": 5492 + }, + { + "epoch": 22.883333333333333, + "loss": 0.07055270671844482, + "loss_ce": 4.1843977669486776e-05, + "loss_iou": 0.21875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 501304532, + "step": 5492 + }, + { + "epoch": 22.8875, + "grad_norm": 2.5319543671648184, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 501395636, + "step": 5493 + }, + { + "epoch": 22.8875, + "loss": 0.04397744685411453, + "loss_ce": 7.79082765802741e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 501395636, + "step": 5493 + }, + { + "epoch": 22.891666666666666, + "grad_norm": 2.1391885521606966, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 501486500, + "step": 5494 + }, + { + "epoch": 22.891666666666666, + "loss": 0.07880009710788727, + "loss_ce": 1.1342142897774465e-05, + "loss_iou": 0.16015625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 501486500, + "step": 5494 + }, + { + "epoch": 22.895833333333332, + "grad_norm": 2.5589569030363566, + "learning_rate": 5e-05, + "loss": 0.0266, + "num_input_tokens_seen": 501577884, + "step": 5495 + }, + { + "epoch": 22.895833333333332, + "loss": 0.027315836399793625, + "loss_ce": 0.0009867959888651967, + "loss_iou": 0.1640625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 501577884, + "step": 5495 + }, + { + "epoch": 22.9, + "grad_norm": 2.884642880141269, + "learning_rate": 5e-05, + "loss": 0.0253, + "num_input_tokens_seen": 501669688, + "step": 5496 + }, + { + "epoch": 22.9, + "loss": 0.029087038710713387, + "loss_ce": 0.0001868915860541165, + "loss_iou": 0.2734375, + "loss_num": 0.005767822265625, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 501669688, + "step": 5496 + }, + { + "epoch": 22.904166666666665, + "grad_norm": 21.98736131357622, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 501760892, + "step": 5497 + }, + { + "epoch": 22.904166666666665, + "loss": 0.05251148343086243, + "loss_ce": 0.0001890959101729095, + "loss_iou": 0.326171875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 501760892, + "step": 5497 + }, + { + "epoch": 22.908333333333335, + "grad_norm": 2.8407728203382576, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 501852496, + "step": 5498 + }, + { + "epoch": 22.908333333333335, + "loss": 0.03330732882022858, + "loss_ce": 2.028298877121415e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 501852496, + "step": 5498 + }, + { + "epoch": 22.9125, + "grad_norm": 22.062520543176024, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 501944384, + "step": 5499 + }, + { + "epoch": 22.9125, + "loss": 0.10073038935661316, + "loss_ce": 5.289072942105122e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 501944384, + "step": 5499 + }, + { + "epoch": 22.916666666666668, + "grad_norm": 3.530217917213057, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 502035740, + "step": 5500 + }, + { + "epoch": 22.916666666666668, + "eval_seeclick_CIoU": 0.2251809574663639, + "eval_seeclick_GIoU": 0.20743417367339134, + "eval_seeclick_IoU": 0.3241002485156059, + "eval_seeclick_MAE_all": 0.10077449306845665, + "eval_seeclick_MAE_h": 0.07250755280256271, + "eval_seeclick_MAE_w": 0.22622816264629364, + "eval_seeclick_MAE_x_boxes": 0.21967153996229172, + "eval_seeclick_MAE_y_boxes": 0.07597211748361588, + "eval_seeclick_NUM_probability": 0.999996155500412, + "eval_seeclick_inside_bbox": 0.5411931872367859, + "eval_seeclick_loss": 0.5401211977005005, + "eval_seeclick_loss_ce": 0.10009105876088142, + "eval_seeclick_loss_iou": 0.4600830078125, + "eval_seeclick_loss_num": 0.0853424072265625, + "eval_seeclick_loss_xval": 0.4266357421875, + "eval_seeclick_runtime": 76.5178, + "eval_seeclick_samples_per_second": 0.562, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 502035740, + "step": 5500 + }, + { + "epoch": 22.916666666666668, + "eval_icons_CIoU": 0.27284765988588333, + "eval_icons_GIoU": 0.23190713673830032, + "eval_icons_IoU": 0.37249836325645447, + "eval_icons_MAE_all": 0.08035346493124962, + "eval_icons_MAE_h": 0.1744953989982605, + "eval_icons_MAE_w": 0.11795252934098244, + "eval_icons_MAE_x_boxes": 0.12169399112462997, + "eval_icons_MAE_y_boxes": 0.17085515707731247, + "eval_icons_NUM_probability": 0.9999938905239105, + "eval_icons_inside_bbox": 0.5347222238779068, + "eval_icons_loss": 0.3813272714614868, + "eval_icons_loss_ce": 0.0018568845698609948, + "eval_icons_loss_iou": 0.19012451171875, + "eval_icons_loss_num": 0.07770538330078125, + "eval_icons_loss_xval": 0.38848876953125, + "eval_icons_runtime": 85.1043, + "eval_icons_samples_per_second": 0.588, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 502035740, + "step": 5500 + }, + { + "epoch": 22.916666666666668, + "eval_screenspot_CIoU": 0.3852354089419047, + "eval_screenspot_GIoU": 0.36637531717618305, + "eval_screenspot_IoU": 0.4568243424097697, + "eval_screenspot_MAE_all": 0.09730574985345204, + "eval_screenspot_MAE_h": 0.09697473794221878, + "eval_screenspot_MAE_w": 0.2014616330464681, + "eval_screenspot_MAE_x_boxes": 0.17775118350982666, + "eval_screenspot_MAE_y_boxes": 0.09227648874123891, + "eval_screenspot_NUM_probability": 0.9999014139175415, + "eval_screenspot_inside_bbox": 0.7458333373069763, + "eval_screenspot_loss": 0.49223896861076355, + "eval_screenspot_loss_ce": 0.0051902799362627166, + "eval_screenspot_loss_iou": 0.346435546875, + "eval_screenspot_loss_num": 0.1008148193359375, + "eval_screenspot_loss_xval": 0.5041097005208334, + "eval_screenspot_runtime": 150.1583, + "eval_screenspot_samples_per_second": 0.593, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 502035740, + "step": 5500 + }, + { + "epoch": 22.916666666666668, + "eval_compot_CIoU": 0.42840753495693207, + "eval_compot_GIoU": 0.4277106374502182, + "eval_compot_IoU": 0.5115479528903961, + "eval_compot_MAE_all": 0.05933877266943455, + "eval_compot_MAE_h": 0.06422919780015945, + "eval_compot_MAE_w": 0.15599841624498367, + "eval_compot_MAE_x_boxes": 0.15585462749004364, + "eval_compot_MAE_y_boxes": 0.06365378201007843, + "eval_compot_NUM_probability": 0.9999901056289673, + "eval_compot_inside_bbox": 0.6458333432674408, + "eval_compot_loss": 0.36150631308555603, + "eval_compot_loss_ce": 0.06297808699309826, + "eval_compot_loss_iou": 0.25494384765625, + "eval_compot_loss_num": 0.05878448486328125, + "eval_compot_loss_xval": 0.293853759765625, + "eval_compot_runtime": 85.918, + "eval_compot_samples_per_second": 0.582, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 502035740, + "step": 5500 + }, + { + "epoch": 22.916666666666668, + "loss": 0.3481367528438568, + "loss_ce": 0.06633742153644562, + "loss_iou": 0.28515625, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 502035740, + "step": 5500 + }, + { + "epoch": 22.920833333333334, + "grad_norm": 3.842643013858874, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 502126788, + "step": 5501 + }, + { + "epoch": 22.920833333333334, + "loss": 0.0501682311296463, + "loss_ce": 0.0003940639435313642, + "loss_iou": 0.22265625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 502126788, + "step": 5501 + }, + { + "epoch": 22.925, + "grad_norm": 3.3158857799497126, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 502218088, + "step": 5502 + }, + { + "epoch": 22.925, + "loss": 0.04445202276110649, + "loss_ce": 3.368945908732712e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 502218088, + "step": 5502 + }, + { + "epoch": 22.929166666666667, + "grad_norm": 4.140354488668016, + "learning_rate": 5e-05, + "loss": 0.066, + "num_input_tokens_seen": 502308548, + "step": 5503 + }, + { + "epoch": 22.929166666666667, + "loss": 0.057744890451431274, + "loss_ce": 0.00011244323832215741, + "loss_iou": 0.228515625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 502308548, + "step": 5503 + }, + { + "epoch": 22.933333333333334, + "grad_norm": 8.781019526057698, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 502399836, + "step": 5504 + }, + { + "epoch": 22.933333333333334, + "loss": 0.043314363807439804, + "loss_ce": 5.569650966208428e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 502399836, + "step": 5504 + }, + { + "epoch": 22.9375, + "grad_norm": 3.7258668949793123, + "learning_rate": 5e-05, + "loss": 0.0237, + "num_input_tokens_seen": 502491764, + "step": 5505 + }, + { + "epoch": 22.9375, + "loss": 0.02385835163295269, + "loss_ce": 0.001237196265719831, + "loss_iou": 0.2109375, + "loss_num": 0.0045166015625, + "loss_xval": 0.0225830078125, + "num_input_tokens_seen": 502491764, + "step": 5505 + }, + { + "epoch": 22.941666666666666, + "grad_norm": 2.332813839233267, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 502582812, + "step": 5506 + }, + { + "epoch": 22.941666666666666, + "loss": 0.04756104573607445, + "loss_ce": 1.465791319787968e-05, + "loss_iou": 0.2578125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 502582812, + "step": 5506 + }, + { + "epoch": 22.945833333333333, + "grad_norm": 3.417653919295563, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 502673976, + "step": 5507 + }, + { + "epoch": 22.945833333333333, + "loss": 0.06305991858243942, + "loss_ce": 2.5857691070996225e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 502673976, + "step": 5507 + }, + { + "epoch": 22.95, + "grad_norm": 3.655723485092921, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 502765292, + "step": 5508 + }, + { + "epoch": 22.95, + "loss": 0.0506102591753006, + "loss_ce": 1.2114001947338693e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 502765292, + "step": 5508 + }, + { + "epoch": 22.954166666666666, + "grad_norm": 2.4607481843272843, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 502856384, + "step": 5509 + }, + { + "epoch": 22.954166666666666, + "loss": 0.03219888359308243, + "loss_ce": 1.0468009350006469e-05, + "loss_iou": 0.271484375, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 502856384, + "step": 5509 + }, + { + "epoch": 22.958333333333332, + "grad_norm": 2.0587149015457027, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 502947704, + "step": 5510 + }, + { + "epoch": 22.958333333333332, + "loss": 0.025407759472727776, + "loss_ce": 1.713353776722215e-05, + "loss_iou": 0.15625, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 502947704, + "step": 5510 + }, + { + "epoch": 22.9625, + "grad_norm": 2.916617177381313, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 503039284, + "step": 5511 + }, + { + "epoch": 22.9625, + "loss": 0.031609997153282166, + "loss_ce": 5.482199776452035e-05, + "loss_iou": 0.2890625, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 503039284, + "step": 5511 + }, + { + "epoch": 22.966666666666665, + "grad_norm": 2.9932210038096225, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 503130680, + "step": 5512 + }, + { + "epoch": 22.966666666666665, + "loss": 0.04617247357964516, + "loss_ce": 0.0009225325775332749, + "loss_iou": 0.16796875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 503130680, + "step": 5512 + }, + { + "epoch": 22.970833333333335, + "grad_norm": 1.7561536928197354, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 503222204, + "step": 5513 + }, + { + "epoch": 22.970833333333335, + "loss": 0.020861037075519562, + "loss_ce": 0.00020826596301048994, + "loss_iou": 0.158203125, + "loss_num": 0.004119873046875, + "loss_xval": 0.0206298828125, + "num_input_tokens_seen": 503222204, + "step": 5513 + }, + { + "epoch": 22.975, + "grad_norm": 7.348923333995983, + "learning_rate": 5e-05, + "loss": 0.0783, + "num_input_tokens_seen": 503313968, + "step": 5514 + }, + { + "epoch": 22.975, + "loss": 0.04428169131278992, + "loss_ce": 0.00024482791195623577, + "loss_iou": 0.279296875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 503313968, + "step": 5514 + }, + { + "epoch": 22.979166666666668, + "grad_norm": 1.9808279719757151, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 503405344, + "step": 5515 + }, + { + "epoch": 22.979166666666668, + "loss": 0.025117503479123116, + "loss_ce": 1.679652268649079e-05, + "loss_iou": 0.181640625, + "loss_num": 0.0050048828125, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 503405344, + "step": 5515 + }, + { + "epoch": 22.983333333333334, + "grad_norm": 2.335548030348546, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 503496644, + "step": 5516 + }, + { + "epoch": 22.983333333333334, + "loss": 0.09109736979007721, + "loss_ce": 7.869096589274704e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 503496644, + "step": 5516 + }, + { + "epoch": 22.9875, + "grad_norm": 2.6540184672026235, + "learning_rate": 5e-05, + "loss": 0.026, + "num_input_tokens_seen": 503587868, + "step": 5517 + }, + { + "epoch": 22.9875, + "loss": 0.027322892099618912, + "loss_ce": 7.069473940646276e-05, + "loss_iou": 0.24609375, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 503587868, + "step": 5517 + }, + { + "epoch": 22.991666666666667, + "grad_norm": 2.2279579443252535, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 503679220, + "step": 5518 + }, + { + "epoch": 22.991666666666667, + "loss": 0.03166805952787399, + "loss_ce": 6.072716132621281e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 503679220, + "step": 5518 + }, + { + "epoch": 22.995833333333334, + "grad_norm": 3.0643537195442683, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 503770452, + "step": 5519 + }, + { + "epoch": 22.995833333333334, + "loss": 0.043202586472034454, + "loss_ce": 3.547139203874394e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 503770452, + "step": 5519 + }, + { + "epoch": 23.0, + "grad_norm": 2.721422461831951, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 503861852, + "step": 5520 + }, + { + "epoch": 23.0, + "loss": 0.04136139899492264, + "loss_ce": 1.770826020219829e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 503861852, + "step": 5520 + }, + { + "epoch": 23.004166666666666, + "grad_norm": 2.016052229840184, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 503951212, + "step": 5521 + }, + { + "epoch": 23.004166666666666, + "loss": 0.0762738585472107, + "loss_ce": 1.8063037714455277e-05, + "loss_iou": 0.1025390625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 503951212, + "step": 5521 + }, + { + "epoch": 23.008333333333333, + "grad_norm": 1.5897666750004529, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 504042516, + "step": 5522 + }, + { + "epoch": 23.008333333333333, + "loss": 0.038626015186309814, + "loss_ce": 4.417174932314083e-05, + "loss_iou": 0.19921875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 504042516, + "step": 5522 + }, + { + "epoch": 23.0125, + "grad_norm": 2.138708352238235, + "learning_rate": 5e-05, + "loss": 0.0331, + "num_input_tokens_seen": 504134440, + "step": 5523 + }, + { + "epoch": 23.0125, + "loss": 0.040923915803432465, + "loss_ce": 1.5102302313607652e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 504134440, + "step": 5523 + }, + { + "epoch": 23.016666666666666, + "grad_norm": 4.708663084029856, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 504226180, + "step": 5524 + }, + { + "epoch": 23.016666666666666, + "loss": 0.06659691035747528, + "loss_ce": 5.3330928494688123e-05, + "loss_iou": 0.20703125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 504226180, + "step": 5524 + }, + { + "epoch": 23.020833333333332, + "grad_norm": 2.53702349066275, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 504317652, + "step": 5525 + }, + { + "epoch": 23.020833333333332, + "loss": 0.03276386857032776, + "loss_ce": 1.0879201909119729e-05, + "loss_iou": 0.212890625, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 504317652, + "step": 5525 + }, + { + "epoch": 23.025, + "grad_norm": 2.078387757852989, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 504408592, + "step": 5526 + }, + { + "epoch": 23.025, + "loss": 0.05985802784562111, + "loss_ce": 2.0683584807557054e-05, + "loss_iou": 0.251953125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 504408592, + "step": 5526 + }, + { + "epoch": 23.029166666666665, + "grad_norm": 2.063475413626266, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 504498564, + "step": 5527 + }, + { + "epoch": 23.029166666666665, + "loss": 0.021073997020721436, + "loss_ce": 1.6867019439814612e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00421142578125, + "loss_xval": 0.02099609375, + "num_input_tokens_seen": 504498564, + "step": 5527 + }, + { + "epoch": 23.033333333333335, + "grad_norm": 1.5947610008134536, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 504590120, + "step": 5528 + }, + { + "epoch": 23.033333333333335, + "loss": 0.03138340264558792, + "loss_ce": 0.0002173245302401483, + "loss_iou": 0.1865234375, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 504590120, + "step": 5528 + }, + { + "epoch": 23.0375, + "grad_norm": 2.6926222323791267, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 504681684, + "step": 5529 + }, + { + "epoch": 23.0375, + "loss": 0.051821961998939514, + "loss_ce": 1.0743910024757497e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 504681684, + "step": 5529 + }, + { + "epoch": 23.041666666666668, + "grad_norm": 3.0147621341280004, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 504772928, + "step": 5530 + }, + { + "epoch": 23.041666666666668, + "loss": 0.05108293890953064, + "loss_ce": 1.1768741387641057e-05, + "loss_iou": 0.3125, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 504772928, + "step": 5530 + }, + { + "epoch": 23.045833333333334, + "grad_norm": 3.529754894607426, + "learning_rate": 5e-05, + "loss": 0.04, + "num_input_tokens_seen": 504864352, + "step": 5531 + }, + { + "epoch": 23.045833333333334, + "loss": 0.04382365196943283, + "loss_ce": 0.00035135942744091153, + "loss_iou": 0.234375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 504864352, + "step": 5531 + }, + { + "epoch": 23.05, + "grad_norm": 2.2367983125380175, + "learning_rate": 5e-05, + "loss": 0.088, + "num_input_tokens_seen": 504955744, + "step": 5532 + }, + { + "epoch": 23.05, + "loss": 0.037780724465847015, + "loss_ce": 3.81076788471546e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 504955744, + "step": 5532 + }, + { + "epoch": 23.054166666666667, + "grad_norm": 2.6973369667724714, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 505047124, + "step": 5533 + }, + { + "epoch": 23.054166666666667, + "loss": 0.0411471463739872, + "loss_ce": 9.451230653212406e-06, + "loss_iou": 0.2275390625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 505047124, + "step": 5533 + }, + { + "epoch": 23.058333333333334, + "grad_norm": 2.6149604800336013, + "learning_rate": 5e-05, + "loss": 0.0247, + "num_input_tokens_seen": 505138476, + "step": 5534 + }, + { + "epoch": 23.058333333333334, + "loss": 0.02741740271449089, + "loss_ce": 1.2616639651241712e-05, + "loss_iou": 0.234375, + "loss_num": 0.0054931640625, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 505138476, + "step": 5534 + }, + { + "epoch": 23.0625, + "grad_norm": 3.4400342266393626, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 505229832, + "step": 5535 + }, + { + "epoch": 23.0625, + "loss": 0.041741691529750824, + "loss_ce": 8.90308729140088e-06, + "loss_iou": 0.265625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 505229832, + "step": 5535 + }, + { + "epoch": 23.066666666666666, + "grad_norm": 5.02985472813745, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 505321416, + "step": 5536 + }, + { + "epoch": 23.066666666666666, + "loss": 0.04101718217134476, + "loss_ce": 6.259123620111495e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 505321416, + "step": 5536 + }, + { + "epoch": 23.070833333333333, + "grad_norm": 3.003234993754144, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 505412784, + "step": 5537 + }, + { + "epoch": 23.070833333333333, + "loss": 0.03090827539563179, + "loss_ce": 9.229412171407603e-06, + "loss_iou": 0.291015625, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 505412784, + "step": 5537 + }, + { + "epoch": 23.075, + "grad_norm": 2.281625317511175, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 505504404, + "step": 5538 + }, + { + "epoch": 23.075, + "loss": 0.05516308546066284, + "loss_ce": 3.307756196591072e-05, + "loss_iou": 0.404296875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 505504404, + "step": 5538 + }, + { + "epoch": 23.079166666666666, + "grad_norm": 2.681882718810235, + "learning_rate": 5e-05, + "loss": 0.0282, + "num_input_tokens_seen": 505595452, + "step": 5539 + }, + { + "epoch": 23.079166666666666, + "loss": 0.020011726766824722, + "loss_ce": 2.2712603822583333e-05, + "loss_iou": 0.0791015625, + "loss_num": 0.003997802734375, + "loss_xval": 0.02001953125, + "num_input_tokens_seen": 505595452, + "step": 5539 + }, + { + "epoch": 23.083333333333332, + "grad_norm": 1.9297403026196684, + "learning_rate": 5e-05, + "loss": 0.0309, + "num_input_tokens_seen": 505687264, + "step": 5540 + }, + { + "epoch": 23.083333333333332, + "loss": 0.02788577787578106, + "loss_ce": 7.970892511366401e-06, + "loss_iou": 0.248046875, + "loss_num": 0.005584716796875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 505687264, + "step": 5540 + }, + { + "epoch": 23.0875, + "grad_norm": 2.1713723754796974, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 505778960, + "step": 5541 + }, + { + "epoch": 23.0875, + "loss": 0.029800841584801674, + "loss_ce": 3.094381099799648e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 505778960, + "step": 5541 + }, + { + "epoch": 23.091666666666665, + "grad_norm": 2.7247538835885536, + "learning_rate": 5e-05, + "loss": 0.0237, + "num_input_tokens_seen": 505870764, + "step": 5542 + }, + { + "epoch": 23.091666666666665, + "loss": 0.0216769240796566, + "loss_ce": 6.28488342044875e-05, + "loss_iou": 0.2578125, + "loss_num": 0.00433349609375, + "loss_xval": 0.0216064453125, + "num_input_tokens_seen": 505870764, + "step": 5542 + }, + { + "epoch": 23.095833333333335, + "grad_norm": 2.7685141402952986, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 505962108, + "step": 5543 + }, + { + "epoch": 23.095833333333335, + "loss": 0.06512368470430374, + "loss_ce": 2.2063091819291003e-05, + "loss_iou": 0.359375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 505962108, + "step": 5543 + }, + { + "epoch": 23.1, + "grad_norm": 2.771916361936285, + "learning_rate": 5e-05, + "loss": 0.0685, + "num_input_tokens_seen": 506053248, + "step": 5544 + }, + { + "epoch": 23.1, + "loss": 0.045903388410806656, + "loss_ce": 8.124551095534116e-05, + "loss_iou": 0.201171875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 506053248, + "step": 5544 + }, + { + "epoch": 23.104166666666668, + "grad_norm": 2.2927554760290954, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 506143796, + "step": 5545 + }, + { + "epoch": 23.104166666666668, + "loss": 0.04807007312774658, + "loss_ce": 4.886148417426739e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 506143796, + "step": 5545 + }, + { + "epoch": 23.108333333333334, + "grad_norm": 3.9575731812955586, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 506235168, + "step": 5546 + }, + { + "epoch": 23.108333333333334, + "loss": 0.02666931226849556, + "loss_ce": 1.2208596672280692e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.005340576171875, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 506235168, + "step": 5546 + }, + { + "epoch": 23.1125, + "grad_norm": 2.430795858501176, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 506325352, + "step": 5547 + }, + { + "epoch": 23.1125, + "loss": 0.02848159149289131, + "loss_ce": 8.690438335179351e-06, + "loss_iou": 0.375, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 506325352, + "step": 5547 + }, + { + "epoch": 23.116666666666667, + "grad_norm": 1.8939699679452089, + "learning_rate": 5e-05, + "loss": 0.0298, + "num_input_tokens_seen": 506416588, + "step": 5548 + }, + { + "epoch": 23.116666666666667, + "loss": 0.020750660449266434, + "loss_ce": 5.211283132666722e-05, + "loss_iou": 0.314453125, + "loss_num": 0.004150390625, + "loss_xval": 0.020751953125, + "num_input_tokens_seen": 506416588, + "step": 5548 + }, + { + "epoch": 23.120833333333334, + "grad_norm": 2.1950882893637242, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 506507772, + "step": 5549 + }, + { + "epoch": 23.120833333333334, + "loss": 0.08341377973556519, + "loss_ce": 2.4499406208633445e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 506507772, + "step": 5549 + }, + { + "epoch": 23.125, + "grad_norm": 2.8894699194279636, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 506598800, + "step": 5550 + }, + { + "epoch": 23.125, + "loss": 0.030126187950372696, + "loss_ce": 1.2968208466190845e-05, + "loss_iou": 0.328125, + "loss_num": 0.006011962890625, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 506598800, + "step": 5550 + }, + { + "epoch": 23.129166666666666, + "grad_norm": 2.4512212798468482, + "learning_rate": 5e-05, + "loss": 0.0615, + "num_input_tokens_seen": 506690068, + "step": 5551 + }, + { + "epoch": 23.129166666666666, + "loss": 0.03477202355861664, + "loss_ce": 3.539209137670696e-05, + "loss_iou": 0.040771484375, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 506690068, + "step": 5551 + }, + { + "epoch": 23.133333333333333, + "grad_norm": 2.06191445933578, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 506781076, + "step": 5552 + }, + { + "epoch": 23.133333333333333, + "loss": 0.05612967908382416, + "loss_ce": 6.88864674884826e-05, + "loss_iou": 0.267578125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 506781076, + "step": 5552 + }, + { + "epoch": 23.1375, + "grad_norm": 2.12436030769707, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 506871476, + "step": 5553 + }, + { + "epoch": 23.1375, + "loss": 0.03658786416053772, + "loss_ce": 1.2547091500891838e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 506871476, + "step": 5553 + }, + { + "epoch": 23.141666666666666, + "grad_norm": 1.7897164036218582, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 506962352, + "step": 5554 + }, + { + "epoch": 23.141666666666666, + "loss": 0.04012250900268555, + "loss_ce": 7.153533715609228e-06, + "loss_iou": 0.2451171875, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 506962352, + "step": 5554 + }, + { + "epoch": 23.145833333333332, + "grad_norm": 1.7781822095634223, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 507053444, + "step": 5555 + }, + { + "epoch": 23.145833333333332, + "loss": 0.026572369039058685, + "loss_ce": 6.817023859184701e-06, + "loss_iou": 0.232421875, + "loss_num": 0.00531005859375, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 507053444, + "step": 5555 + }, + { + "epoch": 23.15, + "grad_norm": 2.145307682262882, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 507145192, + "step": 5556 + }, + { + "epoch": 23.15, + "loss": 0.033625900745391846, + "loss_ce": 1.0787004612211604e-05, + "loss_iou": 0.158203125, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 507145192, + "step": 5556 + }, + { + "epoch": 23.154166666666665, + "grad_norm": 4.031522357261041, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 507236560, + "step": 5557 + }, + { + "epoch": 23.154166666666665, + "loss": 0.02342827618122101, + "loss_ce": 1.3663468052982353e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.00469970703125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 507236560, + "step": 5557 + }, + { + "epoch": 23.158333333333335, + "grad_norm": 2.1104238745025725, + "learning_rate": 5e-05, + "loss": 0.0404, + "num_input_tokens_seen": 507327812, + "step": 5558 + }, + { + "epoch": 23.158333333333335, + "loss": 0.05977874621748924, + "loss_ce": 1.0070112693938427e-05, + "loss_iou": 0.171875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 507327812, + "step": 5558 + }, + { + "epoch": 23.1625, + "grad_norm": 2.5630999970900707, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 507418900, + "step": 5559 + }, + { + "epoch": 23.1625, + "loss": 0.04528948292136192, + "loss_ce": 6.243255484150723e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 507418900, + "step": 5559 + }, + { + "epoch": 23.166666666666668, + "grad_norm": 2.3850673187207754, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 507509864, + "step": 5560 + }, + { + "epoch": 23.166666666666668, + "loss": 0.026554886251688004, + "loss_ce": 3.511292743496597e-05, + "loss_iou": 0.291015625, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 507509864, + "step": 5560 + }, + { + "epoch": 23.170833333333334, + "grad_norm": 3.807294907387483, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 507601516, + "step": 5561 + }, + { + "epoch": 23.170833333333334, + "loss": 0.025786317884922028, + "loss_ce": 2.9481008823495358e-05, + "loss_iou": 0.23046875, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 507601516, + "step": 5561 + }, + { + "epoch": 23.175, + "grad_norm": 3.2862311410004694, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 507692300, + "step": 5562 + }, + { + "epoch": 23.175, + "loss": 0.0893373042345047, + "loss_ce": 1.9988350686617196e-05, + "loss_iou": 0.25, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 507692300, + "step": 5562 + }, + { + "epoch": 23.179166666666667, + "grad_norm": 3.985297934381173, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 507784504, + "step": 5563 + }, + { + "epoch": 23.179166666666667, + "loss": 0.03454427421092987, + "loss_ce": 6.704148836433887e-05, + "loss_iou": 0.287109375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 507784504, + "step": 5563 + }, + { + "epoch": 23.183333333333334, + "grad_norm": 2.9936148131713076, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 507875576, + "step": 5564 + }, + { + "epoch": 23.183333333333334, + "loss": 0.06957919895648956, + "loss_ce": 2.9639351851074025e-05, + "loss_iou": 0.205078125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 507875576, + "step": 5564 + }, + { + "epoch": 23.1875, + "grad_norm": 2.751201836326489, + "learning_rate": 5e-05, + "loss": 0.038, + "num_input_tokens_seen": 507967012, + "step": 5565 + }, + { + "epoch": 23.1875, + "loss": 0.056741319596767426, + "loss_ce": 0.00013121204392518848, + "loss_iou": 0.314453125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 507967012, + "step": 5565 + }, + { + "epoch": 23.191666666666666, + "grad_norm": 3.655389361669214, + "learning_rate": 5e-05, + "loss": 0.029, + "num_input_tokens_seen": 508058164, + "step": 5566 + }, + { + "epoch": 23.191666666666666, + "loss": 0.01976543664932251, + "loss_ce": 2.0563513317028992e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.003936767578125, + "loss_xval": 0.019775390625, + "num_input_tokens_seen": 508058164, + "step": 5566 + }, + { + "epoch": 23.195833333333333, + "grad_norm": 3.2946355046531512, + "learning_rate": 5e-05, + "loss": 0.0429, + "num_input_tokens_seen": 508148328, + "step": 5567 + }, + { + "epoch": 23.195833333333333, + "loss": 0.056723110377788544, + "loss_ce": 9.774295176612213e-05, + "loss_iou": 0.212890625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 508148328, + "step": 5567 + }, + { + "epoch": 23.2, + "grad_norm": 2.3083514977333017, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 508238268, + "step": 5568 + }, + { + "epoch": 23.2, + "loss": 0.07298716902732849, + "loss_ce": 1.964076545846183e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 508238268, + "step": 5568 + }, + { + "epoch": 23.204166666666666, + "grad_norm": 2.787342467258707, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 508329436, + "step": 5569 + }, + { + "epoch": 23.204166666666666, + "loss": 0.02801087126135826, + "loss_ce": 0.0026660216972231865, + "loss_iou": 0.2060546875, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 508329436, + "step": 5569 + }, + { + "epoch": 23.208333333333332, + "grad_norm": 1.803000257307526, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 508420948, + "step": 5570 + }, + { + "epoch": 23.208333333333332, + "loss": 0.03525412082672119, + "loss_ce": 0.0007082189549691975, + "loss_iou": 0.0546875, + "loss_num": 0.00689697265625, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 508420948, + "step": 5570 + }, + { + "epoch": 23.2125, + "grad_norm": 1.768192239444829, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 508512224, + "step": 5571 + }, + { + "epoch": 23.2125, + "loss": 0.0395321287214756, + "loss_ce": 2.7123618565383367e-05, + "loss_iou": 0.25390625, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 508512224, + "step": 5571 + }, + { + "epoch": 23.216666666666665, + "grad_norm": 1.3839672931764273, + "learning_rate": 5e-05, + "loss": 0.0367, + "num_input_tokens_seen": 508603140, + "step": 5572 + }, + { + "epoch": 23.216666666666665, + "loss": 0.018957097083330154, + "loss_ce": 5.68003724765731e-06, + "loss_iou": 0.220703125, + "loss_num": 0.0037841796875, + "loss_xval": 0.0189208984375, + "num_input_tokens_seen": 508603140, + "step": 5572 + }, + { + "epoch": 23.220833333333335, + "grad_norm": 1.5503597653476882, + "learning_rate": 5e-05, + "loss": 0.037, + "num_input_tokens_seen": 508694560, + "step": 5573 + }, + { + "epoch": 23.220833333333335, + "loss": 0.04941952973604202, + "loss_ce": 0.00024426612071692944, + "loss_iou": 0.193359375, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 508694560, + "step": 5573 + }, + { + "epoch": 23.225, + "grad_norm": 1.8940524628460167, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 508785764, + "step": 5574 + }, + { + "epoch": 23.225, + "loss": 0.03350139036774635, + "loss_ce": 9.990063699660823e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 508785764, + "step": 5574 + }, + { + "epoch": 23.229166666666668, + "grad_norm": 2.607973144041499, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 508876752, + "step": 5575 + }, + { + "epoch": 23.229166666666668, + "loss": 0.06860657036304474, + "loss_ce": 0.0004608180024661124, + "loss_iou": 0.3203125, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 508876752, + "step": 5575 + }, + { + "epoch": 23.233333333333334, + "grad_norm": 3.5325802197812854, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 508968252, + "step": 5576 + }, + { + "epoch": 23.233333333333334, + "loss": 0.046381525695323944, + "loss_ce": 1.00669694802491e-05, + "loss_iou": 0.271484375, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 508968252, + "step": 5576 + }, + { + "epoch": 23.2375, + "grad_norm": 3.024591578915835, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 509058952, + "step": 5577 + }, + { + "epoch": 23.2375, + "loss": 0.028720693662762642, + "loss_ce": 1.8911972802015953e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 509058952, + "step": 5577 + }, + { + "epoch": 23.241666666666667, + "grad_norm": 2.56935630758592, + "learning_rate": 5e-05, + "loss": 0.0278, + "num_input_tokens_seen": 509149648, + "step": 5578 + }, + { + "epoch": 23.241666666666667, + "loss": 0.020547185093164444, + "loss_ce": 8.854508450895082e-06, + "loss_iou": 0.1640625, + "loss_num": 0.004119873046875, + "loss_xval": 0.0205078125, + "num_input_tokens_seen": 509149648, + "step": 5578 + }, + { + "epoch": 23.245833333333334, + "grad_norm": 2.1576622370877194, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 509241252, + "step": 5579 + }, + { + "epoch": 23.245833333333334, + "loss": 0.0377604141831398, + "loss_ce": 1.0170961104449816e-05, + "loss_iou": 0.263671875, + "loss_num": 0.007537841796875, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 509241252, + "step": 5579 + }, + { + "epoch": 23.25, + "grad_norm": 4.0794898404105835, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 509332528, + "step": 5580 + }, + { + "epoch": 23.25, + "loss": 0.059780120849609375, + "loss_ce": 4.958963836543262e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 509332528, + "step": 5580 + }, + { + "epoch": 23.254166666666666, + "grad_norm": 3.0912967392107884, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 509423812, + "step": 5581 + }, + { + "epoch": 23.254166666666666, + "loss": 0.04734455794095993, + "loss_ce": 7.28306476958096e-05, + "loss_iou": 0.291015625, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 509423812, + "step": 5581 + }, + { + "epoch": 23.258333333333333, + "grad_norm": 3.2029406976315844, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 509515444, + "step": 5582 + }, + { + "epoch": 23.258333333333333, + "loss": 0.0323030985891819, + "loss_ce": 5.3646705055143684e-05, + "loss_iou": 0.228515625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 509515444, + "step": 5582 + }, + { + "epoch": 23.2625, + "grad_norm": 2.5528821665535215, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 509606412, + "step": 5583 + }, + { + "epoch": 23.2625, + "loss": 0.03638289123773575, + "loss_ce": 4.4086533307563514e-05, + "loss_iou": 0.205078125, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 509606412, + "step": 5583 + }, + { + "epoch": 23.266666666666666, + "grad_norm": 2.021321254403423, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 509697308, + "step": 5584 + }, + { + "epoch": 23.266666666666666, + "loss": 0.04420618340373039, + "loss_ce": 1.6727790352888405e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 509697308, + "step": 5584 + }, + { + "epoch": 23.270833333333332, + "grad_norm": 1.019907924151526, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 509788872, + "step": 5585 + }, + { + "epoch": 23.270833333333332, + "loss": 0.03787646442651749, + "loss_ce": 0.0007403845083899796, + "loss_iou": 0.23828125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 509788872, + "step": 5585 + }, + { + "epoch": 23.275, + "grad_norm": 1.211511372144261, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 509880496, + "step": 5586 + }, + { + "epoch": 23.275, + "loss": 0.043580561876297, + "loss_ce": 1.672182224865537e-05, + "loss_iou": 0.142578125, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 509880496, + "step": 5586 + }, + { + "epoch": 23.279166666666665, + "grad_norm": 2.38833077590804, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 509972204, + "step": 5587 + }, + { + "epoch": 23.279166666666665, + "loss": 0.03036055527627468, + "loss_ce": 3.371208003954962e-05, + "loss_iou": 0.26953125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 509972204, + "step": 5587 + }, + { + "epoch": 23.283333333333335, + "grad_norm": 2.455506356018568, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 510063756, + "step": 5588 + }, + { + "epoch": 23.283333333333335, + "loss": 0.027071993798017502, + "loss_ce": 1.8160757463192567e-05, + "loss_iou": 0.2265625, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 510063756, + "step": 5588 + }, + { + "epoch": 23.2875, + "grad_norm": 4.171101864763519, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 510155320, + "step": 5589 + }, + { + "epoch": 23.2875, + "loss": 0.028365176171064377, + "loss_ce": 4.486342368181795e-05, + "loss_iou": 0.3125, + "loss_num": 0.00567626953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 510155320, + "step": 5589 + }, + { + "epoch": 23.291666666666668, + "grad_norm": 3.2719513676794, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 510246112, + "step": 5590 + }, + { + "epoch": 23.291666666666668, + "loss": 0.0256138164550066, + "loss_ce": 9.567937013343908e-06, + "loss_iou": 0.19140625, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 510246112, + "step": 5590 + }, + { + "epoch": 23.295833333333334, + "grad_norm": 2.1603423840105767, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 510337288, + "step": 5591 + }, + { + "epoch": 23.295833333333334, + "loss": 0.02922121249139309, + "loss_ce": 2.3519523892900907e-05, + "loss_iou": 0.216796875, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 510337288, + "step": 5591 + }, + { + "epoch": 23.3, + "grad_norm": 1.7808574393579057, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 510428620, + "step": 5592 + }, + { + "epoch": 23.3, + "loss": 0.05729286000132561, + "loss_ce": 1.8992475816048682e-05, + "loss_iou": 0.013671875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 510428620, + "step": 5592 + }, + { + "epoch": 23.304166666666667, + "grad_norm": 12.535122906527729, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 510518272, + "step": 5593 + }, + { + "epoch": 23.304166666666667, + "loss": 0.030911097303032875, + "loss_ce": 2.7308211429044604e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 510518272, + "step": 5593 + }, + { + "epoch": 23.308333333333334, + "grad_norm": 2.121029182691071, + "learning_rate": 5e-05, + "loss": 0.0237, + "num_input_tokens_seen": 510609588, + "step": 5594 + }, + { + "epoch": 23.308333333333334, + "loss": 0.02242228388786316, + "loss_ce": 7.123675459297374e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.004486083984375, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 510609588, + "step": 5594 + }, + { + "epoch": 23.3125, + "grad_norm": 2.2235792181374423, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 510701224, + "step": 5595 + }, + { + "epoch": 23.3125, + "loss": 0.06232095882296562, + "loss_ce": 5.747355316998437e-05, + "loss_iou": 0.138671875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 510701224, + "step": 5595 + }, + { + "epoch": 23.316666666666666, + "grad_norm": 2.1027845062454613, + "learning_rate": 5e-05, + "loss": 0.0282, + "num_input_tokens_seen": 510792288, + "step": 5596 + }, + { + "epoch": 23.316666666666666, + "loss": 0.01713455468416214, + "loss_ce": 6.564921932294965e-06, + "loss_iou": 0.13671875, + "loss_num": 0.00341796875, + "loss_xval": 0.01708984375, + "num_input_tokens_seen": 510792288, + "step": 5596 + }, + { + "epoch": 23.320833333333333, + "grad_norm": 2.039990023904755, + "learning_rate": 5e-05, + "loss": 0.0279, + "num_input_tokens_seen": 510883200, + "step": 5597 + }, + { + "epoch": 23.320833333333333, + "loss": 0.025514788925647736, + "loss_ce": 9.722947652335279e-06, + "loss_iou": 0.1611328125, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 510883200, + "step": 5597 + }, + { + "epoch": 23.325, + "grad_norm": 3.283042243440875, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 510974092, + "step": 5598 + }, + { + "epoch": 23.325, + "loss": 0.0327766016125679, + "loss_ce": 2.3610751668456942e-05, + "loss_iou": 0.224609375, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 510974092, + "step": 5598 + }, + { + "epoch": 23.329166666666666, + "grad_norm": 3.2783258428697626, + "learning_rate": 5e-05, + "loss": 0.0355, + "num_input_tokens_seen": 511065076, + "step": 5599 + }, + { + "epoch": 23.329166666666666, + "loss": 0.044812239706516266, + "loss_ce": 6.583758658962324e-05, + "loss_iou": 0.031982421875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 511065076, + "step": 5599 + }, + { + "epoch": 23.333333333333332, + "grad_norm": 3.227421569002465, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 511156564, + "step": 5600 + }, + { + "epoch": 23.333333333333332, + "loss": 0.09824306517839432, + "loss_ce": 6.981414117035456e-06, + "loss_iou": 0.19921875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 511156564, + "step": 5600 + }, + { + "epoch": 23.3375, + "grad_norm": 4.576749131718821, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 511247844, + "step": 5601 + }, + { + "epoch": 23.3375, + "loss": 0.03233812749385834, + "loss_ce": 4.754355359182227e-06, + "loss_iou": 0.32421875, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 511247844, + "step": 5601 + }, + { + "epoch": 23.341666666666665, + "grad_norm": 1.756281817515379, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 511338828, + "step": 5602 + }, + { + "epoch": 23.341666666666665, + "loss": 0.04080682992935181, + "loss_ce": 2.0088969904463738e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 511338828, + "step": 5602 + }, + { + "epoch": 23.345833333333335, + "grad_norm": 2.3779147808503587, + "learning_rate": 5e-05, + "loss": 0.0343, + "num_input_tokens_seen": 511430068, + "step": 5603 + }, + { + "epoch": 23.345833333333335, + "loss": 0.046825163066387177, + "loss_ce": 1.1200661901966669e-05, + "loss_iou": 0.2265625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 511430068, + "step": 5603 + }, + { + "epoch": 23.35, + "grad_norm": 2.3823996882656218, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 511521604, + "step": 5604 + }, + { + "epoch": 23.35, + "loss": 0.018639925867319107, + "loss_ce": 2.4203442080761306e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.00372314453125, + "loss_xval": 0.0185546875, + "num_input_tokens_seen": 511521604, + "step": 5604 + }, + { + "epoch": 23.354166666666668, + "grad_norm": 3.1417895799293074, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 511613616, + "step": 5605 + }, + { + "epoch": 23.354166666666668, + "loss": 0.03632240742444992, + "loss_ce": 2.1747357095591724e-05, + "loss_iou": 0.197265625, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 511613616, + "step": 5605 + }, + { + "epoch": 23.358333333333334, + "grad_norm": 2.2423365837651823, + "learning_rate": 5e-05, + "loss": 0.0664, + "num_input_tokens_seen": 511704868, + "step": 5606 + }, + { + "epoch": 23.358333333333334, + "loss": 0.07503880560398102, + "loss_ce": 0.002468003425747156, + "loss_iou": 0.1650390625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 511704868, + "step": 5606 + }, + { + "epoch": 23.3625, + "grad_norm": 1.4279371299504686, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 511796228, + "step": 5607 + }, + { + "epoch": 23.3625, + "loss": 0.021348990499973297, + "loss_ce": 4.390611138660461e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.0042724609375, + "loss_xval": 0.0213623046875, + "num_input_tokens_seen": 511796228, + "step": 5607 + }, + { + "epoch": 23.366666666666667, + "grad_norm": 2.260574375748545, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 511887356, + "step": 5608 + }, + { + "epoch": 23.366666666666667, + "loss": 0.024202514439821243, + "loss_ce": 1.733544559101574e-05, + "loss_iou": 0.16015625, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 511887356, + "step": 5608 + }, + { + "epoch": 23.370833333333334, + "grad_norm": 2.2463134527685646, + "learning_rate": 5e-05, + "loss": 0.0794, + "num_input_tokens_seen": 511978756, + "step": 5609 + }, + { + "epoch": 23.370833333333334, + "loss": 0.08751125633716583, + "loss_ce": 3.2622563594486564e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 511978756, + "step": 5609 + }, + { + "epoch": 23.375, + "grad_norm": 1.5376900540037068, + "learning_rate": 5e-05, + "loss": 0.0331, + "num_input_tokens_seen": 512069884, + "step": 5610 + }, + { + "epoch": 23.375, + "loss": 0.028105616569519043, + "loss_ce": 6.5541730691620614e-06, + "loss_iou": 0.2021484375, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 512069884, + "step": 5610 + }, + { + "epoch": 23.379166666666666, + "grad_norm": 2.708976095662761, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 512160852, + "step": 5611 + }, + { + "epoch": 23.379166666666666, + "loss": 0.07938659191131592, + "loss_ce": 0.00013243715511634946, + "loss_iou": 0.25390625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 512160852, + "step": 5611 + }, + { + "epoch": 23.383333333333333, + "grad_norm": 2.4624848588135295, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 512252172, + "step": 5612 + }, + { + "epoch": 23.383333333333333, + "loss": 0.028108566999435425, + "loss_ce": 7.817186997272074e-05, + "loss_iou": 0.13671875, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 512252172, + "step": 5612 + }, + { + "epoch": 23.3875, + "grad_norm": 2.234790836550349, + "learning_rate": 5e-05, + "loss": 0.0236, + "num_input_tokens_seen": 512343476, + "step": 5613 + }, + { + "epoch": 23.3875, + "loss": 0.024507207795977592, + "loss_ce": 0.00011603281018324196, + "loss_iou": 0.3046875, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 512343476, + "step": 5613 + }, + { + "epoch": 23.391666666666666, + "grad_norm": 2.6623352490414596, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 512434636, + "step": 5614 + }, + { + "epoch": 23.391666666666666, + "loss": 0.0387955941259861, + "loss_ce": 7.751992598059587e-06, + "loss_iou": 0.267578125, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 512434636, + "step": 5614 + }, + { + "epoch": 23.395833333333332, + "grad_norm": 3.3554717038231856, + "learning_rate": 5e-05, + "loss": 0.0293, + "num_input_tokens_seen": 512525512, + "step": 5615 + }, + { + "epoch": 23.395833333333332, + "loss": 0.024253401905298233, + "loss_ce": 7.185792583186412e-06, + "loss_iou": 0.306640625, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 512525512, + "step": 5615 + }, + { + "epoch": 23.4, + "grad_norm": 2.4752797068979233, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 512617024, + "step": 5616 + }, + { + "epoch": 23.4, + "loss": 0.020623400807380676, + "loss_ce": 8.507090387865901e-05, + "loss_iou": 0.169921875, + "loss_num": 0.004119873046875, + "loss_xval": 0.0205078125, + "num_input_tokens_seen": 512617024, + "step": 5616 + }, + { + "epoch": 23.404166666666665, + "grad_norm": 1.6684475913014143, + "learning_rate": 5e-05, + "loss": 0.0228, + "num_input_tokens_seen": 512708132, + "step": 5617 + }, + { + "epoch": 23.404166666666665, + "loss": 0.02751937136054039, + "loss_ce": 7.775272024446167e-06, + "loss_iou": 0.216796875, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 512708132, + "step": 5617 + }, + { + "epoch": 23.408333333333335, + "grad_norm": 1.8713570994552078, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 512798976, + "step": 5618 + }, + { + "epoch": 23.408333333333335, + "loss": 0.03592763468623161, + "loss_ce": 8.446499123238027e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 512798976, + "step": 5618 + }, + { + "epoch": 23.4125, + "grad_norm": 3.3172557575079473, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 512890368, + "step": 5619 + }, + { + "epoch": 23.4125, + "loss": 0.03127144277095795, + "loss_ce": 5.196149504627101e-05, + "loss_iou": 0.287109375, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 512890368, + "step": 5619 + }, + { + "epoch": 23.416666666666668, + "grad_norm": 2.2956983752384836, + "learning_rate": 5e-05, + "loss": 0.0261, + "num_input_tokens_seen": 512982264, + "step": 5620 + }, + { + "epoch": 23.416666666666668, + "loss": 0.02757253870368004, + "loss_ce": 3.0425067961914465e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0054931640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 512982264, + "step": 5620 + }, + { + "epoch": 23.420833333333334, + "grad_norm": 1.5291227424452554, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 513073336, + "step": 5621 + }, + { + "epoch": 23.420833333333334, + "loss": 0.024489475414156914, + "loss_ce": 1.437848732166458e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 513073336, + "step": 5621 + }, + { + "epoch": 23.425, + "grad_norm": 1.4357362026895593, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 513165060, + "step": 5622 + }, + { + "epoch": 23.425, + "loss": 0.08363399654626846, + "loss_ce": 8.204024197766557e-06, + "loss_iou": 0.1962890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 513165060, + "step": 5622 + }, + { + "epoch": 23.429166666666667, + "grad_norm": 2.465944564759923, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 513256796, + "step": 5623 + }, + { + "epoch": 23.429166666666667, + "loss": 0.057980284094810486, + "loss_ce": 1.2144162610638887e-05, + "loss_iou": 0.359375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 513256796, + "step": 5623 + }, + { + "epoch": 23.433333333333334, + "grad_norm": 1.8353432433000278, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 513348044, + "step": 5624 + }, + { + "epoch": 23.433333333333334, + "loss": 0.06825631856918335, + "loss_ce": 1.1382675438653678e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 513348044, + "step": 5624 + }, + { + "epoch": 23.4375, + "grad_norm": 2.3975083646021935, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 513439524, + "step": 5625 + }, + { + "epoch": 23.4375, + "loss": 0.018934469670057297, + "loss_ce": 5.941654762864346e-06, + "loss_iou": 0.251953125, + "loss_num": 0.0037841796875, + "loss_xval": 0.0189208984375, + "num_input_tokens_seen": 513439524, + "step": 5625 + }, + { + "epoch": 23.441666666666666, + "grad_norm": 2.7420797301869957, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 513531316, + "step": 5626 + }, + { + "epoch": 23.441666666666666, + "loss": 0.036931782960891724, + "loss_ce": 5.513968972081784e-06, + "loss_iou": 0.234375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 513531316, + "step": 5626 + }, + { + "epoch": 23.445833333333333, + "grad_norm": 3.887594423043908, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 513622396, + "step": 5627 + }, + { + "epoch": 23.445833333333333, + "loss": 0.11473788321018219, + "loss_ce": 3.7564546801149845e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 513622396, + "step": 5627 + }, + { + "epoch": 23.45, + "grad_norm": 3.32244812447736, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 513711996, + "step": 5628 + }, + { + "epoch": 23.45, + "loss": 0.03876044601202011, + "loss_ce": 1.0750181900220923e-05, + "loss_iou": 0.310546875, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 513711996, + "step": 5628 + }, + { + "epoch": 23.454166666666666, + "grad_norm": 2.196725364122393, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 513803728, + "step": 5629 + }, + { + "epoch": 23.454166666666666, + "loss": 0.044431425631046295, + "loss_ce": 1.3089969797874801e-05, + "loss_iou": 0.21484375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 513803728, + "step": 5629 + }, + { + "epoch": 23.458333333333332, + "grad_norm": 2.0038379188545035, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 513895252, + "step": 5630 + }, + { + "epoch": 23.458333333333332, + "loss": 0.01975741982460022, + "loss_ce": 1.254726703336928e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.003936767578125, + "loss_xval": 0.019775390625, + "num_input_tokens_seen": 513895252, + "step": 5630 + }, + { + "epoch": 23.4625, + "grad_norm": 2.1180023798296244, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 513985996, + "step": 5631 + }, + { + "epoch": 23.4625, + "loss": 0.05978050082921982, + "loss_ce": 4.191587322566193e-06, + "loss_iou": 0.236328125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 513985996, + "step": 5631 + }, + { + "epoch": 23.466666666666665, + "grad_norm": 2.1144073204801748, + "learning_rate": 5e-05, + "loss": 0.027, + "num_input_tokens_seen": 514077428, + "step": 5632 + }, + { + "epoch": 23.466666666666665, + "loss": 0.022730417549610138, + "loss_ce": 1.0080520951305516e-05, + "loss_iou": 0.15625, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 514077428, + "step": 5632 + }, + { + "epoch": 23.470833333333335, + "grad_norm": 2.089796949258426, + "learning_rate": 5e-05, + "loss": 0.0248, + "num_input_tokens_seen": 514168880, + "step": 5633 + }, + { + "epoch": 23.470833333333335, + "loss": 0.024836096912622452, + "loss_ce": 1.004880505206529e-05, + "loss_iou": 0.2109375, + "loss_num": 0.004974365234375, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 514168880, + "step": 5633 + }, + { + "epoch": 23.475, + "grad_norm": 1.985972227164406, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 514261112, + "step": 5634 + }, + { + "epoch": 23.475, + "loss": 0.050126075744628906, + "loss_ce": 8.583403541706502e-06, + "loss_iou": 0.2578125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 514261112, + "step": 5634 + }, + { + "epoch": 23.479166666666668, + "grad_norm": 2.72394390309996, + "learning_rate": 5e-05, + "loss": 0.0298, + "num_input_tokens_seen": 514351744, + "step": 5635 + }, + { + "epoch": 23.479166666666668, + "loss": 0.0400192067027092, + "loss_ce": 1.066156619344838e-05, + "loss_iou": 0.193359375, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 514351744, + "step": 5635 + }, + { + "epoch": 23.483333333333334, + "grad_norm": 2.702846446322467, + "learning_rate": 5e-05, + "loss": 0.0338, + "num_input_tokens_seen": 514442600, + "step": 5636 + }, + { + "epoch": 23.483333333333334, + "loss": 0.02324534021317959, + "loss_ce": 9.775830403668806e-05, + "loss_iou": 0.248046875, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 514442600, + "step": 5636 + }, + { + "epoch": 23.4875, + "grad_norm": 2.4199996769429224, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 514534080, + "step": 5637 + }, + { + "epoch": 23.4875, + "loss": 0.02815338969230652, + "loss_ce": 1.236794105352601e-05, + "loss_iou": 0.1640625, + "loss_num": 0.005615234375, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 514534080, + "step": 5637 + }, + { + "epoch": 23.491666666666667, + "grad_norm": 2.728410632734877, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 514624976, + "step": 5638 + }, + { + "epoch": 23.491666666666667, + "loss": 0.03780459612607956, + "loss_ce": 1.6205263818847016e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 514624976, + "step": 5638 + }, + { + "epoch": 23.495833333333334, + "grad_norm": 2.685654801734654, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 514716316, + "step": 5639 + }, + { + "epoch": 23.495833333333334, + "loss": 0.0410284698009491, + "loss_ce": 4.3360669224057347e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 514716316, + "step": 5639 + }, + { + "epoch": 23.5, + "grad_norm": 2.8698868644425732, + "learning_rate": 5e-05, + "loss": 0.0442, + "num_input_tokens_seen": 514807012, + "step": 5640 + }, + { + "epoch": 23.5, + "loss": 0.04485444352030754, + "loss_ce": 8.859709851094522e-06, + "loss_iou": 0.32421875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 514807012, + "step": 5640 + }, + { + "epoch": 23.504166666666666, + "grad_norm": 2.716969476117626, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 514896080, + "step": 5641 + }, + { + "epoch": 23.504166666666666, + "loss": 0.048580192029476166, + "loss_ce": 3.835902589344187e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 514896080, + "step": 5641 + }, + { + "epoch": 23.508333333333333, + "grad_norm": 2.975009957374591, + "learning_rate": 5e-05, + "loss": 0.0884, + "num_input_tokens_seen": 514988056, + "step": 5642 + }, + { + "epoch": 23.508333333333333, + "loss": 0.09305469691753387, + "loss_ce": 6.598234904231504e-06, + "loss_iou": 0.22265625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 514988056, + "step": 5642 + }, + { + "epoch": 23.5125, + "grad_norm": 1.8440447054389957, + "learning_rate": 5e-05, + "loss": 0.1001, + "num_input_tokens_seen": 515079164, + "step": 5643 + }, + { + "epoch": 23.5125, + "loss": 0.053382910788059235, + "loss_ce": 1.5296969650080428e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 515079164, + "step": 5643 + }, + { + "epoch": 23.516666666666666, + "grad_norm": 1.9757833576710389, + "learning_rate": 5e-05, + "loss": 0.0374, + "num_input_tokens_seen": 515170564, + "step": 5644 + }, + { + "epoch": 23.516666666666666, + "loss": 0.04524645209312439, + "loss_ce": 4.142591023992281e-06, + "loss_iou": 0.1513671875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 515170564, + "step": 5644 + }, + { + "epoch": 23.520833333333332, + "grad_norm": 2.7063172851091832, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 515262060, + "step": 5645 + }, + { + "epoch": 23.520833333333332, + "loss": 0.05944689363241196, + "loss_ce": 1.3914520422986243e-05, + "loss_iou": 0.380859375, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 515262060, + "step": 5645 + }, + { + "epoch": 23.525, + "grad_norm": 2.179403906582386, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 515352696, + "step": 5646 + }, + { + "epoch": 23.525, + "loss": 0.03545938432216644, + "loss_ce": 5.586158749792958e-06, + "loss_iou": 0.1748046875, + "loss_num": 0.007110595703125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 515352696, + "step": 5646 + }, + { + "epoch": 23.529166666666665, + "grad_norm": 3.323901434148456, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 515443752, + "step": 5647 + }, + { + "epoch": 23.529166666666665, + "loss": 0.035969581454992294, + "loss_ce": 1.9874481949955225e-05, + "loss_iou": 0.353515625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 515443752, + "step": 5647 + }, + { + "epoch": 23.533333333333335, + "grad_norm": 3.3645703196290655, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 515535332, + "step": 5648 + }, + { + "epoch": 23.533333333333335, + "loss": 0.04916052892804146, + "loss_ce": 7.30073224985972e-05, + "loss_iou": 0.267578125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 515535332, + "step": 5648 + }, + { + "epoch": 23.5375, + "grad_norm": 2.3016853169182405, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 515626824, + "step": 5649 + }, + { + "epoch": 23.5375, + "loss": 0.1020599901676178, + "loss_ce": 9.21357968763914e-06, + "loss_iou": 0.23828125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 515626824, + "step": 5649 + }, + { + "epoch": 23.541666666666668, + "grad_norm": 2.678787146150945, + "learning_rate": 5e-05, + "loss": 0.0282, + "num_input_tokens_seen": 515717788, + "step": 5650 + }, + { + "epoch": 23.541666666666668, + "loss": 0.0280543714761734, + "loss_ce": 8.716306183487177e-06, + "loss_iou": 0.2890625, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 515717788, + "step": 5650 + }, + { + "epoch": 23.545833333333334, + "grad_norm": 3.1924204887193137, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 515809500, + "step": 5651 + }, + { + "epoch": 23.545833333333334, + "loss": 0.022662218660116196, + "loss_ce": 9.447037882637233e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0045166015625, + "loss_xval": 0.0225830078125, + "num_input_tokens_seen": 515809500, + "step": 5651 + }, + { + "epoch": 23.55, + "grad_norm": 2.1580431546123497, + "learning_rate": 5e-05, + "loss": 0.044, + "num_input_tokens_seen": 515900556, + "step": 5652 + }, + { + "epoch": 23.55, + "loss": 0.04361415654420853, + "loss_ce": 4.532886578090256e-06, + "loss_iou": 0.1455078125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 515900556, + "step": 5652 + }, + { + "epoch": 23.554166666666667, + "grad_norm": 2.0805932446315856, + "learning_rate": 5e-05, + "loss": 0.0769, + "num_input_tokens_seen": 515991728, + "step": 5653 + }, + { + "epoch": 23.554166666666667, + "loss": 0.09516514092683792, + "loss_ce": 3.702312824316323e-06, + "loss_iou": 0.296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 515991728, + "step": 5653 + }, + { + "epoch": 23.558333333333334, + "grad_norm": 2.322630467922017, + "learning_rate": 5e-05, + "loss": 0.0296, + "num_input_tokens_seen": 516082928, + "step": 5654 + }, + { + "epoch": 23.558333333333334, + "loss": 0.03366834297776222, + "loss_ce": 7.451967121596681e-06, + "loss_iou": 0.2099609375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 516082928, + "step": 5654 + }, + { + "epoch": 23.5625, + "grad_norm": 2.53132198944104, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 516174480, + "step": 5655 + }, + { + "epoch": 23.5625, + "loss": 0.023414544761180878, + "loss_ce": 3.808091423707083e-05, + "loss_iou": 0.16796875, + "loss_num": 0.004669189453125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 516174480, + "step": 5655 + }, + { + "epoch": 23.566666666666666, + "grad_norm": 3.3568984132217246, + "learning_rate": 5e-05, + "loss": 0.0743, + "num_input_tokens_seen": 516264312, + "step": 5656 + }, + { + "epoch": 23.566666666666666, + "loss": 0.07518689334392548, + "loss_ce": 6.83938969814335e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 516264312, + "step": 5656 + }, + { + "epoch": 23.570833333333333, + "grad_norm": 3.0716487542142206, + "learning_rate": 5e-05, + "loss": 0.0637, + "num_input_tokens_seen": 516355408, + "step": 5657 + }, + { + "epoch": 23.570833333333333, + "loss": 0.058454468846321106, + "loss_ce": 7.43414057069458e-05, + "loss_iou": 0.275390625, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 516355408, + "step": 5657 + }, + { + "epoch": 23.575, + "grad_norm": 3.63304833056305, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 516447012, + "step": 5658 + }, + { + "epoch": 23.575, + "loss": 0.08642973005771637, + "loss_ce": 3.446522532613017e-05, + "loss_iou": 0.306640625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 516447012, + "step": 5658 + }, + { + "epoch": 23.579166666666666, + "grad_norm": 3.6600622195241517, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 516538268, + "step": 5659 + }, + { + "epoch": 23.579166666666666, + "loss": 0.031593114137649536, + "loss_ce": 1.5052954040584154e-05, + "loss_iou": 0.232421875, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 516538268, + "step": 5659 + }, + { + "epoch": 23.583333333333332, + "grad_norm": 3.4145638310871465, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 516629376, + "step": 5660 + }, + { + "epoch": 23.583333333333332, + "loss": 0.03159729763865471, + "loss_ce": 1.1604059181991033e-05, + "loss_iou": 0.205078125, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 516629376, + "step": 5660 + }, + { + "epoch": 23.5875, + "grad_norm": 2.025115193252702, + "learning_rate": 5e-05, + "loss": 0.0373, + "num_input_tokens_seen": 516720792, + "step": 5661 + }, + { + "epoch": 23.5875, + "loss": 0.037495002150535583, + "loss_ce": 4.157373041380197e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 516720792, + "step": 5661 + }, + { + "epoch": 23.591666666666665, + "grad_norm": 2.747381866770108, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 516811784, + "step": 5662 + }, + { + "epoch": 23.591666666666665, + "loss": 0.028072349727153778, + "loss_ce": 3.805643473242526e-06, + "loss_iou": 0.236328125, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 516811784, + "step": 5662 + }, + { + "epoch": 23.595833333333335, + "grad_norm": 5.975171417229366, + "learning_rate": 5e-05, + "loss": 0.0705, + "num_input_tokens_seen": 516902588, + "step": 5663 + }, + { + "epoch": 23.595833333333335, + "loss": 0.029614463448524475, + "loss_ce": 4.786224963027053e-06, + "loss_iou": 0.30078125, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 516902588, + "step": 5663 + }, + { + "epoch": 23.6, + "grad_norm": 4.029427007700395, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 516994656, + "step": 5664 + }, + { + "epoch": 23.6, + "loss": 0.03429003804922104, + "loss_ce": 0.0009038057178258896, + "loss_iou": 0.35546875, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 516994656, + "step": 5664 + }, + { + "epoch": 23.604166666666668, + "grad_norm": 6.616968713190724, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 517086096, + "step": 5665 + }, + { + "epoch": 23.604166666666668, + "loss": 0.030396755784749985, + "loss_ce": 1.65061246661935e-05, + "loss_iou": 0.306640625, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 517086096, + "step": 5665 + }, + { + "epoch": 23.608333333333334, + "grad_norm": 2.383206818989159, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 517177220, + "step": 5666 + }, + { + "epoch": 23.608333333333334, + "loss": 0.06495459377765656, + "loss_ce": 4.3700241803890094e-05, + "loss_iou": 0.171875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 517177220, + "step": 5666 + }, + { + "epoch": 23.6125, + "grad_norm": 4.632698059280031, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 517268364, + "step": 5667 + }, + { + "epoch": 23.6125, + "loss": 0.05848491191864014, + "loss_ce": 0.00015056514530442655, + "loss_iou": 0.1142578125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 517268364, + "step": 5667 + }, + { + "epoch": 23.616666666666667, + "grad_norm": 2.6065365247113124, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 517359364, + "step": 5668 + }, + { + "epoch": 23.616666666666667, + "loss": 0.04949769377708435, + "loss_ce": 1.3440771908790339e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 517359364, + "step": 5668 + }, + { + "epoch": 23.620833333333334, + "grad_norm": 1.5957956640113704, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 517451020, + "step": 5669 + }, + { + "epoch": 23.620833333333334, + "loss": 0.09316151589155197, + "loss_ce": 0.0001363093324471265, + "loss_iou": 0.279296875, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 517451020, + "step": 5669 + }, + { + "epoch": 23.625, + "grad_norm": 1.7661471096168884, + "learning_rate": 5e-05, + "loss": 0.0292, + "num_input_tokens_seen": 517542264, + "step": 5670 + }, + { + "epoch": 23.625, + "loss": 0.031134188175201416, + "loss_ce": 6.257385848584818e-06, + "loss_iou": 0.1669921875, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 517542264, + "step": 5670 + }, + { + "epoch": 23.629166666666666, + "grad_norm": 1.9049760315075954, + "learning_rate": 5e-05, + "loss": 0.0836, + "num_input_tokens_seen": 517633508, + "step": 5671 + }, + { + "epoch": 23.629166666666666, + "loss": 0.15114159882068634, + "loss_ce": 3.300470325484639e-06, + "loss_iou": 0.1337890625, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 517633508, + "step": 5671 + }, + { + "epoch": 23.633333333333333, + "grad_norm": 3.357283780868695, + "learning_rate": 5e-05, + "loss": 0.0601, + "num_input_tokens_seen": 517725464, + "step": 5672 + }, + { + "epoch": 23.633333333333333, + "loss": 0.06346787512302399, + "loss_ce": 6.760370160918683e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 517725464, + "step": 5672 + }, + { + "epoch": 23.6375, + "grad_norm": 1.5407731254040886, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 517816540, + "step": 5673 + }, + { + "epoch": 23.6375, + "loss": 0.048604048788547516, + "loss_ce": 4.8071092351165134e-06, + "loss_iou": 0.302734375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 517816540, + "step": 5673 + }, + { + "epoch": 23.641666666666666, + "grad_norm": 2.323368477040437, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 517907760, + "step": 5674 + }, + { + "epoch": 23.641666666666666, + "loss": 0.06671786308288574, + "loss_ce": 6.43744670014712e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 517907760, + "step": 5674 + }, + { + "epoch": 23.645833333333332, + "grad_norm": 3.0599498239958325, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 517999192, + "step": 5675 + }, + { + "epoch": 23.645833333333332, + "loss": 0.04143279045820236, + "loss_ce": 5.178941592021147e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 517999192, + "step": 5675 + }, + { + "epoch": 23.65, + "grad_norm": 2.769269428960717, + "learning_rate": 5e-05, + "loss": 0.0544, + "num_input_tokens_seen": 518090364, + "step": 5676 + }, + { + "epoch": 23.65, + "loss": 0.03592964634299278, + "loss_ce": 1.0455694791744463e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 518090364, + "step": 5676 + }, + { + "epoch": 23.654166666666665, + "grad_norm": 3.6940671800591534, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 518182272, + "step": 5677 + }, + { + "epoch": 23.654166666666665, + "loss": 0.061431143432855606, + "loss_ce": 6.890437816764461e-06, + "loss_iou": 0.1845703125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 518182272, + "step": 5677 + }, + { + "epoch": 23.658333333333335, + "grad_norm": 2.0216151866354664, + "learning_rate": 5e-05, + "loss": 0.0426, + "num_input_tokens_seen": 518273624, + "step": 5678 + }, + { + "epoch": 23.658333333333335, + "loss": 0.03560943901538849, + "loss_ce": 1.0682435458875261e-05, + "loss_iou": 0.23046875, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 518273624, + "step": 5678 + }, + { + "epoch": 23.6625, + "grad_norm": 2.523235154108421, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 518365116, + "step": 5679 + }, + { + "epoch": 23.6625, + "loss": 0.025457948446273804, + "loss_ce": 6.287944870564388e-06, + "loss_iou": 0.212890625, + "loss_num": 0.005096435546875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 518365116, + "step": 5679 + }, + { + "epoch": 23.666666666666668, + "grad_norm": 3.5324845201005473, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 518456160, + "step": 5680 + }, + { + "epoch": 23.666666666666668, + "loss": 0.053803130984306335, + "loss_ce": 4.641583655029535e-05, + "loss_iou": 0.28125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 518456160, + "step": 5680 + }, + { + "epoch": 23.670833333333334, + "grad_norm": 1.614914817798423, + "learning_rate": 5e-05, + "loss": 0.0455, + "num_input_tokens_seen": 518547808, + "step": 5681 + }, + { + "epoch": 23.670833333333334, + "loss": 0.04403459653258324, + "loss_ce": 5.876697468920611e-05, + "loss_iou": 0.203125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 518547808, + "step": 5681 + }, + { + "epoch": 23.675, + "grad_norm": 1.7345038472368803, + "learning_rate": 5e-05, + "loss": 0.0223, + "num_input_tokens_seen": 518639376, + "step": 5682 + }, + { + "epoch": 23.675, + "loss": 0.018705224618315697, + "loss_ce": 5.8984762290492654e-05, + "loss_iou": 0.1005859375, + "loss_num": 0.00372314453125, + "loss_xval": 0.0186767578125, + "num_input_tokens_seen": 518639376, + "step": 5682 + }, + { + "epoch": 23.679166666666667, + "grad_norm": 2.8706427380183652, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 518731060, + "step": 5683 + }, + { + "epoch": 23.679166666666667, + "loss": 0.06468746066093445, + "loss_ce": 5.123291339259595e-05, + "loss_iou": 0.375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 518731060, + "step": 5683 + }, + { + "epoch": 23.683333333333334, + "grad_norm": 2.297743551190372, + "learning_rate": 5e-05, + "loss": 0.0242, + "num_input_tokens_seen": 518822916, + "step": 5684 + }, + { + "epoch": 23.683333333333334, + "loss": 0.023034725338220596, + "loss_ce": 3.973131606471725e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.004608154296875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 518822916, + "step": 5684 + }, + { + "epoch": 23.6875, + "grad_norm": 2.9373343094831625, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 518914300, + "step": 5685 + }, + { + "epoch": 23.6875, + "loss": 0.024574488401412964, + "loss_ce": 7.83845371188363e-06, + "loss_iou": 0.283203125, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 518914300, + "step": 5685 + }, + { + "epoch": 23.691666666666666, + "grad_norm": 2.7813206419182723, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 519006072, + "step": 5686 + }, + { + "epoch": 23.691666666666666, + "loss": 0.051136475056409836, + "loss_ce": 4.271988018444972e-06, + "loss_iou": 0.25390625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 519006072, + "step": 5686 + }, + { + "epoch": 23.695833333333333, + "grad_norm": 3.26538931228411, + "learning_rate": 5e-05, + "loss": 0.0244, + "num_input_tokens_seen": 519097556, + "step": 5687 + }, + { + "epoch": 23.695833333333333, + "loss": 0.02645106054842472, + "loss_ce": 7.578772965644021e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.005279541015625, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 519097556, + "step": 5687 + }, + { + "epoch": 23.7, + "grad_norm": 2.546712078646406, + "learning_rate": 5e-05, + "loss": 0.0645, + "num_input_tokens_seen": 519188724, + "step": 5688 + }, + { + "epoch": 23.7, + "loss": 0.028148168697953224, + "loss_ce": 1.0961780390061904e-05, + "loss_iou": 0.22265625, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 519188724, + "step": 5688 + }, + { + "epoch": 23.704166666666666, + "grad_norm": 2.802237920982695, + "learning_rate": 5e-05, + "loss": 0.0302, + "num_input_tokens_seen": 519280104, + "step": 5689 + }, + { + "epoch": 23.704166666666666, + "loss": 0.02710503339767456, + "loss_ce": 5.424570190371014e-06, + "loss_iou": 0.2578125, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 519280104, + "step": 5689 + }, + { + "epoch": 23.708333333333332, + "grad_norm": 2.5827248584449936, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 519370780, + "step": 5690 + }, + { + "epoch": 23.708333333333332, + "loss": 0.029688237234950066, + "loss_ce": 9.89344607660314e-06, + "loss_iou": 0.3203125, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 519370780, + "step": 5690 + }, + { + "epoch": 23.7125, + "grad_norm": 1.889586030914547, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 519462148, + "step": 5691 + }, + { + "epoch": 23.7125, + "loss": 0.026575732976198196, + "loss_ce": 2.551576017140178e-06, + "loss_iou": 0.12451171875, + "loss_num": 0.00531005859375, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 519462148, + "step": 5691 + }, + { + "epoch": 23.716666666666665, + "grad_norm": 2.020125572866113, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 519553376, + "step": 5692 + }, + { + "epoch": 23.716666666666665, + "loss": 0.02690189890563488, + "loss_ce": 1.5912401067907922e-05, + "loss_iou": 0.21484375, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 519553376, + "step": 5692 + }, + { + "epoch": 23.720833333333335, + "grad_norm": 2.432818855651626, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 519644864, + "step": 5693 + }, + { + "epoch": 23.720833333333335, + "loss": 0.02428411692380905, + "loss_ce": 2.26416923396755e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 519644864, + "step": 5693 + }, + { + "epoch": 23.725, + "grad_norm": 3.385207640452561, + "learning_rate": 5e-05, + "loss": 0.0294, + "num_input_tokens_seen": 519736944, + "step": 5694 + }, + { + "epoch": 23.725, + "loss": 0.03325570002198219, + "loss_ce": 9.835181845119223e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 519736944, + "step": 5694 + }, + { + "epoch": 23.729166666666668, + "grad_norm": 3.044204278231839, + "learning_rate": 5e-05, + "loss": 0.0224, + "num_input_tokens_seen": 519828240, + "step": 5695 + }, + { + "epoch": 23.729166666666668, + "loss": 0.02027270570397377, + "loss_ce": 0.0020460819359868765, + "loss_iou": 0.234375, + "loss_num": 0.0036468505859375, + "loss_xval": 0.0181884765625, + "num_input_tokens_seen": 519828240, + "step": 5695 + }, + { + "epoch": 23.733333333333334, + "grad_norm": 2.4047813414462795, + "learning_rate": 5e-05, + "loss": 0.0336, + "num_input_tokens_seen": 519917812, + "step": 5696 + }, + { + "epoch": 23.733333333333334, + "loss": 0.024971704930067062, + "loss_ce": 8.326714123541024e-06, + "loss_iou": 0.251953125, + "loss_num": 0.0050048828125, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 519917812, + "step": 5696 + }, + { + "epoch": 23.7375, + "grad_norm": 2.0692491005082876, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 520008928, + "step": 5697 + }, + { + "epoch": 23.7375, + "loss": 0.048609986901283264, + "loss_ce": 2.6005120162153617e-05, + "loss_iou": 0.27734375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 520008928, + "step": 5697 + }, + { + "epoch": 23.741666666666667, + "grad_norm": 5.549537271069052, + "learning_rate": 5e-05, + "loss": 0.0607, + "num_input_tokens_seen": 520100712, + "step": 5698 + }, + { + "epoch": 23.741666666666667, + "loss": 0.07673782110214233, + "loss_ce": 9.292815957451239e-05, + "loss_iou": 0.220703125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 520100712, + "step": 5698 + }, + { + "epoch": 23.745833333333334, + "grad_norm": 0.8884850629976035, + "learning_rate": 5e-05, + "loss": 0.0429, + "num_input_tokens_seen": 520192780, + "step": 5699 + }, + { + "epoch": 23.745833333333334, + "loss": 0.06898298114538193, + "loss_ce": 4.377113509690389e-05, + "loss_iou": 0.212890625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 520192780, + "step": 5699 + }, + { + "epoch": 23.75, + "grad_norm": 5.180084394794358, + "learning_rate": 5e-05, + "loss": 0.0338, + "num_input_tokens_seen": 520284164, + "step": 5700 + }, + { + "epoch": 23.75, + "loss": 0.02840365283191204, + "loss_ce": 7.046019163681194e-06, + "loss_iou": 0.240234375, + "loss_num": 0.00567626953125, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 520284164, + "step": 5700 + }, + { + "epoch": 23.754166666666666, + "grad_norm": 3.16269972531882, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 520375736, + "step": 5701 + }, + { + "epoch": 23.754166666666666, + "loss": 0.043525584042072296, + "loss_ce": 7.5167668001085985e-06, + "loss_iou": 0.306640625, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 520375736, + "step": 5701 + }, + { + "epoch": 23.758333333333333, + "grad_norm": 4.3941677964058705, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 520466708, + "step": 5702 + }, + { + "epoch": 23.758333333333333, + "loss": 0.040162280201911926, + "loss_ce": 8.777634320722427e-06, + "loss_iou": 0.259765625, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 520466708, + "step": 5702 + }, + { + "epoch": 23.7625, + "grad_norm": 6.172707932635992, + "learning_rate": 5e-05, + "loss": 0.0557, + "num_input_tokens_seen": 520558396, + "step": 5703 + }, + { + "epoch": 23.7625, + "loss": 0.08005905896425247, + "loss_ce": 7.24865822121501e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 520558396, + "step": 5703 + }, + { + "epoch": 23.766666666666666, + "grad_norm": 4.081751568320687, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 520649760, + "step": 5704 + }, + { + "epoch": 23.766666666666666, + "loss": 0.036110296845436096, + "loss_ce": 7.999641638889443e-06, + "loss_iou": 0.275390625, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 520649760, + "step": 5704 + }, + { + "epoch": 23.770833333333332, + "grad_norm": 2.1443196714070716, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 520739288, + "step": 5705 + }, + { + "epoch": 23.770833333333332, + "loss": 0.0386023223400116, + "loss_ce": 1.2845989658671897e-05, + "loss_iou": 0.30078125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 520739288, + "step": 5705 + }, + { + "epoch": 23.775, + "grad_norm": 6.136813251555154, + "learning_rate": 5e-05, + "loss": 0.0257, + "num_input_tokens_seen": 520830760, + "step": 5706 + }, + { + "epoch": 23.775, + "loss": 0.025622554123401642, + "loss_ce": 0.00014037435175850987, + "loss_iou": 0.25, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 520830760, + "step": 5706 + }, + { + "epoch": 23.779166666666665, + "grad_norm": 1.9800169580594917, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 520920600, + "step": 5707 + }, + { + "epoch": 23.779166666666665, + "loss": 0.047232139855623245, + "loss_ce": 6.186518021422671e-06, + "loss_iou": 0.1708984375, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 520920600, + "step": 5707 + }, + { + "epoch": 23.783333333333335, + "grad_norm": 2.7088956283659313, + "learning_rate": 5e-05, + "loss": 0.0355, + "num_input_tokens_seen": 521011732, + "step": 5708 + }, + { + "epoch": 23.783333333333335, + "loss": 0.03868023678660393, + "loss_ce": 2.2094476662459783e-05, + "loss_iou": 0.359375, + "loss_num": 0.00775146484375, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 521011732, + "step": 5708 + }, + { + "epoch": 23.7875, + "grad_norm": 4.752971767412011, + "learning_rate": 5e-05, + "loss": 0.0837, + "num_input_tokens_seen": 521103056, + "step": 5709 + }, + { + "epoch": 23.7875, + "loss": 0.0954560711979866, + "loss_ce": 1.2343047274043784e-05, + "loss_iou": 0.337890625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 521103056, + "step": 5709 + }, + { + "epoch": 23.791666666666668, + "grad_norm": 3.074283646205475, + "learning_rate": 5e-05, + "loss": 0.0413, + "num_input_tokens_seen": 521194392, + "step": 5710 + }, + { + "epoch": 23.791666666666668, + "loss": 0.046271517872810364, + "loss_ce": 0.0009071393287740648, + "loss_iou": 0.3515625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 521194392, + "step": 5710 + }, + { + "epoch": 23.795833333333334, + "grad_norm": 2.3827833336182005, + "learning_rate": 5e-05, + "loss": 0.0271, + "num_input_tokens_seen": 521285936, + "step": 5711 + }, + { + "epoch": 23.795833333333334, + "loss": 0.03246616944670677, + "loss_ce": 3.0945029720896855e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 521285936, + "step": 5711 + }, + { + "epoch": 23.8, + "grad_norm": 3.117069638103086, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 521376912, + "step": 5712 + }, + { + "epoch": 23.8, + "loss": 0.04672073572874069, + "loss_ce": 5.953358595434111e-06, + "loss_iou": 0.24609375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 521376912, + "step": 5712 + }, + { + "epoch": 23.804166666666667, + "grad_norm": 3.8832364392280327, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 521467616, + "step": 5713 + }, + { + "epoch": 23.804166666666667, + "loss": 0.042544711381196976, + "loss_ce": 3.2088705665955786e-06, + "loss_iou": 0.240234375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 521467616, + "step": 5713 + }, + { + "epoch": 23.808333333333334, + "grad_norm": 2.2566435105155254, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 521559408, + "step": 5714 + }, + { + "epoch": 23.808333333333334, + "loss": 0.0935177356004715, + "loss_ce": 4.247904143994674e-06, + "loss_iou": 0.1552734375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 521559408, + "step": 5714 + }, + { + "epoch": 23.8125, + "grad_norm": 2.4054971165737107, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 521650176, + "step": 5715 + }, + { + "epoch": 23.8125, + "loss": 0.040401313453912735, + "loss_ce": 3.6688702493847813e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 521650176, + "step": 5715 + }, + { + "epoch": 23.816666666666666, + "grad_norm": 2.791598862794589, + "learning_rate": 5e-05, + "loss": 0.0304, + "num_input_tokens_seen": 521741528, + "step": 5716 + }, + { + "epoch": 23.816666666666666, + "loss": 0.030244871973991394, + "loss_ce": 9.350531036034226e-05, + "loss_iou": 0.193359375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 521741528, + "step": 5716 + }, + { + "epoch": 23.820833333333333, + "grad_norm": 2.8172338788712628, + "learning_rate": 5e-05, + "loss": 0.0228, + "num_input_tokens_seen": 521832880, + "step": 5717 + }, + { + "epoch": 23.820833333333333, + "loss": 0.026685720309615135, + "loss_ce": 5.727418283640873e-06, + "loss_iou": 0.263671875, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 521832880, + "step": 5717 + }, + { + "epoch": 23.825, + "grad_norm": 3.34368630324826, + "learning_rate": 5e-05, + "loss": 0.0569, + "num_input_tokens_seen": 521924456, + "step": 5718 + }, + { + "epoch": 23.825, + "loss": 0.05164847895503044, + "loss_ce": 1.2735023119603284e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 521924456, + "step": 5718 + }, + { + "epoch": 23.829166666666666, + "grad_norm": 2.6815516294356097, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 522015648, + "step": 5719 + }, + { + "epoch": 23.829166666666666, + "loss": 0.03111160174012184, + "loss_ce": 6.561008376593236e-06, + "loss_iou": 0.25, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 522015648, + "step": 5719 + }, + { + "epoch": 23.833333333333332, + "grad_norm": 2.6739108353856627, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 522107236, + "step": 5720 + }, + { + "epoch": 23.833333333333332, + "loss": 0.06111540645360947, + "loss_ce": 3.956250111514237e-06, + "loss_iou": 0.38671875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 522107236, + "step": 5720 + }, + { + "epoch": 23.8375, + "grad_norm": 2.837654807232517, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 522197216, + "step": 5721 + }, + { + "epoch": 23.8375, + "loss": 0.044276271015405655, + "loss_ce": 1.0525964171392843e-05, + "loss_iou": 0.232421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 522197216, + "step": 5721 + }, + { + "epoch": 23.841666666666665, + "grad_norm": 3.3273477416902866, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 522288964, + "step": 5722 + }, + { + "epoch": 23.841666666666665, + "loss": 0.02608104608952999, + "loss_ce": 6.481039599748328e-05, + "loss_iou": 0.19921875, + "loss_num": 0.00518798828125, + "loss_xval": 0.0260009765625, + "num_input_tokens_seen": 522288964, + "step": 5722 + }, + { + "epoch": 23.845833333333335, + "grad_norm": 2.5924484025116574, + "learning_rate": 5e-05, + "loss": 0.0429, + "num_input_tokens_seen": 522379756, + "step": 5723 + }, + { + "epoch": 23.845833333333335, + "loss": 0.0439254529774189, + "loss_ce": 1.0658584869815968e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 522379756, + "step": 5723 + }, + { + "epoch": 23.85, + "grad_norm": 2.9276761082141527, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 522470980, + "step": 5724 + }, + { + "epoch": 23.85, + "loss": 0.04932574927806854, + "loss_ce": 9.343220881419256e-06, + "loss_iou": 0.25390625, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 522470980, + "step": 5724 + }, + { + "epoch": 23.854166666666668, + "grad_norm": 2.089152950539155, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 522561248, + "step": 5725 + }, + { + "epoch": 23.854166666666668, + "loss": 0.02687397226691246, + "loss_ce": 3.2470645692228572e-06, + "loss_iou": 0.314453125, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 522561248, + "step": 5725 + }, + { + "epoch": 23.858333333333334, + "grad_norm": 2.4594153447981038, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 522652544, + "step": 5726 + }, + { + "epoch": 23.858333333333334, + "loss": 0.0471075065433979, + "loss_ce": 3.6227847886038944e-06, + "loss_iou": 0.318359375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 522652544, + "step": 5726 + }, + { + "epoch": 23.8625, + "grad_norm": 1.9635727454926588, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 522744140, + "step": 5727 + }, + { + "epoch": 23.8625, + "loss": 0.03888658434152603, + "loss_ce": 0.00014451687457039952, + "loss_iou": 0.259765625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 522744140, + "step": 5727 + }, + { + "epoch": 23.866666666666667, + "grad_norm": 0.9211156787835062, + "learning_rate": 5e-05, + "loss": 0.0933, + "num_input_tokens_seen": 522835084, + "step": 5728 + }, + { + "epoch": 23.866666666666667, + "loss": 0.15552428364753723, + "loss_ce": 2.894853651014273e-06, + "loss_iou": 0.10888671875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 522835084, + "step": 5728 + }, + { + "epoch": 23.870833333333334, + "grad_norm": 0.9425039639689817, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 522926308, + "step": 5729 + }, + { + "epoch": 23.870833333333334, + "loss": 0.05509873479604721, + "loss_ce": 1.4508611457131337e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 522926308, + "step": 5729 + }, + { + "epoch": 23.875, + "grad_norm": 1.2982466535460244, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 523017560, + "step": 5730 + }, + { + "epoch": 23.875, + "loss": 0.038000769913196564, + "loss_ce": 6.385762389982119e-06, + "loss_iou": 0.283203125, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 523017560, + "step": 5730 + }, + { + "epoch": 23.879166666666666, + "grad_norm": 1.4479330651413629, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 523108756, + "step": 5731 + }, + { + "epoch": 23.879166666666666, + "loss": 0.02427910827100277, + "loss_ce": 0.00010918613406829536, + "loss_iou": 0.0927734375, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 523108756, + "step": 5731 + }, + { + "epoch": 23.883333333333333, + "grad_norm": 2.694617725258283, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 523200448, + "step": 5732 + }, + { + "epoch": 23.883333333333333, + "loss": 0.030782945454120636, + "loss_ce": 1.359751149720978e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 523200448, + "step": 5732 + }, + { + "epoch": 23.8875, + "grad_norm": 2.8220834724397847, + "learning_rate": 5e-05, + "loss": 0.0747, + "num_input_tokens_seen": 523292276, + "step": 5733 + }, + { + "epoch": 23.8875, + "loss": 0.0910167396068573, + "loss_ce": 5.691027581633534e-06, + "loss_iou": 0.19921875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 523292276, + "step": 5733 + }, + { + "epoch": 23.891666666666666, + "grad_norm": 2.1295513985099093, + "learning_rate": 5e-05, + "loss": 0.0654, + "num_input_tokens_seen": 523384228, + "step": 5734 + }, + { + "epoch": 23.891666666666666, + "loss": 0.10112430155277252, + "loss_ce": 4.301551598473452e-06, + "loss_iou": 0.173828125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 523384228, + "step": 5734 + }, + { + "epoch": 23.895833333333332, + "grad_norm": 3.017383371266045, + "learning_rate": 5e-05, + "loss": 0.0294, + "num_input_tokens_seen": 523475492, + "step": 5735 + }, + { + "epoch": 23.895833333333332, + "loss": 0.025943610817193985, + "loss_ce": 3.6696526422019815e-06, + "loss_iou": 0.2021484375, + "loss_num": 0.00518798828125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 523475492, + "step": 5735 + }, + { + "epoch": 23.9, + "grad_norm": 1.2758619859096862, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 523566848, + "step": 5736 + }, + { + "epoch": 23.9, + "loss": 0.04210450500249863, + "loss_ce": 5.505870831257198e-06, + "loss_iou": 0.138671875, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 523566848, + "step": 5736 + }, + { + "epoch": 23.904166666666665, + "grad_norm": 1.2387003727139727, + "learning_rate": 5e-05, + "loss": 0.0278, + "num_input_tokens_seen": 523658692, + "step": 5737 + }, + { + "epoch": 23.904166666666665, + "loss": 0.022983882576227188, + "loss_ce": 1.9404327758820727e-05, + "loss_iou": 0.296875, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 523658692, + "step": 5737 + }, + { + "epoch": 23.908333333333335, + "grad_norm": 1.9687035345363064, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 523750232, + "step": 5738 + }, + { + "epoch": 23.908333333333335, + "loss": 0.07121194899082184, + "loss_ce": 0.00019754076492972672, + "loss_iou": 0.2294921875, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 523750232, + "step": 5738 + }, + { + "epoch": 23.9125, + "grad_norm": 1.7952091530297702, + "learning_rate": 5e-05, + "loss": 0.0715, + "num_input_tokens_seen": 523841588, + "step": 5739 + }, + { + "epoch": 23.9125, + "loss": 0.06907568871974945, + "loss_ce": 1.4407991329790093e-05, + "loss_iou": 0.21875, + "loss_num": 0.01385498046875, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 523841588, + "step": 5739 + }, + { + "epoch": 23.916666666666668, + "grad_norm": 1.9967058223364322, + "learning_rate": 5e-05, + "loss": 0.0332, + "num_input_tokens_seen": 523932836, + "step": 5740 + }, + { + "epoch": 23.916666666666668, + "loss": 0.036780357360839844, + "loss_ce": 6.675976237602299e-06, + "loss_iou": 0.2255859375, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 523932836, + "step": 5740 + }, + { + "epoch": 23.920833333333334, + "grad_norm": 1.4979108449408447, + "learning_rate": 5e-05, + "loss": 0.0686, + "num_input_tokens_seen": 524023568, + "step": 5741 + }, + { + "epoch": 23.920833333333334, + "loss": 0.08334586024284363, + "loss_ce": 2.3527120447397465e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 524023568, + "step": 5741 + }, + { + "epoch": 23.925, + "grad_norm": 0.9084482747463926, + "learning_rate": 5e-05, + "loss": 0.0274, + "num_input_tokens_seen": 524115120, + "step": 5742 + }, + { + "epoch": 23.925, + "loss": 0.02169908955693245, + "loss_ce": 3.160901178489439e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00433349609375, + "loss_xval": 0.021728515625, + "num_input_tokens_seen": 524115120, + "step": 5742 + }, + { + "epoch": 23.929166666666667, + "grad_norm": 1.268154810820986, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 524206932, + "step": 5743 + }, + { + "epoch": 23.929166666666667, + "loss": 0.0273551344871521, + "loss_ce": 1.1384383469703607e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 524206932, + "step": 5743 + }, + { + "epoch": 23.933333333333334, + "grad_norm": 1.048607072370536, + "learning_rate": 5e-05, + "loss": 0.0304, + "num_input_tokens_seen": 524298416, + "step": 5744 + }, + { + "epoch": 23.933333333333334, + "loss": 0.03643643110990524, + "loss_ce": 6.072639735066332e-06, + "loss_iou": 0.09619140625, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 524298416, + "step": 5744 + }, + { + "epoch": 23.9375, + "grad_norm": 0.8956328009301813, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 524390416, + "step": 5745 + }, + { + "epoch": 23.9375, + "loss": 0.047046225517988205, + "loss_ce": 6.441253935918212e-05, + "loss_iou": 0.1591796875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 524390416, + "step": 5745 + }, + { + "epoch": 23.941666666666666, + "grad_norm": 1.490795535747788, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 524481288, + "step": 5746 + }, + { + "epoch": 23.941666666666666, + "loss": 0.047673288732767105, + "loss_ce": 8.644723493489437e-06, + "loss_iou": 0.177734375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 524481288, + "step": 5746 + }, + { + "epoch": 23.945833333333333, + "grad_norm": 2.632357504177815, + "learning_rate": 5e-05, + "loss": 0.0261, + "num_input_tokens_seen": 524573912, + "step": 5747 + }, + { + "epoch": 23.945833333333333, + "loss": 0.030250361189246178, + "loss_ce": 6.847688200650737e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 524573912, + "step": 5747 + }, + { + "epoch": 23.95, + "grad_norm": 3.2126670774182244, + "learning_rate": 5e-05, + "loss": 0.0586, + "num_input_tokens_seen": 524665208, + "step": 5748 + }, + { + "epoch": 23.95, + "loss": 0.09400118887424469, + "loss_ce": 7.044025551294908e-06, + "loss_iou": 0.1953125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 524665208, + "step": 5748 + }, + { + "epoch": 23.954166666666666, + "grad_norm": 2.3353116804874805, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 524756128, + "step": 5749 + }, + { + "epoch": 23.954166666666666, + "loss": 0.04571268707513809, + "loss_ce": 2.787526318570599e-05, + "loss_iou": 0.376953125, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 524756128, + "step": 5749 + }, + { + "epoch": 23.958333333333332, + "grad_norm": 2.554961548389431, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 524847664, + "step": 5750 + }, + { + "epoch": 23.958333333333332, + "eval_seeclick_CIoU": 0.20855355635285378, + "eval_seeclick_GIoU": 0.1821647845208645, + "eval_seeclick_IoU": 0.3190717250108719, + "eval_seeclick_MAE_all": 0.12102610990405083, + "eval_seeclick_MAE_h": 0.11113836616277695, + "eval_seeclick_MAE_w": 0.2582087442278862, + "eval_seeclick_MAE_x_boxes": 0.2431691437959671, + "eval_seeclick_MAE_y_boxes": 0.11242787539958954, + "eval_seeclick_NUM_probability": 0.999996691942215, + "eval_seeclick_inside_bbox": 0.6008522808551788, + "eval_seeclick_loss": 0.6176849007606506, + "eval_seeclick_loss_ce": 0.09981685131788254, + "eval_seeclick_loss_iou": 0.4412841796875, + "eval_seeclick_loss_num": 0.0973968505859375, + "eval_seeclick_loss_xval": 0.48699951171875, + "eval_seeclick_runtime": 78.2106, + "eval_seeclick_samples_per_second": 0.55, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 524847664, + "step": 5750 + }, + { + "epoch": 23.958333333333332, + "eval_icons_CIoU": 0.24831150472164154, + "eval_icons_GIoU": 0.1872703991830349, + "eval_icons_IoU": 0.3625485599040985, + "eval_icons_MAE_all": 0.08768786117434502, + "eval_icons_MAE_h": 0.1570756696164608, + "eval_icons_MAE_w": 0.1565881222486496, + "eval_icons_MAE_x_boxes": 0.16092297434806824, + "eval_icons_MAE_y_boxes": 0.1555836908519268, + "eval_icons_NUM_probability": 0.9999974370002747, + "eval_icons_inside_bbox": 0.5381944477558136, + "eval_icons_loss": 0.41147759556770325, + "eval_icons_loss_ce": 0.0004623572021955624, + "eval_icons_loss_iou": 0.2532958984375, + "eval_icons_loss_num": 0.083740234375, + "eval_icons_loss_xval": 0.418701171875, + "eval_icons_runtime": 85.4332, + "eval_icons_samples_per_second": 0.585, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 524847664, + "step": 5750 + }, + { + "epoch": 23.958333333333332, + "eval_screenspot_CIoU": 0.38652878999710083, + "eval_screenspot_GIoU": 0.3825633426507314, + "eval_screenspot_IoU": 0.4596469004948934, + "eval_screenspot_MAE_all": 0.09237878521283467, + "eval_screenspot_MAE_h": 0.08135384569565456, + "eval_screenspot_MAE_w": 0.20547441641489664, + "eval_screenspot_MAE_x_boxes": 0.18527878324190775, + "eval_screenspot_MAE_y_boxes": 0.07767945279677708, + "eval_screenspot_NUM_probability": 0.9999972383181254, + "eval_screenspot_inside_bbox": 0.7145833373069763, + "eval_screenspot_loss": 0.46794506907463074, + "eval_screenspot_loss_ce": 0.0023575042529652515, + "eval_screenspot_loss_iou": 0.3312581380208333, + "eval_screenspot_loss_num": 0.09613037109375, + "eval_screenspot_loss_xval": 0.48065185546875, + "eval_screenspot_runtime": 150.8434, + "eval_screenspot_samples_per_second": 0.59, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 524847664, + "step": 5750 + }, + { + "epoch": 23.958333333333332, + "eval_compot_CIoU": 0.4482163190841675, + "eval_compot_GIoU": 0.43212345242500305, + "eval_compot_IoU": 0.5305320024490356, + "eval_compot_MAE_all": 0.06178927421569824, + "eval_compot_MAE_h": 0.07599110901355743, + "eval_compot_MAE_w": 0.15570373833179474, + "eval_compot_MAE_x_boxes": 0.15898607671260834, + "eval_compot_MAE_y_boxes": 0.07342762127518654, + "eval_compot_NUM_probability": 0.9999977648258209, + "eval_compot_inside_bbox": 0.7048611044883728, + "eval_compot_loss": 0.36854714155197144, + "eval_compot_loss_ce": 0.060504671186208725, + "eval_compot_loss_iou": 0.2802734375, + "eval_compot_loss_num": 0.06056976318359375, + "eval_compot_loss_xval": 0.302886962890625, + "eval_compot_runtime": 91.2487, + "eval_compot_samples_per_second": 0.548, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 524847664, + "step": 5750 + }, + { + "epoch": 23.958333333333332, + "loss": 0.3686365485191345, + "loss_ce": 0.06394904106855392, + "loss_iou": 0.279296875, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 524847664, + "step": 5750 + }, + { + "epoch": 23.9625, + "grad_norm": 5.963476561237152, + "learning_rate": 5e-05, + "loss": 0.0481, + "num_input_tokens_seen": 524938412, + "step": 5751 + }, + { + "epoch": 23.9625, + "loss": 0.04771061986684799, + "loss_ce": 4.9790302000474185e-05, + "loss_iou": 0.1953125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 524938412, + "step": 5751 + }, + { + "epoch": 23.966666666666665, + "grad_norm": 2.50413266026656, + "learning_rate": 5e-05, + "loss": 0.0628, + "num_input_tokens_seen": 525029396, + "step": 5752 + }, + { + "epoch": 23.966666666666665, + "loss": 0.020784705877304077, + "loss_ce": 9.865495485428255e-06, + "loss_iou": 0.1962890625, + "loss_num": 0.004150390625, + "loss_xval": 0.020751953125, + "num_input_tokens_seen": 525029396, + "step": 5752 + }, + { + "epoch": 23.970833333333335, + "grad_norm": 3.325364795867375, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 525121112, + "step": 5753 + }, + { + "epoch": 23.970833333333335, + "loss": 0.049129169434309006, + "loss_ce": 1.1128087862743996e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 525121112, + "step": 5753 + }, + { + "epoch": 23.975, + "grad_norm": 3.1010127423143454, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 525212764, + "step": 5754 + }, + { + "epoch": 23.975, + "loss": 0.0367819145321846, + "loss_ce": 1.586503640282899e-05, + "loss_iou": 0.1259765625, + "loss_num": 0.00732421875, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 525212764, + "step": 5754 + }, + { + "epoch": 23.979166666666668, + "grad_norm": 2.509055326477751, + "learning_rate": 5e-05, + "loss": 0.0633, + "num_input_tokens_seen": 525304080, + "step": 5755 + }, + { + "epoch": 23.979166666666668, + "loss": 0.065666064620018, + "loss_ce": 3.801234925049357e-05, + "loss_iou": 0.2578125, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 525304080, + "step": 5755 + }, + { + "epoch": 23.983333333333334, + "grad_norm": 4.192342223928191, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 525395496, + "step": 5756 + }, + { + "epoch": 23.983333333333334, + "loss": 0.030342694371938705, + "loss_ce": 3.111306796199642e-05, + "loss_iou": 0.21875, + "loss_num": 0.006072998046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 525395496, + "step": 5756 + }, + { + "epoch": 23.9875, + "grad_norm": 2.277673951286204, + "learning_rate": 5e-05, + "loss": 0.0787, + "num_input_tokens_seen": 525486396, + "step": 5757 + }, + { + "epoch": 23.9875, + "loss": 0.12976506352424622, + "loss_ce": 0.005314384587109089, + "loss_iou": 0.25, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 525486396, + "step": 5757 + }, + { + "epoch": 23.991666666666667, + "grad_norm": 3.663415045011244, + "learning_rate": 5e-05, + "loss": 0.0423, + "num_input_tokens_seen": 525577360, + "step": 5758 + }, + { + "epoch": 23.991666666666667, + "loss": 0.02190682850778103, + "loss_ce": 2.836089606716996e-06, + "loss_iou": 0.15625, + "loss_num": 0.004364013671875, + "loss_xval": 0.0218505859375, + "num_input_tokens_seen": 525577360, + "step": 5758 + }, + { + "epoch": 23.995833333333334, + "grad_norm": 2.4707032068519803, + "learning_rate": 5e-05, + "loss": 0.0376, + "num_input_tokens_seen": 525668344, + "step": 5759 + }, + { + "epoch": 23.995833333333334, + "loss": 0.044314391911029816, + "loss_ce": 2.866746399377007e-06, + "loss_iou": 0.248046875, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 525668344, + "step": 5759 + }, + { + "epoch": 24.0, + "grad_norm": 1.9659605951454453, + "learning_rate": 5e-05, + "loss": 0.0426, + "num_input_tokens_seen": 525758440, + "step": 5760 + }, + { + "epoch": 24.0, + "loss": 0.04882945120334625, + "loss_ce": 8.954636541602667e-06, + "loss_iou": 0.24609375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 525758440, + "step": 5760 + }, + { + "epoch": 24.004166666666666, + "grad_norm": 2.824927214021873, + "learning_rate": 5e-05, + "loss": 0.0271, + "num_input_tokens_seen": 525849888, + "step": 5761 + }, + { + "epoch": 24.004166666666666, + "loss": 0.02209128439426422, + "loss_ce": 4.188737420918187e-06, + "loss_iou": 0.203125, + "loss_num": 0.004425048828125, + "loss_xval": 0.0220947265625, + "num_input_tokens_seen": 525849888, + "step": 5761 + }, + { + "epoch": 24.008333333333333, + "grad_norm": 3.656224607655451, + "learning_rate": 5e-05, + "loss": 0.0442, + "num_input_tokens_seen": 525940484, + "step": 5762 + }, + { + "epoch": 24.008333333333333, + "loss": 0.04184925556182861, + "loss_ce": 9.655170288169757e-06, + "loss_iou": 0.28515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 525940484, + "step": 5762 + }, + { + "epoch": 24.0125, + "grad_norm": 2.9338961938291477, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 526031980, + "step": 5763 + }, + { + "epoch": 24.0125, + "loss": 0.04121887683868408, + "loss_ce": 0.00023377228353638202, + "loss_iou": 0.26953125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 526031980, + "step": 5763 + }, + { + "epoch": 24.016666666666666, + "grad_norm": 3.04974742807937, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 526123056, + "step": 5764 + }, + { + "epoch": 24.016666666666666, + "loss": 0.030127380043268204, + "loss_ce": 2.941884122265037e-05, + "loss_iou": 0.27734375, + "loss_num": 0.006011962890625, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 526123056, + "step": 5764 + }, + { + "epoch": 24.020833333333332, + "grad_norm": 2.788804168959336, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 526214632, + "step": 5765 + }, + { + "epoch": 24.020833333333332, + "loss": 0.04753812402486801, + "loss_ce": 6.996840966166928e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 526214632, + "step": 5765 + }, + { + "epoch": 24.025, + "grad_norm": 2.4890111163498854, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 526305964, + "step": 5766 + }, + { + "epoch": 24.025, + "loss": 0.0316397026181221, + "loss_ce": 8.229689228755888e-06, + "loss_iou": 0.2353515625, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 526305964, + "step": 5766 + }, + { + "epoch": 24.029166666666665, + "grad_norm": 2.6348467698823947, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 526397548, + "step": 5767 + }, + { + "epoch": 24.029166666666665, + "loss": 0.033110395073890686, + "loss_ce": 6.449178272305289e-06, + "loss_iou": 0.189453125, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 526397548, + "step": 5767 + }, + { + "epoch": 24.033333333333335, + "grad_norm": 3.1410931077727784, + "learning_rate": 5e-05, + "loss": 0.0572, + "num_input_tokens_seen": 526488664, + "step": 5768 + }, + { + "epoch": 24.033333333333335, + "loss": 0.07993629574775696, + "loss_ce": 1.075953696272336e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 526488664, + "step": 5768 + }, + { + "epoch": 24.0375, + "grad_norm": 3.1439928139816, + "learning_rate": 5e-05, + "loss": 0.0634, + "num_input_tokens_seen": 526580092, + "step": 5769 + }, + { + "epoch": 24.0375, + "loss": 0.0736217349767685, + "loss_ce": 4.385370630188845e-05, + "loss_iou": 0.302734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 526580092, + "step": 5769 + }, + { + "epoch": 24.041666666666668, + "grad_norm": 2.068542934078751, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 526672048, + "step": 5770 + }, + { + "epoch": 24.041666666666668, + "loss": 0.06701646745204926, + "loss_ce": 4.564270784612745e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 526672048, + "step": 5770 + }, + { + "epoch": 24.045833333333334, + "grad_norm": 3.422356384221351, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 526762864, + "step": 5771 + }, + { + "epoch": 24.045833333333334, + "loss": 0.05668189376592636, + "loss_ce": 3.1243744160747156e-06, + "loss_iou": 0.275390625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 526762864, + "step": 5771 + }, + { + "epoch": 24.05, + "grad_norm": 2.8548143452335255, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 526854648, + "step": 5772 + }, + { + "epoch": 24.05, + "loss": 0.029136527329683304, + "loss_ce": 3.038828981516417e-05, + "loss_iou": 0.29296875, + "loss_num": 0.005828857421875, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 526854648, + "step": 5772 + }, + { + "epoch": 24.054166666666667, + "grad_norm": 3.744100095025013, + "learning_rate": 5e-05, + "loss": 0.0245, + "num_input_tokens_seen": 526945920, + "step": 5773 + }, + { + "epoch": 24.054166666666667, + "loss": 0.025915004312992096, + "loss_ce": 5.581115146924276e-06, + "loss_iou": 0.22265625, + "loss_num": 0.00518798828125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 526945920, + "step": 5773 + }, + { + "epoch": 24.058333333333334, + "grad_norm": 2.647107076103916, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 527036784, + "step": 5774 + }, + { + "epoch": 24.058333333333334, + "loss": 0.04015457630157471, + "loss_ce": 8.70062558533391e-06, + "loss_iou": 0.349609375, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 527036784, + "step": 5774 + }, + { + "epoch": 24.0625, + "grad_norm": 2.331636608977699, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 527128884, + "step": 5775 + }, + { + "epoch": 24.0625, + "loss": 0.02479863166809082, + "loss_ce": 0.00012517115101218224, + "loss_iou": 0.1181640625, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 527128884, + "step": 5775 + }, + { + "epoch": 24.066666666666666, + "grad_norm": 1.5536981632931506, + "learning_rate": 5e-05, + "loss": 0.0271, + "num_input_tokens_seen": 527220576, + "step": 5776 + }, + { + "epoch": 24.066666666666666, + "loss": 0.028964003548026085, + "loss_ce": 1.808075830922462e-05, + "loss_iou": 0.205078125, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 527220576, + "step": 5776 + }, + { + "epoch": 24.070833333333333, + "grad_norm": 0.988929743493313, + "learning_rate": 5e-05, + "loss": 0.0172, + "num_input_tokens_seen": 527312016, + "step": 5777 + }, + { + "epoch": 24.070833333333333, + "loss": 0.017999500036239624, + "loss_ce": 2.4647210011607967e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.00360107421875, + "loss_xval": 0.0179443359375, + "num_input_tokens_seen": 527312016, + "step": 5777 + }, + { + "epoch": 24.075, + "grad_norm": 1.4301392756288007, + "learning_rate": 5e-05, + "loss": 0.0266, + "num_input_tokens_seen": 527403400, + "step": 5778 + }, + { + "epoch": 24.075, + "loss": 0.03159000352025032, + "loss_ce": 4.308335064706625e-06, + "loss_iou": 0.3359375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 527403400, + "step": 5778 + }, + { + "epoch": 24.079166666666666, + "grad_norm": 2.4242658306767693, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 527495060, + "step": 5779 + }, + { + "epoch": 24.079166666666666, + "loss": 0.03482062369585037, + "loss_ce": 7.694960913795512e-06, + "loss_iou": 0.251953125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 527495060, + "step": 5779 + }, + { + "epoch": 24.083333333333332, + "grad_norm": 3.061292902066298, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 527586612, + "step": 5780 + }, + { + "epoch": 24.083333333333332, + "loss": 0.05456852912902832, + "loss_ce": 3.361558265169151e-05, + "loss_iou": 0.24609375, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 527586612, + "step": 5780 + }, + { + "epoch": 24.0875, + "grad_norm": 3.0053826041693625, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 527678016, + "step": 5781 + }, + { + "epoch": 24.0875, + "loss": 0.06668329238891602, + "loss_ce": 2.387518179602921e-06, + "loss_iou": 0.25390625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 527678016, + "step": 5781 + }, + { + "epoch": 24.091666666666665, + "grad_norm": 3.228063081715965, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 527769724, + "step": 5782 + }, + { + "epoch": 24.091666666666665, + "loss": 0.025066962465643883, + "loss_ce": 8.215374691644683e-06, + "loss_iou": 0.201171875, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 527769724, + "step": 5782 + }, + { + "epoch": 24.095833333333335, + "grad_norm": 3.9331987558361563, + "learning_rate": 5e-05, + "loss": 0.0596, + "num_input_tokens_seen": 527859344, + "step": 5783 + }, + { + "epoch": 24.095833333333335, + "loss": 0.08312556147575378, + "loss_ce": 3.3073583836085163e-06, + "loss_iou": 0.1533203125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 527859344, + "step": 5783 + }, + { + "epoch": 24.1, + "grad_norm": 2.1210316223620156, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 527950948, + "step": 5784 + }, + { + "epoch": 24.1, + "loss": 0.08202598989009857, + "loss_ce": 0.0043129813857376575, + "loss_iou": 0.2080078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 527950948, + "step": 5784 + }, + { + "epoch": 24.104166666666668, + "grad_norm": 1.1943825243234527, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 528041896, + "step": 5785 + }, + { + "epoch": 24.104166666666668, + "loss": 0.025781847536563873, + "loss_ce": 2.124416369042592e-06, + "loss_iou": 0.265625, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 528041896, + "step": 5785 + }, + { + "epoch": 24.108333333333334, + "grad_norm": 1.8329688133690072, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 528133600, + "step": 5786 + }, + { + "epoch": 24.108333333333334, + "loss": 0.03760939836502075, + "loss_ce": 2.7001751732314005e-05, + "loss_iou": 0.16796875, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 528133600, + "step": 5786 + }, + { + "epoch": 24.1125, + "grad_norm": 3.261515832216803, + "learning_rate": 5e-05, + "loss": 0.0251, + "num_input_tokens_seen": 528224624, + "step": 5787 + }, + { + "epoch": 24.1125, + "loss": 0.021124158054590225, + "loss_ce": 1.362401235383004e-05, + "loss_iou": 0.232421875, + "loss_num": 0.00421142578125, + "loss_xval": 0.0211181640625, + "num_input_tokens_seen": 528224624, + "step": 5787 + }, + { + "epoch": 24.116666666666667, + "grad_norm": 3.310631450485702, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 528315756, + "step": 5788 + }, + { + "epoch": 24.116666666666667, + "loss": 0.028005395084619522, + "loss_ce": 0.00029543385608121753, + "loss_iou": 0.38671875, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 528315756, + "step": 5788 + }, + { + "epoch": 24.120833333333334, + "grad_norm": 2.3412593794878704, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 528407176, + "step": 5789 + }, + { + "epoch": 24.120833333333334, + "loss": 0.024646885693073273, + "loss_ce": 8.786471880739555e-05, + "loss_iou": 0.23046875, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 528407176, + "step": 5789 + }, + { + "epoch": 24.125, + "grad_norm": 2.243844554830831, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 528498852, + "step": 5790 + }, + { + "epoch": 24.125, + "loss": 0.03345056623220444, + "loss_ce": 0.00045343622332438827, + "loss_iou": 0.19921875, + "loss_num": 0.006622314453125, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 528498852, + "step": 5790 + }, + { + "epoch": 24.129166666666666, + "grad_norm": 2.1521874782066295, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 528590380, + "step": 5791 + }, + { + "epoch": 24.129166666666666, + "loss": 0.06372282654047012, + "loss_ce": 1.7384611055604182e-05, + "loss_iou": 0.193359375, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 528590380, + "step": 5791 + }, + { + "epoch": 24.133333333333333, + "grad_norm": 2.3804266455050396, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 528681884, + "step": 5792 + }, + { + "epoch": 24.133333333333333, + "loss": 0.0350668840110302, + "loss_ce": 2.1849505174031947e-06, + "loss_iou": 0.306640625, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 528681884, + "step": 5792 + }, + { + "epoch": 24.1375, + "grad_norm": 2.7841512549241845, + "learning_rate": 5e-05, + "loss": 0.0517, + "num_input_tokens_seen": 528772944, + "step": 5793 + }, + { + "epoch": 24.1375, + "loss": 0.07232136279344559, + "loss_ce": 2.3284333110495936e-06, + "loss_iou": 0.1943359375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 528772944, + "step": 5793 + }, + { + "epoch": 24.141666666666666, + "grad_norm": 2.399875513009259, + "learning_rate": 5e-05, + "loss": 0.0609, + "num_input_tokens_seen": 528864260, + "step": 5794 + }, + { + "epoch": 24.141666666666666, + "loss": 0.09643752872943878, + "loss_ce": 1.9853255253110547e-06, + "loss_iou": 0.1953125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 528864260, + "step": 5794 + }, + { + "epoch": 24.145833333333332, + "grad_norm": 2.2389868112988216, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 528955488, + "step": 5795 + }, + { + "epoch": 24.145833333333332, + "loss": 0.03514765202999115, + "loss_ce": 2.1921698134974577e-05, + "loss_iou": 0.0810546875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 528955488, + "step": 5795 + }, + { + "epoch": 24.15, + "grad_norm": 1.9068440934972977, + "learning_rate": 5e-05, + "loss": 0.0251, + "num_input_tokens_seen": 529046428, + "step": 5796 + }, + { + "epoch": 24.15, + "loss": 0.02915019728243351, + "loss_ce": 5.908582352276426e-06, + "loss_iou": 0.1640625, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 529046428, + "step": 5796 + }, + { + "epoch": 24.154166666666665, + "grad_norm": 1.9521876939459881, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 529137792, + "step": 5797 + }, + { + "epoch": 24.154166666666665, + "loss": 0.06888218224048615, + "loss_ce": 1.926880213432014e-05, + "loss_iou": 0.201171875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 529137792, + "step": 5797 + }, + { + "epoch": 24.158333333333335, + "grad_norm": 2.960062059778355, + "learning_rate": 5e-05, + "loss": 0.0235, + "num_input_tokens_seen": 529228716, + "step": 5798 + }, + { + "epoch": 24.158333333333335, + "loss": 0.0200907401740551, + "loss_ce": 1.0174837370868772e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.0040283203125, + "loss_xval": 0.02001953125, + "num_input_tokens_seen": 529228716, + "step": 5798 + }, + { + "epoch": 24.1625, + "grad_norm": 2.1244946710045514, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 529320692, + "step": 5799 + }, + { + "epoch": 24.1625, + "loss": 0.04741659015417099, + "loss_ce": 4.186751175438985e-05, + "loss_iou": 0.314453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 529320692, + "step": 5799 + }, + { + "epoch": 24.166666666666668, + "grad_norm": 0.9728819809690822, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 529411972, + "step": 5800 + }, + { + "epoch": 24.166666666666668, + "loss": 0.0351259745657444, + "loss_ce": 7.87230601417832e-06, + "loss_iou": 0.16015625, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 529411972, + "step": 5800 + }, + { + "epoch": 24.170833333333334, + "grad_norm": 1.0448045563911708, + "learning_rate": 5e-05, + "loss": 0.0766, + "num_input_tokens_seen": 529503304, + "step": 5801 + }, + { + "epoch": 24.170833333333334, + "loss": 0.10207903385162354, + "loss_ce": 5.366885943658417e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 529503304, + "step": 5801 + }, + { + "epoch": 24.175, + "grad_norm": 1.406917201813283, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 529594264, + "step": 5802 + }, + { + "epoch": 24.175, + "loss": 0.033842310309410095, + "loss_ce": 0.00047133976477198303, + "loss_iou": 0.2353515625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 529594264, + "step": 5802 + }, + { + "epoch": 24.179166666666667, + "grad_norm": 2.4702882382279254, + "learning_rate": 5e-05, + "loss": 0.0264, + "num_input_tokens_seen": 529685644, + "step": 5803 + }, + { + "epoch": 24.179166666666667, + "loss": 0.02854456752538681, + "loss_ce": 1.4447728972299956e-05, + "loss_iou": 0.208984375, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 529685644, + "step": 5803 + }, + { + "epoch": 24.183333333333334, + "grad_norm": 3.463313588988296, + "learning_rate": 5e-05, + "loss": 0.039, + "num_input_tokens_seen": 529776884, + "step": 5804 + }, + { + "epoch": 24.183333333333334, + "loss": 0.04076559096574783, + "loss_ce": 9.366483027406503e-06, + "loss_iou": 0.3671875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 529776884, + "step": 5804 + }, + { + "epoch": 24.1875, + "grad_norm": 3.6153047387307544, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 529868612, + "step": 5805 + }, + { + "epoch": 24.1875, + "loss": 0.04769245535135269, + "loss_ce": 2.3997785319807008e-05, + "loss_iou": 0.38671875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 529868612, + "step": 5805 + }, + { + "epoch": 24.191666666666666, + "grad_norm": 2.5920213435423065, + "learning_rate": 5e-05, + "loss": 0.0246, + "num_input_tokens_seen": 529959920, + "step": 5806 + }, + { + "epoch": 24.191666666666666, + "loss": 0.028584707528352737, + "loss_ce": 1.2625709132407792e-05, + "loss_iou": 0.29296875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 529959920, + "step": 5806 + }, + { + "epoch": 24.195833333333333, + "grad_norm": 2.306361847712388, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 530050764, + "step": 5807 + }, + { + "epoch": 24.195833333333333, + "loss": 0.02445165440440178, + "loss_ce": 1.4706112779094838e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 530050764, + "step": 5807 + }, + { + "epoch": 24.2, + "grad_norm": 5.89266302181465, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 530142624, + "step": 5808 + }, + { + "epoch": 24.2, + "loss": 0.0434047132730484, + "loss_ce": 8.719249308342114e-06, + "loss_iou": 0.212890625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 530142624, + "step": 5808 + }, + { + "epoch": 24.204166666666666, + "grad_norm": 2.340488034661521, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 530234124, + "step": 5809 + }, + { + "epoch": 24.204166666666666, + "loss": 0.04728977009654045, + "loss_ce": 2.782668843792635e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 530234124, + "step": 5809 + }, + { + "epoch": 24.208333333333332, + "grad_norm": 2.644660101456627, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 530324832, + "step": 5810 + }, + { + "epoch": 24.208333333333332, + "loss": 0.07045643031597137, + "loss_ce": 5.238256562734023e-05, + "loss_iou": 0.208984375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 530324832, + "step": 5810 + }, + { + "epoch": 24.2125, + "grad_norm": 1.476402483879766, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 530416428, + "step": 5811 + }, + { + "epoch": 24.2125, + "loss": 0.05975855141878128, + "loss_ce": 5.134122147865128e-06, + "loss_iou": 0.26171875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 530416428, + "step": 5811 + }, + { + "epoch": 24.216666666666665, + "grad_norm": 2.6559357381664386, + "learning_rate": 5e-05, + "loss": 0.0322, + "num_input_tokens_seen": 530507672, + "step": 5812 + }, + { + "epoch": 24.216666666666665, + "loss": 0.03925604373216629, + "loss_ce": 0.0005216074059717357, + "loss_iou": 0.2158203125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 530507672, + "step": 5812 + }, + { + "epoch": 24.220833333333335, + "grad_norm": 2.797958040194132, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 530599024, + "step": 5813 + }, + { + "epoch": 24.220833333333335, + "loss": 0.05553257465362549, + "loss_ce": 5.839197001478169e-06, + "loss_iou": 0.212890625, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 530599024, + "step": 5813 + }, + { + "epoch": 24.225, + "grad_norm": 2.2344297164372944, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 530688792, + "step": 5814 + }, + { + "epoch": 24.225, + "loss": 0.025300024077296257, + "loss_ce": 1.6210691683227196e-05, + "loss_iou": 0.337890625, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 530688792, + "step": 5814 + }, + { + "epoch": 24.229166666666668, + "grad_norm": 2.558663557519644, + "learning_rate": 5e-05, + "loss": 0.0538, + "num_input_tokens_seen": 530780032, + "step": 5815 + }, + { + "epoch": 24.229166666666668, + "loss": 0.03335661441087723, + "loss_ce": 8.532742867828347e-06, + "loss_iou": 0.296875, + "loss_num": 0.00665283203125, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 530780032, + "step": 5815 + }, + { + "epoch": 24.233333333333334, + "grad_norm": 3.527114855363792, + "learning_rate": 5e-05, + "loss": 0.029, + "num_input_tokens_seen": 530871548, + "step": 5816 + }, + { + "epoch": 24.233333333333334, + "loss": 0.0321367122232914, + "loss_ce": 1.7028531829055282e-06, + "loss_iou": 0.29296875, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 530871548, + "step": 5816 + }, + { + "epoch": 24.2375, + "grad_norm": 3.648796475096683, + "learning_rate": 5e-05, + "loss": 0.0457, + "num_input_tokens_seen": 530962972, + "step": 5817 + }, + { + "epoch": 24.2375, + "loss": 0.03030816838145256, + "loss_ce": 1.947232522070408e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 530962972, + "step": 5817 + }, + { + "epoch": 24.241666666666667, + "grad_norm": 2.5664420438996842, + "learning_rate": 5e-05, + "loss": 0.0772, + "num_input_tokens_seen": 531054116, + "step": 5818 + }, + { + "epoch": 24.241666666666667, + "loss": 0.12223930656909943, + "loss_ce": 1.6416102880612016e-05, + "loss_iou": 0.109375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 531054116, + "step": 5818 + }, + { + "epoch": 24.245833333333334, + "grad_norm": 3.0292135260258783, + "learning_rate": 5e-05, + "loss": 0.0555, + "num_input_tokens_seen": 531145604, + "step": 5819 + }, + { + "epoch": 24.245833333333334, + "loss": 0.08507491648197174, + "loss_ce": 3.768545502680354e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 531145604, + "step": 5819 + }, + { + "epoch": 24.25, + "grad_norm": 3.3324941616368946, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 531237248, + "step": 5820 + }, + { + "epoch": 24.25, + "loss": 0.028207680210471153, + "loss_ce": 0.000329872767906636, + "loss_iou": 0.35546875, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 531237248, + "step": 5820 + }, + { + "epoch": 24.254166666666666, + "grad_norm": 2.4142162953720256, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 531329000, + "step": 5821 + }, + { + "epoch": 24.254166666666666, + "loss": 0.04822106659412384, + "loss_ce": 3.289967935415916e-06, + "loss_iou": 0.12158203125, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 531329000, + "step": 5821 + }, + { + "epoch": 24.258333333333333, + "grad_norm": 1.4771427598436098, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 531420252, + "step": 5822 + }, + { + "epoch": 24.258333333333333, + "loss": 0.06313102692365646, + "loss_ce": 5.416107342171017e-06, + "loss_iou": 0.0625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 531420252, + "step": 5822 + }, + { + "epoch": 24.2625, + "grad_norm": 2.3319254388492543, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 531511460, + "step": 5823 + }, + { + "epoch": 24.2625, + "loss": 0.029953958466649055, + "loss_ce": 0.0006036765989847481, + "loss_iou": 0.232421875, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 531511460, + "step": 5823 + }, + { + "epoch": 24.266666666666666, + "grad_norm": 2.276987456311338, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 531603208, + "step": 5824 + }, + { + "epoch": 24.266666666666666, + "loss": 0.08067796379327774, + "loss_ce": 4.747425009554718e-06, + "loss_iou": 0.28125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 531603208, + "step": 5824 + }, + { + "epoch": 24.270833333333332, + "grad_norm": 1.7570098726468637, + "learning_rate": 5e-05, + "loss": 0.0956, + "num_input_tokens_seen": 531694384, + "step": 5825 + }, + { + "epoch": 24.270833333333332, + "loss": 0.15547670423984528, + "loss_ce": 4.906281901639886e-06, + "loss_iou": 0.1669921875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 531694384, + "step": 5825 + }, + { + "epoch": 24.275, + "grad_norm": 1.2715875752875219, + "learning_rate": 5e-05, + "loss": 0.0187, + "num_input_tokens_seen": 531785304, + "step": 5826 + }, + { + "epoch": 24.275, + "loss": 0.01852235570549965, + "loss_ce": 2.0009840682178037e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.0037078857421875, + "loss_xval": 0.0185546875, + "num_input_tokens_seen": 531785304, + "step": 5826 + }, + { + "epoch": 24.279166666666665, + "grad_norm": 4.224597135091308, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 531876528, + "step": 5827 + }, + { + "epoch": 24.279166666666665, + "loss": 0.0325944647192955, + "loss_ce": 9.317414878751151e-06, + "loss_iou": 0.1201171875, + "loss_num": 0.00653076171875, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 531876528, + "step": 5827 + }, + { + "epoch": 24.283333333333335, + "grad_norm": 1.0235952605649894, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 531967608, + "step": 5828 + }, + { + "epoch": 24.283333333333335, + "loss": 0.03589292988181114, + "loss_ce": 4.256347892805934e-06, + "loss_iou": 0.26171875, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 531967608, + "step": 5828 + }, + { + "epoch": 24.2875, + "grad_norm": 4.7815468426885275, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 532059628, + "step": 5829 + }, + { + "epoch": 24.2875, + "loss": 0.037222668528556824, + "loss_ce": 6.481990567408502e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 532059628, + "step": 5829 + }, + { + "epoch": 24.291666666666668, + "grad_norm": 2.1804774460721172, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 532150832, + "step": 5830 + }, + { + "epoch": 24.291666666666668, + "loss": 0.0251055508852005, + "loss_ce": 4.842361704504583e-06, + "loss_iou": 0.10205078125, + "loss_num": 0.0050048828125, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 532150832, + "step": 5830 + }, + { + "epoch": 24.295833333333334, + "grad_norm": 2.6740592119510853, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 532242692, + "step": 5831 + }, + { + "epoch": 24.295833333333334, + "loss": 0.05839364975690842, + "loss_ce": 0.00021188265236560255, + "loss_iou": 0.294921875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 532242692, + "step": 5831 + }, + { + "epoch": 24.3, + "grad_norm": 2.9955427450303573, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 532334064, + "step": 5832 + }, + { + "epoch": 24.3, + "loss": 0.044400908052921295, + "loss_ce": 2.0719862732221372e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 532334064, + "step": 5832 + }, + { + "epoch": 24.304166666666667, + "grad_norm": 3.123291369024042, + "learning_rate": 5e-05, + "loss": 0.0376, + "num_input_tokens_seen": 532426100, + "step": 5833 + }, + { + "epoch": 24.304166666666667, + "loss": 0.038879990577697754, + "loss_ce": 8.226255886256695e-06, + "loss_iou": 0.10595703125, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 532426100, + "step": 5833 + }, + { + "epoch": 24.308333333333334, + "grad_norm": 1.2739468334377282, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 532517564, + "step": 5834 + }, + { + "epoch": 24.308333333333334, + "loss": 0.03598446026444435, + "loss_ce": 4.234915650158655e-06, + "loss_iou": 0.376953125, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 532517564, + "step": 5834 + }, + { + "epoch": 24.3125, + "grad_norm": 2.1308749770898876, + "learning_rate": 5e-05, + "loss": 0.0417, + "num_input_tokens_seen": 532608880, + "step": 5835 + }, + { + "epoch": 24.3125, + "loss": 0.04994537681341171, + "loss_ce": 3.362632469361415e-06, + "loss_iou": 0.267578125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 532608880, + "step": 5835 + }, + { + "epoch": 24.316666666666666, + "grad_norm": 2.5524278736227934, + "learning_rate": 5e-05, + "loss": 0.0352, + "num_input_tokens_seen": 532700312, + "step": 5836 + }, + { + "epoch": 24.316666666666666, + "loss": 0.04723707586526871, + "loss_ce": 1.1121027455374133e-05, + "loss_iou": 0.283203125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 532700312, + "step": 5836 + }, + { + "epoch": 24.320833333333333, + "grad_norm": 2.18093667139612, + "learning_rate": 5e-05, + "loss": 0.0285, + "num_input_tokens_seen": 532791900, + "step": 5837 + }, + { + "epoch": 24.320833333333333, + "loss": 0.025013525038957596, + "loss_ce": 4.369113412394654e-06, + "loss_iou": 0.208984375, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 532791900, + "step": 5837 + }, + { + "epoch": 24.325, + "grad_norm": 3.4516992775793227, + "learning_rate": 5e-05, + "loss": 0.0382, + "num_input_tokens_seen": 532883116, + "step": 5838 + }, + { + "epoch": 24.325, + "loss": 0.05090608820319176, + "loss_ce": 2.7665159905154724e-06, + "loss_iou": 0.470703125, + "loss_num": 0.01019287109375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 532883116, + "step": 5838 + }, + { + "epoch": 24.329166666666666, + "grad_norm": 2.892724046199087, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 532974236, + "step": 5839 + }, + { + "epoch": 24.329166666666666, + "loss": 0.034184593707323074, + "loss_ce": 4.90624779558857e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 532974236, + "step": 5839 + }, + { + "epoch": 24.333333333333332, + "grad_norm": 3.0937124821286814, + "learning_rate": 5e-05, + "loss": 0.0494, + "num_input_tokens_seen": 533065456, + "step": 5840 + }, + { + "epoch": 24.333333333333332, + "loss": 0.042577601969242096, + "loss_ce": 5.579438948188908e-06, + "loss_iou": 0.345703125, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 533065456, + "step": 5840 + }, + { + "epoch": 24.3375, + "grad_norm": 3.3613541657894075, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 533155756, + "step": 5841 + }, + { + "epoch": 24.3375, + "loss": 0.050340019166469574, + "loss_ce": 8.902155059331562e-06, + "loss_iou": 0.043701171875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 533155756, + "step": 5841 + }, + { + "epoch": 24.341666666666665, + "grad_norm": 0.7344652474322563, + "learning_rate": 5e-05, + "loss": 0.0603, + "num_input_tokens_seen": 533246872, + "step": 5842 + }, + { + "epoch": 24.341666666666665, + "loss": 0.0807098001241684, + "loss_ce": 6.0692091210512444e-06, + "loss_iou": 0.265625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 533246872, + "step": 5842 + }, + { + "epoch": 24.345833333333335, + "grad_norm": 1.4355854996546975, + "learning_rate": 5e-05, + "loss": 0.0379, + "num_input_tokens_seen": 533338396, + "step": 5843 + }, + { + "epoch": 24.345833333333335, + "loss": 0.03224565088748932, + "loss_ce": 0.00011063837155234069, + "loss_iou": 0.1943359375, + "loss_num": 0.00640869140625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 533338396, + "step": 5843 + }, + { + "epoch": 24.35, + "grad_norm": 1.298353541852667, + "learning_rate": 5e-05, + "loss": 0.0325, + "num_input_tokens_seen": 533429764, + "step": 5844 + }, + { + "epoch": 24.35, + "loss": 0.040697984397411346, + "loss_ce": 2.79236428468721e-06, + "loss_iou": 0.1591796875, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 533429764, + "step": 5844 + }, + { + "epoch": 24.354166666666668, + "grad_norm": 1.4414559994848586, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 533520880, + "step": 5845 + }, + { + "epoch": 24.354166666666668, + "loss": 0.018156087026000023, + "loss_ce": 5.757582130172523e-06, + "loss_iou": 0.119140625, + "loss_num": 0.003631591796875, + "loss_xval": 0.0181884765625, + "num_input_tokens_seen": 533520880, + "step": 5845 + }, + { + "epoch": 24.358333333333334, + "grad_norm": 2.666583677527782, + "learning_rate": 5e-05, + "loss": 0.0403, + "num_input_tokens_seen": 533612312, + "step": 5846 + }, + { + "epoch": 24.358333333333334, + "loss": 0.034728314727544785, + "loss_ce": 6.940867024241015e-06, + "loss_iou": 0.26953125, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 533612312, + "step": 5846 + }, + { + "epoch": 24.3625, + "grad_norm": 3.2379036976763613, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 533703372, + "step": 5847 + }, + { + "epoch": 24.3625, + "loss": 0.027110453695058823, + "loss_ce": 3.214926209693658e-06, + "loss_iou": 0.2890625, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 533703372, + "step": 5847 + }, + { + "epoch": 24.366666666666667, + "grad_norm": 3.387507370053438, + "learning_rate": 5e-05, + "loss": 0.0343, + "num_input_tokens_seen": 533794456, + "step": 5848 + }, + { + "epoch": 24.366666666666667, + "loss": 0.03715943545103073, + "loss_ce": 4.285943759896327e-06, + "loss_iou": 0.3984375, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 533794456, + "step": 5848 + }, + { + "epoch": 24.370833333333334, + "grad_norm": 2.5877390535824008, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 533885400, + "step": 5849 + }, + { + "epoch": 24.370833333333334, + "loss": 0.04111175611615181, + "loss_ce": 4.5775800572300795e-06, + "loss_iou": 0.19140625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 533885400, + "step": 5849 + }, + { + "epoch": 24.375, + "grad_norm": 2.70954407175507, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 533976732, + "step": 5850 + }, + { + "epoch": 24.375, + "loss": 0.035382576286792755, + "loss_ce": 8.136580436257645e-05, + "loss_iou": 0.208984375, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 533976732, + "step": 5850 + }, + { + "epoch": 24.379166666666666, + "grad_norm": 3.679600535484754, + "learning_rate": 5e-05, + "loss": 0.05, + "num_input_tokens_seen": 534068004, + "step": 5851 + }, + { + "epoch": 24.379166666666666, + "loss": 0.07135792076587677, + "loss_ce": 0.00014515325892716646, + "loss_iou": 0.2197265625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 534068004, + "step": 5851 + }, + { + "epoch": 24.383333333333333, + "grad_norm": 2.9961527662194998, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 534158804, + "step": 5852 + }, + { + "epoch": 24.383333333333333, + "loss": 0.0763065367937088, + "loss_ce": 4.957843884767499e-06, + "loss_iou": 0.31640625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 534158804, + "step": 5852 + }, + { + "epoch": 24.3875, + "grad_norm": 2.6116225656771643, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 534249748, + "step": 5853 + }, + { + "epoch": 24.3875, + "loss": 0.04672173783183098, + "loss_ce": 6.956057404750027e-06, + "loss_iou": 0.21875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 534249748, + "step": 5853 + }, + { + "epoch": 24.391666666666666, + "grad_norm": 3.241412571020756, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 534340768, + "step": 5854 + }, + { + "epoch": 24.391666666666666, + "loss": 0.08435220271348953, + "loss_ce": 1.6172627965715947e-06, + "loss_iou": 0.251953125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 534340768, + "step": 5854 + }, + { + "epoch": 24.395833333333332, + "grad_norm": 2.9697163409184744, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 534431588, + "step": 5855 + }, + { + "epoch": 24.395833333333332, + "loss": 0.02374483086168766, + "loss_ce": 3.267165084253065e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 534431588, + "step": 5855 + }, + { + "epoch": 24.4, + "grad_norm": 2.96063650221661, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 534522928, + "step": 5856 + }, + { + "epoch": 24.4, + "loss": 0.02467840537428856, + "loss_ce": 4.943191015627235e-06, + "loss_iou": 0.259765625, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 534522928, + "step": 5856 + }, + { + "epoch": 24.404166666666665, + "grad_norm": 2.0848545508983465, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 534614356, + "step": 5857 + }, + { + "epoch": 24.404166666666665, + "loss": 0.039054907858371735, + "loss_ce": 8.396092744078487e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 534614356, + "step": 5857 + }, + { + "epoch": 24.408333333333335, + "grad_norm": 1.946150724260364, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 534704084, + "step": 5858 + }, + { + "epoch": 24.408333333333335, + "loss": 0.027506262063980103, + "loss_ce": 2.2961814920563484e-06, + "loss_iou": 0.1669921875, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 534704084, + "step": 5858 + }, + { + "epoch": 24.4125, + "grad_norm": 2.0678636855620534, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 534795508, + "step": 5859 + }, + { + "epoch": 24.4125, + "loss": 0.03595554456114769, + "loss_ce": 5.838414836034644e-06, + "loss_iou": 0.181640625, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 534795508, + "step": 5859 + }, + { + "epoch": 24.416666666666668, + "grad_norm": 2.1996007767003674, + "learning_rate": 5e-05, + "loss": 0.0373, + "num_input_tokens_seen": 534887200, + "step": 5860 + }, + { + "epoch": 24.416666666666668, + "loss": 0.03372432291507721, + "loss_ce": 5.5806183809181675e-05, + "loss_iou": 0.2421875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 534887200, + "step": 5860 + }, + { + "epoch": 24.420833333333334, + "grad_norm": 2.484761238516966, + "learning_rate": 5e-05, + "loss": 0.0426, + "num_input_tokens_seen": 534978140, + "step": 5861 + }, + { + "epoch": 24.420833333333334, + "loss": 0.031754445284605026, + "loss_ce": 3.142125206068158e-05, + "loss_iou": 0.3125, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 534978140, + "step": 5861 + }, + { + "epoch": 24.425, + "grad_norm": 2.0217484965386037, + "learning_rate": 5e-05, + "loss": 0.034, + "num_input_tokens_seen": 535069568, + "step": 5862 + }, + { + "epoch": 24.425, + "loss": 0.043887313455343246, + "loss_ce": 3.037053375010146e-06, + "loss_iou": 0.185546875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 535069568, + "step": 5862 + }, + { + "epoch": 24.429166666666667, + "grad_norm": 2.4210319897447197, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 535160888, + "step": 5863 + }, + { + "epoch": 24.429166666666667, + "loss": 0.040758512914180756, + "loss_ce": 2.287687721036491e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 535160888, + "step": 5863 + }, + { + "epoch": 24.433333333333334, + "grad_norm": 3.0517251807657018, + "learning_rate": 5e-05, + "loss": 0.0805, + "num_input_tokens_seen": 535251832, + "step": 5864 + }, + { + "epoch": 24.433333333333334, + "loss": 0.04389939457178116, + "loss_ce": 3.037423812202178e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 535251832, + "step": 5864 + }, + { + "epoch": 24.4375, + "grad_norm": 2.2832054005693054, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 535343836, + "step": 5865 + }, + { + "epoch": 24.4375, + "loss": 0.026977423578500748, + "loss_ce": 7.515183824580163e-06, + "loss_iou": 0.19140625, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 535343836, + "step": 5865 + }, + { + "epoch": 24.441666666666666, + "grad_norm": 1.8672205491893512, + "learning_rate": 5e-05, + "loss": 0.0316, + "num_input_tokens_seen": 535435272, + "step": 5866 + }, + { + "epoch": 24.441666666666666, + "loss": 0.042527902871370316, + "loss_ce": 1.6572091681155143e-06, + "loss_iou": 0.29296875, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 535435272, + "step": 5866 + }, + { + "epoch": 24.445833333333333, + "grad_norm": 2.5144469528048283, + "learning_rate": 5e-05, + "loss": 0.0325, + "num_input_tokens_seen": 535527408, + "step": 5867 + }, + { + "epoch": 24.445833333333333, + "loss": 0.025866053998470306, + "loss_ce": 2.4057346763584064e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.005157470703125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 535527408, + "step": 5867 + }, + { + "epoch": 24.45, + "grad_norm": 2.8159218187853825, + "learning_rate": 5e-05, + "loss": 0.0335, + "num_input_tokens_seen": 535618412, + "step": 5868 + }, + { + "epoch": 24.45, + "loss": 0.033513039350509644, + "loss_ce": 4.736622940981761e-06, + "loss_iou": 0.283203125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 535618412, + "step": 5868 + }, + { + "epoch": 24.454166666666666, + "grad_norm": 3.4214494091468204, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 535709292, + "step": 5869 + }, + { + "epoch": 24.454166666666666, + "loss": 0.03987106680870056, + "loss_ce": 1.5108943443919998e-05, + "loss_iou": 0.2734375, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 535709292, + "step": 5869 + }, + { + "epoch": 24.458333333333332, + "grad_norm": 2.7589786437540322, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 535800576, + "step": 5870 + }, + { + "epoch": 24.458333333333332, + "loss": 0.029235608875751495, + "loss_ce": 7.60642797104083e-05, + "loss_iou": 0.279296875, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 535800576, + "step": 5870 + }, + { + "epoch": 24.4625, + "grad_norm": 2.6709935919613668, + "learning_rate": 5e-05, + "loss": 0.027, + "num_input_tokens_seen": 535892424, + "step": 5871 + }, + { + "epoch": 24.4625, + "loss": 0.030119696632027626, + "loss_ce": 1.4105718946666457e-05, + "loss_iou": 0.318359375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 535892424, + "step": 5871 + }, + { + "epoch": 24.466666666666665, + "grad_norm": 2.115546282310238, + "learning_rate": 5e-05, + "loss": 0.0811, + "num_input_tokens_seen": 535983528, + "step": 5872 + }, + { + "epoch": 24.466666666666665, + "loss": 0.03541748225688934, + "loss_ce": 1.8342611838306766e-06, + "loss_iou": 0.271484375, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 535983528, + "step": 5872 + }, + { + "epoch": 24.470833333333335, + "grad_norm": 2.025575395497392, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 536074768, + "step": 5873 + }, + { + "epoch": 24.470833333333335, + "loss": 0.031343501061201096, + "loss_ce": 1.949052148120245e-06, + "loss_iou": 0.494140625, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 536074768, + "step": 5873 + }, + { + "epoch": 24.475, + "grad_norm": 2.321141271223248, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 536165936, + "step": 5874 + }, + { + "epoch": 24.475, + "loss": 0.03415609523653984, + "loss_ce": 6.9254256231943145e-06, + "loss_iou": 0.306640625, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 536165936, + "step": 5874 + }, + { + "epoch": 24.479166666666668, + "grad_norm": 2.6402123602693823, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 536256940, + "step": 5875 + }, + { + "epoch": 24.479166666666668, + "loss": 0.03880603611469269, + "loss_ce": 2.935068550868891e-06, + "loss_iou": 0.2109375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 536256940, + "step": 5875 + }, + { + "epoch": 24.483333333333334, + "grad_norm": 2.118281042942542, + "learning_rate": 5e-05, + "loss": 0.0335, + "num_input_tokens_seen": 536348488, + "step": 5876 + }, + { + "epoch": 24.483333333333334, + "loss": 0.04635797068476677, + "loss_ce": 1.7691461380309192e-06, + "loss_iou": 0.265625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 536348488, + "step": 5876 + }, + { + "epoch": 24.4875, + "grad_norm": 2.0982521767984488, + "learning_rate": 5e-05, + "loss": 0.0245, + "num_input_tokens_seen": 536439932, + "step": 5877 + }, + { + "epoch": 24.4875, + "loss": 0.025177722796797752, + "loss_ce": 1.5980136595317163e-05, + "loss_iou": 0.255859375, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 536439932, + "step": 5877 + }, + { + "epoch": 24.491666666666667, + "grad_norm": 1.9342299361749438, + "learning_rate": 5e-05, + "loss": 0.0328, + "num_input_tokens_seen": 536531264, + "step": 5878 + }, + { + "epoch": 24.491666666666667, + "loss": 0.038806505501270294, + "loss_ce": 3.403896243980853e-06, + "loss_iou": 0.271484375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 536531264, + "step": 5878 + }, + { + "epoch": 24.495833333333334, + "grad_norm": 2.7704734606875876, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 536622972, + "step": 5879 + }, + { + "epoch": 24.495833333333334, + "loss": 0.025232654064893723, + "loss_ce": 2.2465160327556077e-06, + "loss_iou": 0.25, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 536622972, + "step": 5879 + }, + { + "epoch": 24.5, + "grad_norm": 3.179644385492909, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 536714900, + "step": 5880 + }, + { + "epoch": 24.5, + "loss": 0.07569757103919983, + "loss_ce": 1.3975191905046813e-05, + "loss_iou": 0.2890625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 536714900, + "step": 5880 + }, + { + "epoch": 24.504166666666666, + "grad_norm": 2.253804276771664, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 536806200, + "step": 5881 + }, + { + "epoch": 24.504166666666666, + "loss": 0.0256878063082695, + "loss_ce": 7.26442249288084e-06, + "loss_iou": 0.224609375, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 536806200, + "step": 5881 + }, + { + "epoch": 24.508333333333333, + "grad_norm": 2.586446671393119, + "learning_rate": 5e-05, + "loss": 0.0303, + "num_input_tokens_seen": 536897652, + "step": 5882 + }, + { + "epoch": 24.508333333333333, + "loss": 0.027839424088597298, + "loss_ce": 0.0007703331648372114, + "loss_iou": 0.26171875, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 536897652, + "step": 5882 + }, + { + "epoch": 24.5125, + "grad_norm": 1.9124413383342893, + "learning_rate": 5e-05, + "loss": 0.0441, + "num_input_tokens_seen": 536989336, + "step": 5883 + }, + { + "epoch": 24.5125, + "loss": 0.021991252899169922, + "loss_ce": 3.337895577715244e-06, + "loss_iou": 0.1513671875, + "loss_num": 0.00439453125, + "loss_xval": 0.02197265625, + "num_input_tokens_seen": 536989336, + "step": 5883 + }, + { + "epoch": 24.516666666666666, + "grad_norm": 1.914465500214039, + "learning_rate": 5e-05, + "loss": 0.0306, + "num_input_tokens_seen": 537080820, + "step": 5884 + }, + { + "epoch": 24.516666666666666, + "loss": 0.036232881247997284, + "loss_ce": 2.377348573645577e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.00726318359375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 537080820, + "step": 5884 + }, + { + "epoch": 24.520833333333332, + "grad_norm": 1.8379129218005454, + "learning_rate": 5e-05, + "loss": 0.053, + "num_input_tokens_seen": 537172124, + "step": 5885 + }, + { + "epoch": 24.520833333333332, + "loss": 0.06826095283031464, + "loss_ce": 8.385493856621906e-06, + "loss_iou": 0.3046875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 537172124, + "step": 5885 + }, + { + "epoch": 24.525, + "grad_norm": 2.205236982199377, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 537262800, + "step": 5886 + }, + { + "epoch": 24.525, + "loss": 0.0484449602663517, + "loss_ce": 5.936674369877437e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 537262800, + "step": 5886 + }, + { + "epoch": 24.529166666666665, + "grad_norm": 2.5479061923370026, + "learning_rate": 5e-05, + "loss": 0.0419, + "num_input_tokens_seen": 537354264, + "step": 5887 + }, + { + "epoch": 24.529166666666665, + "loss": 0.02890472114086151, + "loss_ce": 4.57498435935122e-06, + "loss_iou": 0.24609375, + "loss_num": 0.005767822265625, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 537354264, + "step": 5887 + }, + { + "epoch": 24.533333333333335, + "grad_norm": 2.409790801483189, + "learning_rate": 5e-05, + "loss": 0.0282, + "num_input_tokens_seen": 537445900, + "step": 5888 + }, + { + "epoch": 24.533333333333335, + "loss": 0.030124662443995476, + "loss_ce": 3.8115688312245766e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 537445900, + "step": 5888 + }, + { + "epoch": 24.5375, + "grad_norm": 3.0075649723691718, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 537536752, + "step": 5889 + }, + { + "epoch": 24.5375, + "loss": 0.06466107070446014, + "loss_ce": 1.954044137164601e-06, + "loss_iou": 0.2890625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 537536752, + "step": 5889 + }, + { + "epoch": 24.541666666666668, + "grad_norm": 2.898725150209427, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 537627952, + "step": 5890 + }, + { + "epoch": 24.541666666666668, + "loss": 0.04495709389448166, + "loss_ce": 4.702219484897796e-06, + "loss_iou": 0.271484375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 537627952, + "step": 5890 + }, + { + "epoch": 24.545833333333334, + "grad_norm": 2.8535368875900153, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 537719124, + "step": 5891 + }, + { + "epoch": 24.545833333333334, + "loss": 0.049891311675310135, + "loss_ce": 1.0329967153666075e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 537719124, + "step": 5891 + }, + { + "epoch": 24.55, + "grad_norm": 2.337094480779963, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 537810200, + "step": 5892 + }, + { + "epoch": 24.55, + "loss": 0.04928762465715408, + "loss_ce": 1.732335135784524e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 537810200, + "step": 5892 + }, + { + "epoch": 24.554166666666667, + "grad_norm": 3.505489995181878, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 537901240, + "step": 5893 + }, + { + "epoch": 24.554166666666667, + "loss": 0.02528042159974575, + "loss_ce": 4.236781023791991e-06, + "loss_iou": 0.27734375, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 537901240, + "step": 5893 + }, + { + "epoch": 24.558333333333334, + "grad_norm": 3.1120218461714346, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 537991900, + "step": 5894 + }, + { + "epoch": 24.558333333333334, + "loss": 0.03692232817411423, + "loss_ce": 9.52406189753674e-05, + "loss_iou": 0.248046875, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 537991900, + "step": 5894 + }, + { + "epoch": 24.5625, + "grad_norm": 2.377712094838735, + "learning_rate": 5e-05, + "loss": 0.0498, + "num_input_tokens_seen": 538083508, + "step": 5895 + }, + { + "epoch": 24.5625, + "loss": 0.028869686648249626, + "loss_ce": 3.0576215067412704e-05, + "loss_iou": 0.306640625, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 538083508, + "step": 5895 + }, + { + "epoch": 24.566666666666666, + "grad_norm": 2.4117882798127974, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 538174856, + "step": 5896 + }, + { + "epoch": 24.566666666666666, + "loss": 0.07982829958200455, + "loss_ce": 9.573964234732557e-06, + "loss_iou": 0.291015625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 538174856, + "step": 5896 + }, + { + "epoch": 24.570833333333333, + "grad_norm": 2.7877261288827166, + "learning_rate": 5e-05, + "loss": 0.0391, + "num_input_tokens_seen": 538266120, + "step": 5897 + }, + { + "epoch": 24.570833333333333, + "loss": 0.03959943354129791, + "loss_ce": 2.8757731342921034e-06, + "loss_iou": 0.330078125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 538266120, + "step": 5897 + }, + { + "epoch": 24.575, + "grad_norm": 2.9636345146482475, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 538357420, + "step": 5898 + }, + { + "epoch": 24.575, + "loss": 0.05580301955342293, + "loss_ce": 1.6886146113392897e-05, + "loss_iou": 0.3203125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 538357420, + "step": 5898 + }, + { + "epoch": 24.579166666666666, + "grad_norm": 4.463181674979655, + "learning_rate": 5e-05, + "loss": 0.037, + "num_input_tokens_seen": 538447944, + "step": 5899 + }, + { + "epoch": 24.579166666666666, + "loss": 0.043856751173734665, + "loss_ce": 2.9877814995415974e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 538447944, + "step": 5899 + }, + { + "epoch": 24.583333333333332, + "grad_norm": 8.215582023201812, + "learning_rate": 5e-05, + "loss": 0.0739, + "num_input_tokens_seen": 538539312, + "step": 5900 + }, + { + "epoch": 24.583333333333332, + "loss": 0.07832689583301544, + "loss_ce": 1.8785158317768946e-05, + "loss_iou": 0.18359375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 538539312, + "step": 5900 + }, + { + "epoch": 24.5875, + "grad_norm": 3.5866870849822976, + "learning_rate": 5e-05, + "loss": 0.0926, + "num_input_tokens_seen": 538629948, + "step": 5901 + }, + { + "epoch": 24.5875, + "loss": 0.07081713527441025, + "loss_ce": 1.0961807674902957e-06, + "loss_iou": 0.228515625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 538629948, + "step": 5901 + }, + { + "epoch": 24.591666666666665, + "grad_norm": 3.7779213662672033, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 538721432, + "step": 5902 + }, + { + "epoch": 24.591666666666665, + "loss": 0.04373526945710182, + "loss_ce": 3.5786486023425823e-06, + "loss_iou": 0.251953125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 538721432, + "step": 5902 + }, + { + "epoch": 24.595833333333335, + "grad_norm": 2.168479694601854, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 538812516, + "step": 5903 + }, + { + "epoch": 24.595833333333335, + "loss": 0.03565511852502823, + "loss_ce": 0.00011740053014364094, + "loss_iou": 0.171875, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 538812516, + "step": 5903 + }, + { + "epoch": 24.6, + "grad_norm": 3.0841797571544123, + "learning_rate": 5e-05, + "loss": 0.0291, + "num_input_tokens_seen": 538903744, + "step": 5904 + }, + { + "epoch": 24.6, + "loss": 0.02611241489648819, + "loss_ce": 6.566110096173361e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.005218505859375, + "loss_xval": 0.0260009765625, + "num_input_tokens_seen": 538903744, + "step": 5904 + }, + { + "epoch": 24.604166666666668, + "grad_norm": 3.154338098881419, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 538995084, + "step": 5905 + }, + { + "epoch": 24.604166666666668, + "loss": 0.03224372863769531, + "loss_ce": 1.9066790173383197e-06, + "loss_iou": 0.2578125, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 538995084, + "step": 5905 + }, + { + "epoch": 24.608333333333334, + "grad_norm": 2.69142730021738, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 539086212, + "step": 5906 + }, + { + "epoch": 24.608333333333334, + "loss": 0.03064822033047676, + "loss_ce": 9.429805345462228e-07, + "loss_iou": 0.189453125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 539086212, + "step": 5906 + }, + { + "epoch": 24.6125, + "grad_norm": 2.5427073613900877, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 539177624, + "step": 5907 + }, + { + "epoch": 24.6125, + "loss": 0.0609009675681591, + "loss_ce": 3.139893124171067e-06, + "loss_iou": 0.314453125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 539177624, + "step": 5907 + }, + { + "epoch": 24.616666666666667, + "grad_norm": 2.7146400523726304, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 539268644, + "step": 5908 + }, + { + "epoch": 24.616666666666667, + "loss": 0.03491390123963356, + "loss_ce": 1.7929927480508923e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 539268644, + "step": 5908 + }, + { + "epoch": 24.620833333333334, + "grad_norm": 1.9120792026763072, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 539359980, + "step": 5909 + }, + { + "epoch": 24.620833333333334, + "loss": 0.05520816519856453, + "loss_ce": 1.8630300928634824e-06, + "loss_iou": 0.1103515625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 539359980, + "step": 5909 + }, + { + "epoch": 24.625, + "grad_norm": 2.8697876589951665, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 539450148, + "step": 5910 + }, + { + "epoch": 24.625, + "loss": 0.044227711856365204, + "loss_ce": 1.5374156646430492e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 539450148, + "step": 5910 + }, + { + "epoch": 24.629166666666666, + "grad_norm": 1.1012244154737323, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 539541476, + "step": 5911 + }, + { + "epoch": 24.629166666666666, + "loss": 0.05504788085818291, + "loss_ce": 0.0006121534388512373, + "loss_iou": 0.1201171875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 539541476, + "step": 5911 + }, + { + "epoch": 24.633333333333333, + "grad_norm": 4.048964876658251, + "learning_rate": 5e-05, + "loss": 0.0682, + "num_input_tokens_seen": 539631032, + "step": 5912 + }, + { + "epoch": 24.633333333333333, + "loss": 0.06917066872119904, + "loss_ce": 2.58000181929674e-06, + "loss_iou": 0.2275390625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 539631032, + "step": 5912 + }, + { + "epoch": 24.6375, + "grad_norm": 1.0274591914629472, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 539720944, + "step": 5913 + }, + { + "epoch": 24.6375, + "loss": 0.038511671125888824, + "loss_ce": 6.119631052570185e-06, + "loss_iou": 0.0859375, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 539720944, + "step": 5913 + }, + { + "epoch": 24.641666666666666, + "grad_norm": 0.7784951474058762, + "learning_rate": 5e-05, + "loss": 0.0265, + "num_input_tokens_seen": 539812508, + "step": 5914 + }, + { + "epoch": 24.641666666666666, + "loss": 0.02655962109565735, + "loss_ce": 4.7475121391471475e-05, + "loss_iou": 0.224609375, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 539812508, + "step": 5914 + }, + { + "epoch": 24.645833333333332, + "grad_norm": 0.8845230205746087, + "learning_rate": 5e-05, + "loss": 0.0196, + "num_input_tokens_seen": 539904344, + "step": 5915 + }, + { + "epoch": 24.645833333333332, + "loss": 0.02053234726190567, + "loss_ce": 5.5050964874681085e-05, + "loss_iou": 0.216796875, + "loss_num": 0.00408935546875, + "loss_xval": 0.0205078125, + "num_input_tokens_seen": 539904344, + "step": 5915 + }, + { + "epoch": 24.65, + "grad_norm": 1.5167444500502159, + "learning_rate": 5e-05, + "loss": 0.0558, + "num_input_tokens_seen": 539995948, + "step": 5916 + }, + { + "epoch": 24.65, + "loss": 0.030595405027270317, + "loss_ce": 1.6793703252915293e-05, + "loss_iou": 0.154296875, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 539995948, + "step": 5916 + }, + { + "epoch": 24.654166666666665, + "grad_norm": 3.4466687869544694, + "learning_rate": 5e-05, + "loss": 0.0709, + "num_input_tokens_seen": 540087116, + "step": 5917 + }, + { + "epoch": 24.654166666666665, + "loss": 0.08021612465381622, + "loss_ce": 0.00019903438806068152, + "loss_iou": 0.1328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 540087116, + "step": 5917 + }, + { + "epoch": 24.658333333333335, + "grad_norm": 4.366893586605952, + "learning_rate": 5e-05, + "loss": 0.1025, + "num_input_tokens_seen": 540177852, + "step": 5918 + }, + { + "epoch": 24.658333333333335, + "loss": 0.1458016186952591, + "loss_ce": 3.891634150932077e-06, + "loss_iou": 0.31640625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 540177852, + "step": 5918 + }, + { + "epoch": 24.6625, + "grad_norm": 2.3891871671129277, + "learning_rate": 5e-05, + "loss": 0.028, + "num_input_tokens_seen": 540269780, + "step": 5919 + }, + { + "epoch": 24.6625, + "loss": 0.02481307089328766, + "loss_ce": 3.279614611528814e-05, + "loss_iou": 0.17578125, + "loss_num": 0.00494384765625, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 540269780, + "step": 5919 + }, + { + "epoch": 24.666666666666668, + "grad_norm": 4.410840281215651, + "learning_rate": 5e-05, + "loss": 0.0265, + "num_input_tokens_seen": 540361224, + "step": 5920 + }, + { + "epoch": 24.666666666666668, + "loss": 0.021179374307394028, + "loss_ce": 2.3062859327183105e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.004241943359375, + "loss_xval": 0.0211181640625, + "num_input_tokens_seen": 540361224, + "step": 5920 + }, + { + "epoch": 24.670833333333334, + "grad_norm": 1.9648680045006581, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 540452532, + "step": 5921 + }, + { + "epoch": 24.670833333333334, + "loss": 0.08267770707607269, + "loss_ce": 2.0848239728366025e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 540452532, + "step": 5921 + }, + { + "epoch": 24.675, + "grad_norm": 2.7172343307171207, + "learning_rate": 5e-05, + "loss": 0.0339, + "num_input_tokens_seen": 540543684, + "step": 5922 + }, + { + "epoch": 24.675, + "loss": 0.03063477762043476, + "loss_ce": 2.758548589554266e-06, + "loss_iou": 0.1923828125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 540543684, + "step": 5922 + }, + { + "epoch": 24.679166666666667, + "grad_norm": 3.0738158052431808, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 540635616, + "step": 5923 + }, + { + "epoch": 24.679166666666667, + "loss": 0.027951788157224655, + "loss_ce": 2.057402889477089e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 540635616, + "step": 5923 + }, + { + "epoch": 24.683333333333334, + "grad_norm": 2.6663540732172617, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 540726568, + "step": 5924 + }, + { + "epoch": 24.683333333333334, + "loss": 0.026276925578713417, + "loss_ce": 1.2912703368783696e-06, + "loss_iou": 0.11572265625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 540726568, + "step": 5924 + }, + { + "epoch": 24.6875, + "grad_norm": 6.225553984733464, + "learning_rate": 5e-05, + "loss": 0.0362, + "num_input_tokens_seen": 540818112, + "step": 5925 + }, + { + "epoch": 24.6875, + "loss": 0.032184898853302, + "loss_ce": 4.109516794414958e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 540818112, + "step": 5925 + }, + { + "epoch": 24.691666666666666, + "grad_norm": 3.280122739815333, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 540908712, + "step": 5926 + }, + { + "epoch": 24.691666666666666, + "loss": 0.05258284509181976, + "loss_ce": 1.0590549663902493e-06, + "loss_iou": 0.20703125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 540908712, + "step": 5926 + }, + { + "epoch": 24.695833333333333, + "grad_norm": 5.29702773637457, + "learning_rate": 5e-05, + "loss": 0.0838, + "num_input_tokens_seen": 541000016, + "step": 5927 + }, + { + "epoch": 24.695833333333333, + "loss": 0.09465332329273224, + "loss_ce": 3.0550074825441698e-06, + "loss_iou": 0.271484375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 541000016, + "step": 5927 + }, + { + "epoch": 24.7, + "grad_norm": 4.623311810622116, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 541091220, + "step": 5928 + }, + { + "epoch": 24.7, + "loss": 0.04450526461005211, + "loss_ce": 3.0040828278288245e-06, + "loss_iou": 0.1640625, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 541091220, + "step": 5928 + }, + { + "epoch": 24.704166666666666, + "grad_norm": 1.3477347149151542, + "learning_rate": 5e-05, + "loss": 0.0489, + "num_input_tokens_seen": 541182772, + "step": 5929 + }, + { + "epoch": 24.704166666666666, + "loss": 0.04887685179710388, + "loss_ce": 2.949379450001288e-06, + "loss_iou": 0.1923828125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 541182772, + "step": 5929 + }, + { + "epoch": 24.708333333333332, + "grad_norm": 2.3558814680354536, + "learning_rate": 5e-05, + "loss": 0.0221, + "num_input_tokens_seen": 541273984, + "step": 5930 + }, + { + "epoch": 24.708333333333332, + "loss": 0.018274590373039246, + "loss_ce": 2.190319037254085e-06, + "loss_iou": 0.1689453125, + "loss_num": 0.0036468505859375, + "loss_xval": 0.018310546875, + "num_input_tokens_seen": 541273984, + "step": 5930 + }, + { + "epoch": 24.7125, + "grad_norm": 2.565239447900155, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 541364932, + "step": 5931 + }, + { + "epoch": 24.7125, + "loss": 0.051742859184741974, + "loss_ce": 7.933152119221631e-06, + "loss_iou": 0.203125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 541364932, + "step": 5931 + }, + { + "epoch": 24.716666666666665, + "grad_norm": 2.724349746006053, + "learning_rate": 5e-05, + "loss": 0.0564, + "num_input_tokens_seen": 541456372, + "step": 5932 + }, + { + "epoch": 24.716666666666665, + "loss": 0.07829385995864868, + "loss_ce": 1.0153214589081472e-06, + "loss_iou": 0.279296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 541456372, + "step": 5932 + }, + { + "epoch": 24.720833333333335, + "grad_norm": 2.7034234598003817, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 541547664, + "step": 5933 + }, + { + "epoch": 24.720833333333335, + "loss": 0.028786811977624893, + "loss_ce": 8.735810297366697e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 541547664, + "step": 5933 + }, + { + "epoch": 24.725, + "grad_norm": 2.9238049557551875, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 541639076, + "step": 5934 + }, + { + "epoch": 24.725, + "loss": 0.03198646754026413, + "loss_ce": 4.048427854286274e-06, + "loss_iou": 0.1611328125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 541639076, + "step": 5934 + }, + { + "epoch": 24.729166666666668, + "grad_norm": 3.1921958158987627, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 541730268, + "step": 5935 + }, + { + "epoch": 24.729166666666668, + "loss": 0.07729228585958481, + "loss_ce": 0.000258283456787467, + "loss_iou": 0.244140625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 541730268, + "step": 5935 + }, + { + "epoch": 24.733333333333334, + "grad_norm": 2.3646558090848777, + "learning_rate": 5e-05, + "loss": 0.0674, + "num_input_tokens_seen": 541821312, + "step": 5936 + }, + { + "epoch": 24.733333333333334, + "loss": 0.10533357411623001, + "loss_ce": 9.777621016837656e-06, + "loss_iou": 0.34765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 541821312, + "step": 5936 + }, + { + "epoch": 24.7375, + "grad_norm": 4.5028653103550225, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 541912692, + "step": 5937 + }, + { + "epoch": 24.7375, + "loss": 0.046153221279382706, + "loss_ce": 5.642018368234858e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 541912692, + "step": 5937 + }, + { + "epoch": 24.741666666666667, + "grad_norm": 4.870812530160758, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 542003980, + "step": 5938 + }, + { + "epoch": 24.741666666666667, + "loss": 0.03171246126294136, + "loss_ce": 4.69624001198099e-06, + "loss_iou": 0.1943359375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 542003980, + "step": 5938 + }, + { + "epoch": 24.745833333333334, + "grad_norm": 2.5231449756579, + "learning_rate": 5e-05, + "loss": 0.0806, + "num_input_tokens_seen": 542095820, + "step": 5939 + }, + { + "epoch": 24.745833333333334, + "loss": 0.05183350667357445, + "loss_ce": 7.0313944888766855e-06, + "loss_iou": 0.1884765625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 542095820, + "step": 5939 + }, + { + "epoch": 24.75, + "grad_norm": 1.5649742368545314, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 542186908, + "step": 5940 + }, + { + "epoch": 24.75, + "loss": 0.04194850102066994, + "loss_ce": 0.0010778369614854455, + "loss_iou": 0.29296875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 542186908, + "step": 5940 + }, + { + "epoch": 24.754166666666666, + "grad_norm": 1.9013408198419663, + "learning_rate": 5e-05, + "loss": 0.029, + "num_input_tokens_seen": 542277800, + "step": 5941 + }, + { + "epoch": 24.754166666666666, + "loss": 0.020965928211808205, + "loss_ce": 1.5612324204994366e-05, + "loss_iou": 0.20703125, + "loss_num": 0.004180908203125, + "loss_xval": 0.02099609375, + "num_input_tokens_seen": 542277800, + "step": 5941 + }, + { + "epoch": 24.758333333333333, + "grad_norm": 2.0282852397219835, + "learning_rate": 5e-05, + "loss": 0.0532, + "num_input_tokens_seen": 542367376, + "step": 5942 + }, + { + "epoch": 24.758333333333333, + "loss": 0.07156237214803696, + "loss_ce": 0.0009141807677224278, + "loss_iou": 0.1181640625, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 542367376, + "step": 5942 + }, + { + "epoch": 24.7625, + "grad_norm": 4.284082256453397, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 542458328, + "step": 5943 + }, + { + "epoch": 24.7625, + "loss": 0.03209364414215088, + "loss_ce": 6.544977077282965e-05, + "loss_iou": 0.279296875, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 542458328, + "step": 5943 + }, + { + "epoch": 24.766666666666666, + "grad_norm": 6.875100205694348, + "learning_rate": 5e-05, + "loss": 0.037, + "num_input_tokens_seen": 542549360, + "step": 5944 + }, + { + "epoch": 24.766666666666666, + "loss": 0.03342054411768913, + "loss_ce": 3.4312968637095764e-05, + "loss_iou": 0.28125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 542549360, + "step": 5944 + }, + { + "epoch": 24.770833333333332, + "grad_norm": 2.7426263223939413, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 542640716, + "step": 5945 + }, + { + "epoch": 24.770833333333332, + "loss": 0.03136511147022247, + "loss_ce": 0.0019232768099755049, + "loss_iou": 0.22265625, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 542640716, + "step": 5945 + }, + { + "epoch": 24.775, + "grad_norm": 2.210203886249043, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 542731968, + "step": 5946 + }, + { + "epoch": 24.775, + "loss": 0.041960351169109344, + "loss_ce": 6.309468972176546e-06, + "loss_iou": 0.2490234375, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 542731968, + "step": 5946 + }, + { + "epoch": 24.779166666666665, + "grad_norm": 3.9227907162197333, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 542823432, + "step": 5947 + }, + { + "epoch": 24.779166666666665, + "loss": 0.047088660299777985, + "loss_ce": 7.663855285500176e-06, + "loss_iou": 0.21484375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 542823432, + "step": 5947 + }, + { + "epoch": 24.783333333333335, + "grad_norm": 3.1668412273112407, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 542914652, + "step": 5948 + }, + { + "epoch": 24.783333333333335, + "loss": 0.02823035418987274, + "loss_ce": 1.685171082499437e-05, + "loss_iou": 0.359375, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 542914652, + "step": 5948 + }, + { + "epoch": 24.7875, + "grad_norm": 2.861085397257759, + "learning_rate": 5e-05, + "loss": 0.0812, + "num_input_tokens_seen": 543006236, + "step": 5949 + }, + { + "epoch": 24.7875, + "loss": 0.09519291669130325, + "loss_ce": 8.586997864767909e-06, + "loss_iou": 0.345703125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 543006236, + "step": 5949 + }, + { + "epoch": 24.791666666666668, + "grad_norm": 3.458020700237174, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 543098220, + "step": 5950 + }, + { + "epoch": 24.791666666666668, + "loss": 0.05714184045791626, + "loss_ce": 3.5821729397866875e-05, + "loss_iou": 0.17578125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 543098220, + "step": 5950 + }, + { + "epoch": 24.795833333333334, + "grad_norm": 2.0891758994886893, + "learning_rate": 5e-05, + "loss": 0.0744, + "num_input_tokens_seen": 543189344, + "step": 5951 + }, + { + "epoch": 24.795833333333334, + "loss": 0.05110526829957962, + "loss_ce": 3.5859707168128807e-06, + "loss_iou": 0.224609375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 543189344, + "step": 5951 + }, + { + "epoch": 24.8, + "grad_norm": 1.4596318900144993, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 543280584, + "step": 5952 + }, + { + "epoch": 24.8, + "loss": 0.04132525622844696, + "loss_ce": 1.2085286471119616e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 543280584, + "step": 5952 + }, + { + "epoch": 24.804166666666667, + "grad_norm": 2.243959651716921, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 543371820, + "step": 5953 + }, + { + "epoch": 24.804166666666667, + "loss": 0.07528236508369446, + "loss_ce": 7.560790982097387e-05, + "loss_iou": 0.1796875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 543371820, + "step": 5953 + }, + { + "epoch": 24.808333333333334, + "grad_norm": 8.27410245570298, + "learning_rate": 5e-05, + "loss": 0.0898, + "num_input_tokens_seen": 543462980, + "step": 5954 + }, + { + "epoch": 24.808333333333334, + "loss": 0.06564659625291824, + "loss_ce": 9.483678149990737e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 543462980, + "step": 5954 + }, + { + "epoch": 24.8125, + "grad_norm": 3.2307275820053536, + "learning_rate": 5e-05, + "loss": 0.0455, + "num_input_tokens_seen": 543553840, + "step": 5955 + }, + { + "epoch": 24.8125, + "loss": 0.05685967206954956, + "loss_ce": 3.5940804082201794e-05, + "loss_iou": 0.220703125, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 543553840, + "step": 5955 + }, + { + "epoch": 24.816666666666666, + "grad_norm": 2.9867327221025106, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 543644836, + "step": 5956 + }, + { + "epoch": 24.816666666666666, + "loss": 0.024468587711453438, + "loss_ce": 3.926640783902258e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 543644836, + "step": 5956 + }, + { + "epoch": 24.820833333333333, + "grad_norm": 3.7096201493254624, + "learning_rate": 5e-05, + "loss": 0.059, + "num_input_tokens_seen": 543735620, + "step": 5957 + }, + { + "epoch": 24.820833333333333, + "loss": 0.08750680834054947, + "loss_ce": 2.053728894679807e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 543735620, + "step": 5957 + }, + { + "epoch": 24.825, + "grad_norm": 3.158868577300251, + "learning_rate": 5e-05, + "loss": 0.0486, + "num_input_tokens_seen": 543827112, + "step": 5958 + }, + { + "epoch": 24.825, + "loss": 0.06845375150442123, + "loss_ce": 0.00027748377760872245, + "loss_iou": 0.2451171875, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 543827112, + "step": 5958 + }, + { + "epoch": 24.829166666666666, + "grad_norm": 3.0045479568999065, + "learning_rate": 5e-05, + "loss": 0.0474, + "num_input_tokens_seen": 543918588, + "step": 5959 + }, + { + "epoch": 24.829166666666666, + "loss": 0.040871065109968185, + "loss_ce": 8.028022421058267e-06, + "loss_iou": 0.28125, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 543918588, + "step": 5959 + }, + { + "epoch": 24.833333333333332, + "grad_norm": 3.406046898558503, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 544010588, + "step": 5960 + }, + { + "epoch": 24.833333333333332, + "loss": 0.043734535574913025, + "loss_ce": 1.8105354683939368e-05, + "loss_iou": 0.37109375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 544010588, + "step": 5960 + }, + { + "epoch": 24.8375, + "grad_norm": 19.736425320828538, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 544101932, + "step": 5961 + }, + { + "epoch": 24.8375, + "loss": 0.022242430597543716, + "loss_ce": 2.7447626962384675e-06, + "loss_iou": 0.1875, + "loss_num": 0.00445556640625, + "loss_xval": 0.022216796875, + "num_input_tokens_seen": 544101932, + "step": 5961 + }, + { + "epoch": 24.841666666666665, + "grad_norm": 3.5540091350026795, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 544193824, + "step": 5962 + }, + { + "epoch": 24.841666666666665, + "loss": 0.02422468177974224, + "loss_ce": 8.527738100383431e-05, + "loss_iou": 0.28125, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 544193824, + "step": 5962 + }, + { + "epoch": 24.845833333333335, + "grad_norm": 3.2074751309801197, + "learning_rate": 5e-05, + "loss": 0.0911, + "num_input_tokens_seen": 544284820, + "step": 5963 + }, + { + "epoch": 24.845833333333335, + "loss": 0.12568366527557373, + "loss_ce": 1.2279349903110415e-05, + "loss_iou": 0.0810546875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 544284820, + "step": 5963 + }, + { + "epoch": 24.85, + "grad_norm": 2.946357998987661, + "learning_rate": 5e-05, + "loss": 0.0687, + "num_input_tokens_seen": 544376068, + "step": 5964 + }, + { + "epoch": 24.85, + "loss": 0.0366411954164505, + "loss_ce": 4.842233465751633e-06, + "loss_iou": 0.189453125, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 544376068, + "step": 5964 + }, + { + "epoch": 24.854166666666668, + "grad_norm": 2.875951617130414, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 544467844, + "step": 5965 + }, + { + "epoch": 24.854166666666668, + "loss": 0.07936926931142807, + "loss_ce": 3.882595046889037e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 544467844, + "step": 5965 + }, + { + "epoch": 24.858333333333334, + "grad_norm": 4.6237034845892255, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 544559136, + "step": 5966 + }, + { + "epoch": 24.858333333333334, + "loss": 0.05209742859005928, + "loss_ce": 3.925106284441426e-06, + "loss_iou": 0.263671875, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 544559136, + "step": 5966 + }, + { + "epoch": 24.8625, + "grad_norm": 1.1493134989483929, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 544649940, + "step": 5967 + }, + { + "epoch": 24.8625, + "loss": 0.04756912589073181, + "loss_ce": 3.036867201444693e-05, + "loss_iou": 0.244140625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 544649940, + "step": 5967 + }, + { + "epoch": 24.866666666666667, + "grad_norm": 2.0982886092577315, + "learning_rate": 5e-05, + "loss": 0.0329, + "num_input_tokens_seen": 544741492, + "step": 5968 + }, + { + "epoch": 24.866666666666667, + "loss": 0.04254509508609772, + "loss_ce": 3.592864914025995e-06, + "loss_iou": 0.27734375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 544741492, + "step": 5968 + }, + { + "epoch": 24.870833333333334, + "grad_norm": 0.746916826526483, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 544833060, + "step": 5969 + }, + { + "epoch": 24.870833333333334, + "loss": 0.054372403770685196, + "loss_ce": 6.637265323661268e-05, + "loss_iou": 0.193359375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 544833060, + "step": 5969 + }, + { + "epoch": 24.875, + "grad_norm": 1.2717332474770326, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 544924372, + "step": 5970 + }, + { + "epoch": 24.875, + "loss": 0.024903494864702225, + "loss_ce": 1.1499631682454492e-06, + "loss_iou": 0.224609375, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 544924372, + "step": 5970 + }, + { + "epoch": 24.879166666666666, + "grad_norm": 2.792213761061456, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 545015452, + "step": 5971 + }, + { + "epoch": 24.879166666666666, + "loss": 0.032501786947250366, + "loss_ce": 2.3451317247236148e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 545015452, + "step": 5971 + }, + { + "epoch": 24.883333333333333, + "grad_norm": 3.4748548119466327, + "learning_rate": 5e-05, + "loss": 0.0404, + "num_input_tokens_seen": 545106680, + "step": 5972 + }, + { + "epoch": 24.883333333333333, + "loss": 0.03997427970170975, + "loss_ce": 0.00042349606519564986, + "loss_iou": 0.2734375, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 545106680, + "step": 5972 + }, + { + "epoch": 24.8875, + "grad_norm": 6.263040693590258, + "learning_rate": 5e-05, + "loss": 0.0397, + "num_input_tokens_seen": 545198316, + "step": 5973 + }, + { + "epoch": 24.8875, + "loss": 0.04715617746114731, + "loss_ce": 6.519712769659236e-06, + "loss_iou": 0.25, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 545198316, + "step": 5973 + }, + { + "epoch": 24.891666666666666, + "grad_norm": 2.448734942191181, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 545289492, + "step": 5974 + }, + { + "epoch": 24.891666666666666, + "loss": 0.04973657801747322, + "loss_ce": 8.183667887351476e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 545289492, + "step": 5974 + }, + { + "epoch": 24.895833333333332, + "grad_norm": 2.5058430825356885, + "learning_rate": 5e-05, + "loss": 0.037, + "num_input_tokens_seen": 545380824, + "step": 5975 + }, + { + "epoch": 24.895833333333332, + "loss": 0.05505327135324478, + "loss_ce": 7.189291409304133e-06, + "loss_iou": 0.1435546875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 545380824, + "step": 5975 + }, + { + "epoch": 24.9, + "grad_norm": 2.4357214961264275, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 545472512, + "step": 5976 + }, + { + "epoch": 24.9, + "loss": 0.047596514225006104, + "loss_ce": 0.0005994450766593218, + "loss_iou": 0.2470703125, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 545472512, + "step": 5976 + }, + { + "epoch": 24.904166666666665, + "grad_norm": 3.1992849131099597, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 545563616, + "step": 5977 + }, + { + "epoch": 24.904166666666665, + "loss": 0.053942278027534485, + "loss_ce": 2.459473762428388e-06, + "loss_iou": 0.33203125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 545563616, + "step": 5977 + }, + { + "epoch": 24.908333333333335, + "grad_norm": 1.7751656495507373, + "learning_rate": 5e-05, + "loss": 0.0281, + "num_input_tokens_seen": 545655460, + "step": 5978 + }, + { + "epoch": 24.908333333333335, + "loss": 0.0314764603972435, + "loss_ce": 5.2090144890826195e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 545655460, + "step": 5978 + }, + { + "epoch": 24.9125, + "grad_norm": 2.447711320425934, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 545746784, + "step": 5979 + }, + { + "epoch": 24.9125, + "loss": 0.0640127956867218, + "loss_ce": 2.172352651541587e-06, + "loss_iou": 0.37890625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 545746784, + "step": 5979 + }, + { + "epoch": 24.916666666666668, + "grad_norm": 2.3039834216803268, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 545838384, + "step": 5980 + }, + { + "epoch": 24.916666666666668, + "loss": 0.04834413528442383, + "loss_ce": 1.192315812659217e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 545838384, + "step": 5980 + }, + { + "epoch": 24.920833333333334, + "grad_norm": 2.8196817319781955, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 545930048, + "step": 5981 + }, + { + "epoch": 24.920833333333334, + "loss": 0.04672251641750336, + "loss_ce": 7.730915058346e-06, + "loss_iou": 0.275390625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 545930048, + "step": 5981 + }, + { + "epoch": 24.925, + "grad_norm": 3.6656535804290504, + "learning_rate": 5e-05, + "loss": 0.0505, + "num_input_tokens_seen": 546021812, + "step": 5982 + }, + { + "epoch": 24.925, + "loss": 0.030796393752098083, + "loss_ce": 4.158198862569407e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 546021812, + "step": 5982 + }, + { + "epoch": 24.929166666666667, + "grad_norm": 2.479852690724127, + "learning_rate": 5e-05, + "loss": 0.0606, + "num_input_tokens_seen": 546113424, + "step": 5983 + }, + { + "epoch": 24.929166666666667, + "loss": 0.052928995341062546, + "loss_ce": 1.1513237041071989e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 546113424, + "step": 5983 + }, + { + "epoch": 24.933333333333334, + "grad_norm": 3.2366282793434946, + "learning_rate": 5e-05, + "loss": 0.0779, + "num_input_tokens_seen": 546205548, + "step": 5984 + }, + { + "epoch": 24.933333333333334, + "loss": 0.024994969367980957, + "loss_ce": 1.0731979500633315e-06, + "loss_iou": 0.265625, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 546205548, + "step": 5984 + }, + { + "epoch": 24.9375, + "grad_norm": 2.9060950394394984, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 546296632, + "step": 5985 + }, + { + "epoch": 24.9375, + "loss": 0.09871162474155426, + "loss_ce": 2.5201684366038535e-06, + "loss_iou": 0.2890625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 546296632, + "step": 5985 + }, + { + "epoch": 24.941666666666666, + "grad_norm": 3.017673406017083, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 546386652, + "step": 5986 + }, + { + "epoch": 24.941666666666666, + "loss": 0.025592109188437462, + "loss_ce": 3.1204160677589243e-06, + "loss_iou": 0.1748046875, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 546386652, + "step": 5986 + }, + { + "epoch": 24.945833333333333, + "grad_norm": 2.7750188921117465, + "learning_rate": 5e-05, + "loss": 0.023, + "num_input_tokens_seen": 546478508, + "step": 5987 + }, + { + "epoch": 24.945833333333333, + "loss": 0.023148812353610992, + "loss_ce": 1.2287633808227838e-06, + "loss_iou": 0.1494140625, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 546478508, + "step": 5987 + }, + { + "epoch": 24.95, + "grad_norm": 2.093958784103149, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 546569960, + "step": 5988 + }, + { + "epoch": 24.95, + "loss": 0.0406670905649662, + "loss_ce": 2.414625669189263e-06, + "loss_iou": 0.24609375, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 546569960, + "step": 5988 + }, + { + "epoch": 24.954166666666666, + "grad_norm": 1.3102097345328108, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 546661232, + "step": 5989 + }, + { + "epoch": 24.954166666666666, + "loss": 0.03977053984999657, + "loss_ce": 0.00011294680007267743, + "loss_iou": 0.212890625, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 546661232, + "step": 5989 + }, + { + "epoch": 24.958333333333332, + "grad_norm": 1.9589293422616056, + "learning_rate": 5e-05, + "loss": 0.0552, + "num_input_tokens_seen": 546752348, + "step": 5990 + }, + { + "epoch": 24.958333333333332, + "loss": 0.04497984051704407, + "loss_ce": 4.556593466986669e-06, + "loss_iou": 0.1669921875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 546752348, + "step": 5990 + }, + { + "epoch": 24.9625, + "grad_norm": 2.3781652883990727, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 546843064, + "step": 5991 + }, + { + "epoch": 24.9625, + "loss": 0.03465817868709564, + "loss_ce": 2.0727646187879145e-05, + "loss_iou": 0.34375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 546843064, + "step": 5991 + }, + { + "epoch": 24.966666666666665, + "grad_norm": 3.3296711759595006, + "learning_rate": 5e-05, + "loss": 0.0291, + "num_input_tokens_seen": 546934208, + "step": 5992 + }, + { + "epoch": 24.966666666666665, + "loss": 0.030732300132513046, + "loss_ce": 1.0995988759532338e-06, + "loss_iou": 0.349609375, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 546934208, + "step": 5992 + }, + { + "epoch": 24.970833333333335, + "grad_norm": 2.1965400503971133, + "learning_rate": 5e-05, + "loss": 0.0587, + "num_input_tokens_seen": 547026024, + "step": 5993 + }, + { + "epoch": 24.970833333333335, + "loss": 0.09637053310871124, + "loss_ce": 1.1280644685029984e-05, + "loss_iou": 0.22265625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 547026024, + "step": 5993 + }, + { + "epoch": 24.975, + "grad_norm": 2.232533704551629, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 547117604, + "step": 5994 + }, + { + "epoch": 24.975, + "loss": 0.0288502499461174, + "loss_ce": 3.508933104967582e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 547117604, + "step": 5994 + }, + { + "epoch": 24.979166666666668, + "grad_norm": 2.0416217401760988, + "learning_rate": 5e-05, + "loss": 0.0165, + "num_input_tokens_seen": 547209032, + "step": 5995 + }, + { + "epoch": 24.979166666666668, + "loss": 0.01802220195531845, + "loss_ce": 5.386910288507352e-06, + "loss_iou": 0.31640625, + "loss_num": 0.00360107421875, + "loss_xval": 0.01806640625, + "num_input_tokens_seen": 547209032, + "step": 5995 + }, + { + "epoch": 24.983333333333334, + "grad_norm": 3.8484841226148996, + "learning_rate": 5e-05, + "loss": 0.0652, + "num_input_tokens_seen": 547299940, + "step": 5996 + }, + { + "epoch": 24.983333333333334, + "loss": 0.05408111587166786, + "loss_ce": 3.964207280660048e-06, + "loss_iou": 0.1474609375, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 547299940, + "step": 5996 + }, + { + "epoch": 24.9875, + "grad_norm": 3.1742817345645675, + "learning_rate": 5e-05, + "loss": 0.027, + "num_input_tokens_seen": 547390716, + "step": 5997 + }, + { + "epoch": 24.9875, + "loss": 0.02947721816599369, + "loss_ce": 2.7756241252063774e-05, + "loss_iou": 0.271484375, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 547390716, + "step": 5997 + }, + { + "epoch": 24.991666666666667, + "grad_norm": 3.1706086611539157, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 547482112, + "step": 5998 + }, + { + "epoch": 24.991666666666667, + "loss": 0.03286163881421089, + "loss_ce": 1.8380596884526312e-06, + "loss_iou": 0.26953125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 547482112, + "step": 5998 + }, + { + "epoch": 24.995833333333334, + "grad_norm": 5.756392964797963, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 547573120, + "step": 5999 + }, + { + "epoch": 24.995833333333334, + "loss": 0.052656348794698715, + "loss_ce": 2.878543091355823e-05, + "loss_iou": 0.330078125, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 547573120, + "step": 5999 + }, + { + "epoch": 25.0, + "grad_norm": 2.5867875653331494, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 547664792, + "step": 6000 + }, + { + "epoch": 25.0, + "eval_seeclick_CIoU": 0.24265528470277786, + "eval_seeclick_GIoU": 0.22994572669267654, + "eval_seeclick_IoU": 0.34904928505420685, + "eval_seeclick_MAE_all": 0.10797623917460442, + "eval_seeclick_MAE_h": 0.07592884637415409, + "eval_seeclick_MAE_w": 0.25676916539669037, + "eval_seeclick_MAE_x_boxes": 0.23508797585964203, + "eval_seeclick_MAE_y_boxes": 0.08006148040294647, + "eval_seeclick_NUM_probability": 0.9999965131282806, + "eval_seeclick_inside_bbox": 0.5553977340459824, + "eval_seeclick_loss": 0.5893933773040771, + "eval_seeclick_loss_ce": 0.10836980864405632, + "eval_seeclick_loss_iou": 0.4580078125, + "eval_seeclick_loss_num": 0.09033203125, + "eval_seeclick_loss_xval": 0.45166015625, + "eval_seeclick_runtime": 77.9192, + "eval_seeclick_samples_per_second": 0.552, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 547664792, + "step": 6000 + }, + { + "epoch": 25.0, + "eval_icons_CIoU": 0.2780071347951889, + "eval_icons_GIoU": 0.2557266652584076, + "eval_icons_IoU": 0.3593812435865402, + "eval_icons_MAE_all": 0.07604104280471802, + "eval_icons_MAE_h": 0.14657872170209885, + "eval_icons_MAE_w": 0.11666521430015564, + "eval_icons_MAE_x_boxes": 0.1220252551138401, + "eval_icons_MAE_y_boxes": 0.15165819227695465, + "eval_icons_NUM_probability": 0.9999914765357971, + "eval_icons_inside_bbox": 0.5347222238779068, + "eval_icons_loss": 0.38236290216445923, + "eval_icons_loss_ce": 0.0005112176586408168, + "eval_icons_loss_iou": 0.25677490234375, + "eval_icons_loss_num": 0.0786590576171875, + "eval_icons_loss_xval": 0.3931884765625, + "eval_icons_runtime": 89.2314, + "eval_icons_samples_per_second": 0.56, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 547664792, + "step": 6000 + }, + { + "epoch": 25.0, + "eval_screenspot_CIoU": 0.37966782848040265, + "eval_screenspot_GIoU": 0.3703218400478363, + "eval_screenspot_IoU": 0.44274385770161945, + "eval_screenspot_MAE_all": 0.08963250120480855, + "eval_screenspot_MAE_h": 0.07793333381414413, + "eval_screenspot_MAE_w": 0.1972505748271942, + "eval_screenspot_MAE_x_boxes": 0.16598065694173178, + "eval_screenspot_MAE_y_boxes": 0.07822733372449875, + "eval_screenspot_NUM_probability": 0.9999973376592001, + "eval_screenspot_inside_bbox": 0.7116666634877523, + "eval_screenspot_loss": 0.44954127073287964, + "eval_screenspot_loss_ce": 0.0009444566724899536, + "eval_screenspot_loss_iou": 0.3677571614583333, + "eval_screenspot_loss_num": 0.09143575032552083, + "eval_screenspot_loss_xval": 0.4573567708333333, + "eval_screenspot_runtime": 149.8438, + "eval_screenspot_samples_per_second": 0.594, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 547664792, + "step": 6000 + }, + { + "epoch": 25.0, + "eval_compot_CIoU": 0.4837338328361511, + "eval_compot_GIoU": 0.47165830433368683, + "eval_compot_IoU": 0.5592522621154785, + "eval_compot_MAE_all": 0.05793677642941475, + "eval_compot_MAE_h": 0.0747892614454031, + "eval_compot_MAE_w": 0.14299434423446655, + "eval_compot_MAE_x_boxes": 0.14643944799900055, + "eval_compot_MAE_y_boxes": 0.07384524680674076, + "eval_compot_NUM_probability": 0.9999956786632538, + "eval_compot_inside_bbox": 0.7361111044883728, + "eval_compot_loss": 0.3381621539592743, + "eval_compot_loss_ce": 0.059079062193632126, + "eval_compot_loss_iou": 0.3218994140625, + "eval_compot_loss_num": 0.05415534973144531, + "eval_compot_loss_xval": 0.2707672119140625, + "eval_compot_runtime": 85.9918, + "eval_compot_samples_per_second": 0.581, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 547664792, + "step": 6000 + }, + { + "epoch": 25.0, + "loss": 0.34967148303985596, + "loss_ce": 0.057862378656864166, + "loss_iou": 0.3359375, + "loss_num": 0.058349609375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 547664792, + "step": 6000 + }, + { + "epoch": 25.004166666666666, + "grad_norm": 1.902572635180254, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 547755800, + "step": 6001 + }, + { + "epoch": 25.004166666666666, + "loss": 0.034302763640880585, + "loss_ce": 1.0069838936033193e-06, + "loss_iou": 0.2421875, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 547755800, + "step": 6001 + }, + { + "epoch": 25.008333333333333, + "grad_norm": 2.2311118770006884, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 547846116, + "step": 6002 + }, + { + "epoch": 25.008333333333333, + "loss": 0.03062179684638977, + "loss_ce": 1.2666236216318794e-05, + "loss_iou": 0.251953125, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 547846116, + "step": 6002 + }, + { + "epoch": 25.0125, + "grad_norm": 2.8990535287036945, + "learning_rate": 5e-05, + "loss": 0.051, + "num_input_tokens_seen": 547937616, + "step": 6003 + }, + { + "epoch": 25.0125, + "loss": 0.03604936972260475, + "loss_ce": 8.109300324576907e-06, + "loss_iou": 0.2451171875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 547937616, + "step": 6003 + }, + { + "epoch": 25.016666666666666, + "grad_norm": 2.879151132864372, + "learning_rate": 5e-05, + "loss": 0.0442, + "num_input_tokens_seen": 548029308, + "step": 6004 + }, + { + "epoch": 25.016666666666666, + "loss": 0.028768474236130714, + "loss_ce": 0.0006312677287496626, + "loss_iou": 0.19140625, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 548029308, + "step": 6004 + }, + { + "epoch": 25.020833333333332, + "grad_norm": 2.3560327219287287, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 548121284, + "step": 6005 + }, + { + "epoch": 25.020833333333332, + "loss": 0.023445885628461838, + "loss_ce": 8.384964530705474e-06, + "loss_iou": 0.22265625, + "loss_num": 0.00469970703125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 548121284, + "step": 6005 + }, + { + "epoch": 25.025, + "grad_norm": 2.7276189833542777, + "learning_rate": 5e-05, + "loss": 0.0442, + "num_input_tokens_seen": 548212772, + "step": 6006 + }, + { + "epoch": 25.025, + "loss": 0.04950391501188278, + "loss_ce": 4.40308667748468e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 548212772, + "step": 6006 + }, + { + "epoch": 25.029166666666665, + "grad_norm": 2.5226347897824684, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 548304444, + "step": 6007 + }, + { + "epoch": 25.029166666666665, + "loss": 0.050401873886585236, + "loss_ce": 2.0907170892314753e-06, + "loss_iou": 0.3828125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 548304444, + "step": 6007 + }, + { + "epoch": 25.033333333333335, + "grad_norm": 1.0459329312119423, + "learning_rate": 5e-05, + "loss": 0.0834, + "num_input_tokens_seen": 548395768, + "step": 6008 + }, + { + "epoch": 25.033333333333335, + "loss": 0.14836296439170837, + "loss_ce": 1.7653171653364552e-06, + "loss_iou": 0.2255859375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 548395768, + "step": 6008 + }, + { + "epoch": 25.0375, + "grad_norm": 1.9787605859110673, + "learning_rate": 5e-05, + "loss": 0.0205, + "num_input_tokens_seen": 548487192, + "step": 6009 + }, + { + "epoch": 25.0375, + "loss": 0.018174223601818085, + "loss_ce": 1.0055944130726857e-06, + "loss_iou": 0.2265625, + "loss_num": 0.003631591796875, + "loss_xval": 0.0181884765625, + "num_input_tokens_seen": 548487192, + "step": 6009 + }, + { + "epoch": 25.041666666666668, + "grad_norm": 1.121889175408013, + "learning_rate": 5e-05, + "loss": 0.0244, + "num_input_tokens_seen": 548578280, + "step": 6010 + }, + { + "epoch": 25.041666666666668, + "loss": 0.03376873955130577, + "loss_ce": 1.0389567250967957e-06, + "loss_iou": 0.291015625, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 548578280, + "step": 6010 + }, + { + "epoch": 25.045833333333334, + "grad_norm": 1.3851939658554242, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 548669468, + "step": 6011 + }, + { + "epoch": 25.045833333333334, + "loss": 0.027827546000480652, + "loss_ce": 1.0773524081741925e-05, + "loss_iou": 0.25390625, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 548669468, + "step": 6011 + }, + { + "epoch": 25.05, + "grad_norm": 2.126157401900572, + "learning_rate": 5e-05, + "loss": 0.0559, + "num_input_tokens_seen": 548760536, + "step": 6012 + }, + { + "epoch": 25.05, + "loss": 0.06270837038755417, + "loss_ce": 1.0008010576711968e-05, + "loss_iou": 0.2109375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 548760536, + "step": 6012 + }, + { + "epoch": 25.054166666666667, + "grad_norm": 3.4490338069498168, + "learning_rate": 5e-05, + "loss": 0.024, + "num_input_tokens_seen": 548852208, + "step": 6013 + }, + { + "epoch": 25.054166666666667, + "loss": 0.024851012974977493, + "loss_ce": 2.496362321835477e-05, + "loss_iou": 0.3203125, + "loss_num": 0.004974365234375, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 548852208, + "step": 6013 + }, + { + "epoch": 25.058333333333334, + "grad_norm": 3.8029374535271323, + "learning_rate": 5e-05, + "loss": 0.0359, + "num_input_tokens_seen": 548943736, + "step": 6014 + }, + { + "epoch": 25.058333333333334, + "loss": 0.035249799489974976, + "loss_ce": 1.9983369838882936e-06, + "loss_iou": 0.25390625, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 548943736, + "step": 6014 + }, + { + "epoch": 25.0625, + "grad_norm": 2.324225388968632, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 549034768, + "step": 6015 + }, + { + "epoch": 25.0625, + "loss": 0.023685678839683533, + "loss_ce": 6.507354555651546e-05, + "loss_iou": 0.201171875, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 549034768, + "step": 6015 + }, + { + "epoch": 25.066666666666666, + "grad_norm": 3.0835970710088816, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 549126412, + "step": 6016 + }, + { + "epoch": 25.066666666666666, + "loss": 0.03177113085985184, + "loss_ce": 1.758850885380525e-05, + "loss_iou": 0.166015625, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 549126412, + "step": 6016 + }, + { + "epoch": 25.070833333333333, + "grad_norm": 2.917265856135224, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 549218220, + "step": 6017 + }, + { + "epoch": 25.070833333333333, + "loss": 0.028635790571570396, + "loss_ce": 2.6737684493127745e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 549218220, + "step": 6017 + }, + { + "epoch": 25.075, + "grad_norm": 1.7254570430119616, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 549309384, + "step": 6018 + }, + { + "epoch": 25.075, + "loss": 0.031534358859062195, + "loss_ce": 2.0701659195765387e-06, + "loss_iou": 0.0849609375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 549309384, + "step": 6018 + }, + { + "epoch": 25.079166666666666, + "grad_norm": 2.582436232179212, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 549400380, + "step": 6019 + }, + { + "epoch": 25.079166666666666, + "loss": 0.031680814921855927, + "loss_ce": 3.5690659387910273e-06, + "loss_iou": 0.1748046875, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 549400380, + "step": 6019 + }, + { + "epoch": 25.083333333333332, + "grad_norm": 2.511898733177223, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 549489952, + "step": 6020 + }, + { + "epoch": 25.083333333333332, + "loss": 0.06118789315223694, + "loss_ce": 7.775071026117075e-06, + "loss_iou": 0.07421875, + "loss_num": 0.01220703125, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 549489952, + "step": 6020 + }, + { + "epoch": 25.0875, + "grad_norm": 3.1444060718215425, + "learning_rate": 5e-05, + "loss": 0.0331, + "num_input_tokens_seen": 549581396, + "step": 6021 + }, + { + "epoch": 25.0875, + "loss": 0.04130501300096512, + "loss_ce": 3.762021515285596e-05, + "loss_iou": 0.203125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 549581396, + "step": 6021 + }, + { + "epoch": 25.091666666666665, + "grad_norm": 4.008643885183847, + "learning_rate": 5e-05, + "loss": 0.0731, + "num_input_tokens_seen": 549672464, + "step": 6022 + }, + { + "epoch": 25.091666666666665, + "loss": 0.0670071691274643, + "loss_ce": 2.10778416658286e-05, + "loss_iou": 0.224609375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 549672464, + "step": 6022 + }, + { + "epoch": 25.095833333333335, + "grad_norm": 2.5295437885586853, + "learning_rate": 5e-05, + "loss": 0.0268, + "num_input_tokens_seen": 549763584, + "step": 6023 + }, + { + "epoch": 25.095833333333335, + "loss": 0.029910461977124214, + "loss_ce": 3.235071289964253e-06, + "loss_iou": 0.234375, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 549763584, + "step": 6023 + }, + { + "epoch": 25.1, + "grad_norm": 2.811572290753676, + "learning_rate": 5e-05, + "loss": 0.0487, + "num_input_tokens_seen": 549855288, + "step": 6024 + }, + { + "epoch": 25.1, + "loss": 0.03294540196657181, + "loss_ce": 1.6782751117716543e-06, + "loss_iou": 0.2734375, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 549855288, + "step": 6024 + }, + { + "epoch": 25.104166666666668, + "grad_norm": 2.3896023458812206, + "learning_rate": 5e-05, + "loss": 0.0477, + "num_input_tokens_seen": 549946244, + "step": 6025 + }, + { + "epoch": 25.104166666666668, + "loss": 0.06967095285654068, + "loss_ce": 1.4581054529116955e-05, + "loss_iou": 0.30078125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 549946244, + "step": 6025 + }, + { + "epoch": 25.108333333333334, + "grad_norm": 3.5066065692268977, + "learning_rate": 5e-05, + "loss": 0.0216, + "num_input_tokens_seen": 550037400, + "step": 6026 + }, + { + "epoch": 25.108333333333334, + "loss": 0.021294424310326576, + "loss_ce": 2.3672111638006754e-05, + "loss_iou": 0.2265625, + "loss_num": 0.0042724609375, + "loss_xval": 0.021240234375, + "num_input_tokens_seen": 550037400, + "step": 6026 + }, + { + "epoch": 25.1125, + "grad_norm": 3.5435575532416266, + "learning_rate": 5e-05, + "loss": 0.0336, + "num_input_tokens_seen": 550129352, + "step": 6027 + }, + { + "epoch": 25.1125, + "loss": 0.027449486777186394, + "loss_ce": 0.00019728824554476887, + "loss_iou": 0.2216796875, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 550129352, + "step": 6027 + }, + { + "epoch": 25.116666666666667, + "grad_norm": 2.154591012494385, + "learning_rate": 5e-05, + "loss": 0.0212, + "num_input_tokens_seen": 550220748, + "step": 6028 + }, + { + "epoch": 25.116666666666667, + "loss": 0.020502205938100815, + "loss_ce": 2.022517037403304e-06, + "loss_iou": 0.22265625, + "loss_num": 0.00408935546875, + "loss_xval": 0.0205078125, + "num_input_tokens_seen": 550220748, + "step": 6028 + }, + { + "epoch": 25.120833333333334, + "grad_norm": 2.858585344693595, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 550312356, + "step": 6029 + }, + { + "epoch": 25.120833333333334, + "loss": 0.027978356927633286, + "loss_ce": 1.3687447335541947e-06, + "loss_iou": 0.26953125, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 550312356, + "step": 6029 + }, + { + "epoch": 25.125, + "grad_norm": 3.69369681425425, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 550403388, + "step": 6030 + }, + { + "epoch": 25.125, + "loss": 0.07482340186834335, + "loss_ce": 9.55643645283999e-06, + "loss_iou": 0.14453125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 550403388, + "step": 6030 + }, + { + "epoch": 25.129166666666666, + "grad_norm": 2.2436760329202796, + "learning_rate": 5e-05, + "loss": 0.0246, + "num_input_tokens_seen": 550494840, + "step": 6031 + }, + { + "epoch": 25.129166666666666, + "loss": 0.022455964237451553, + "loss_ce": 1.7915572243509814e-05, + "loss_iou": 0.28515625, + "loss_num": 0.004486083984375, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 550494840, + "step": 6031 + }, + { + "epoch": 25.133333333333333, + "grad_norm": 3.0835356823004285, + "learning_rate": 5e-05, + "loss": 0.0829, + "num_input_tokens_seen": 550586596, + "step": 6032 + }, + { + "epoch": 25.133333333333333, + "loss": 0.07718226313591003, + "loss_ce": 0.023776497691869736, + "loss_iou": 0.27734375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 550586596, + "step": 6032 + }, + { + "epoch": 25.1375, + "grad_norm": 3.9053492602716466, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 550677468, + "step": 6033 + }, + { + "epoch": 25.1375, + "loss": 0.04059723764657974, + "loss_ce": 5.463583511300385e-05, + "loss_iou": 0.2421875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 550677468, + "step": 6033 + }, + { + "epoch": 25.141666666666666, + "grad_norm": 4.391838195127731, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 550769148, + "step": 6034 + }, + { + "epoch": 25.141666666666666, + "loss": 0.03933006525039673, + "loss_ce": 2.3427393898600712e-05, + "loss_iou": 0.310546875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 550769148, + "step": 6034 + }, + { + "epoch": 25.145833333333332, + "grad_norm": 1.702615205429913, + "learning_rate": 5e-05, + "loss": 0.0276, + "num_input_tokens_seen": 550860784, + "step": 6035 + }, + { + "epoch": 25.145833333333332, + "loss": 0.03935377299785614, + "loss_ce": 1.3561893865698949e-06, + "loss_iou": 0.193359375, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 550860784, + "step": 6035 + }, + { + "epoch": 25.15, + "grad_norm": 8.526456658638734, + "learning_rate": 5e-05, + "loss": 0.042, + "num_input_tokens_seen": 550952312, + "step": 6036 + }, + { + "epoch": 25.15, + "loss": 0.04818908870220184, + "loss_ce": 1.8335138065594947e-06, + "loss_iou": 0.1337890625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 550952312, + "step": 6036 + }, + { + "epoch": 25.154166666666665, + "grad_norm": 1.5319893922990855, + "learning_rate": 5e-05, + "loss": 0.0448, + "num_input_tokens_seen": 551043984, + "step": 6037 + }, + { + "epoch": 25.154166666666665, + "loss": 0.04240569844841957, + "loss_ce": 1.5248575664372765e-06, + "loss_iou": 0.201171875, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 551043984, + "step": 6037 + }, + { + "epoch": 25.158333333333335, + "grad_norm": 1.976474479972411, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 551134884, + "step": 6038 + }, + { + "epoch": 25.158333333333335, + "loss": 0.06959168612957001, + "loss_ce": 3.971980731876101e-06, + "loss_iou": 0.1650390625, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 551134884, + "step": 6038 + }, + { + "epoch": 25.1625, + "grad_norm": 8.437012268532477, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 551226476, + "step": 6039 + }, + { + "epoch": 25.1625, + "loss": 0.051346659660339355, + "loss_ce": 0.00024497474078089, + "loss_iou": 0.265625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 551226476, + "step": 6039 + }, + { + "epoch": 25.166666666666668, + "grad_norm": 2.817841350402008, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 551317596, + "step": 6040 + }, + { + "epoch": 25.166666666666668, + "loss": 0.03814946487545967, + "loss_ce": 2.493340844011982e-06, + "loss_iou": 0.357421875, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 551317596, + "step": 6040 + }, + { + "epoch": 25.170833333333334, + "grad_norm": 2.861904774708129, + "learning_rate": 5e-05, + "loss": 0.0278, + "num_input_tokens_seen": 551409240, + "step": 6041 + }, + { + "epoch": 25.170833333333334, + "loss": 0.026386898010969162, + "loss_ce": 1.208052799483994e-05, + "loss_iou": 0.30078125, + "loss_num": 0.005279541015625, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 551409240, + "step": 6041 + }, + { + "epoch": 25.175, + "grad_norm": 2.7754420985172317, + "learning_rate": 5e-05, + "loss": 0.0257, + "num_input_tokens_seen": 551500688, + "step": 6042 + }, + { + "epoch": 25.175, + "loss": 0.02647359110414982, + "loss_ce": 0.00019795526168309152, + "loss_iou": 0.22265625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 551500688, + "step": 6042 + }, + { + "epoch": 25.179166666666667, + "grad_norm": 3.2049472199198212, + "learning_rate": 5e-05, + "loss": 0.034, + "num_input_tokens_seen": 551592012, + "step": 6043 + }, + { + "epoch": 25.179166666666667, + "loss": 0.03987519443035126, + "loss_ce": 3.9797023418941535e-06, + "loss_iou": 0.255859375, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 551592012, + "step": 6043 + }, + { + "epoch": 25.183333333333334, + "grad_norm": 4.081577881387569, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 551683120, + "step": 6044 + }, + { + "epoch": 25.183333333333334, + "loss": 0.05834193900227547, + "loss_ce": 7.586995252495399e-06, + "loss_iou": 0.31640625, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 551683120, + "step": 6044 + }, + { + "epoch": 25.1875, + "grad_norm": 3.7294875673962977, + "learning_rate": 5e-05, + "loss": 0.0434, + "num_input_tokens_seen": 551773720, + "step": 6045 + }, + { + "epoch": 25.1875, + "loss": 0.047974590212106705, + "loss_ce": 9.558589226799086e-07, + "loss_iou": 0.279296875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 551773720, + "step": 6045 + }, + { + "epoch": 25.191666666666666, + "grad_norm": 3.02537900411946, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 551865096, + "step": 6046 + }, + { + "epoch": 25.191666666666666, + "loss": 0.025306470692157745, + "loss_ce": 7.398280104098376e-06, + "loss_iou": 0.271484375, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 551865096, + "step": 6046 + }, + { + "epoch": 25.195833333333333, + "grad_norm": 2.8880457566644586, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 551956344, + "step": 6047 + }, + { + "epoch": 25.195833333333333, + "loss": 0.027018003165721893, + "loss_ce": 9.946590580511838e-06, + "loss_iou": 0.23828125, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 551956344, + "step": 6047 + }, + { + "epoch": 25.2, + "grad_norm": 3.0617668203476756, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 552047920, + "step": 6048 + }, + { + "epoch": 25.2, + "loss": 0.04437287151813507, + "loss_ce": 0.0005038546514697373, + "loss_iou": 0.248046875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 552047920, + "step": 6048 + }, + { + "epoch": 25.204166666666666, + "grad_norm": 3.1356232150398906, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 552139636, + "step": 6049 + }, + { + "epoch": 25.204166666666666, + "loss": 0.045794256031513214, + "loss_ce": 2.6284108116669813e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 552139636, + "step": 6049 + }, + { + "epoch": 25.208333333333332, + "grad_norm": 3.108656855627276, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 552230548, + "step": 6050 + }, + { + "epoch": 25.208333333333332, + "loss": 0.02768785133957863, + "loss_ce": 8.406275810557418e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.005523681640625, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 552230548, + "step": 6050 + }, + { + "epoch": 25.2125, + "grad_norm": 2.556294518952924, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 552321944, + "step": 6051 + }, + { + "epoch": 25.2125, + "loss": 0.041742969304323196, + "loss_ce": 7.121398812159896e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 552321944, + "step": 6051 + }, + { + "epoch": 25.216666666666665, + "grad_norm": 1.604112879099758, + "learning_rate": 5e-05, + "loss": 0.0303, + "num_input_tokens_seen": 552412204, + "step": 6052 + }, + { + "epoch": 25.216666666666665, + "loss": 0.03724295273423195, + "loss_ce": 3.439585998421535e-05, + "loss_iou": 0.1767578125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 552412204, + "step": 6052 + }, + { + "epoch": 25.220833333333335, + "grad_norm": 2.169097902979818, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 552503264, + "step": 6053 + }, + { + "epoch": 25.220833333333335, + "loss": 0.043073736131191254, + "loss_ce": 1.3429884347715415e-05, + "loss_iou": 0.134765625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 552503264, + "step": 6053 + }, + { + "epoch": 25.225, + "grad_norm": 2.6938214772458475, + "learning_rate": 5e-05, + "loss": 0.056, + "num_input_tokens_seen": 552594460, + "step": 6054 + }, + { + "epoch": 25.225, + "loss": 0.05623520910739899, + "loss_ce": 0.0016163706313818693, + "loss_iou": 0.388671875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 552594460, + "step": 6054 + }, + { + "epoch": 25.229166666666668, + "grad_norm": 2.4388213256353657, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 552686664, + "step": 6055 + }, + { + "epoch": 25.229166666666668, + "loss": 0.03916994854807854, + "loss_ce": 8.268221790785901e-06, + "loss_iou": 0.26953125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 552686664, + "step": 6055 + }, + { + "epoch": 25.233333333333334, + "grad_norm": 2.30275518305168, + "learning_rate": 5e-05, + "loss": 0.0368, + "num_input_tokens_seen": 552777596, + "step": 6056 + }, + { + "epoch": 25.233333333333334, + "loss": 0.030525466427206993, + "loss_ce": 7.88822399044875e-06, + "loss_iou": 0.248046875, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 552777596, + "step": 6056 + }, + { + "epoch": 25.2375, + "grad_norm": 7.904650634242569, + "learning_rate": 5e-05, + "loss": 0.0728, + "num_input_tokens_seen": 552869104, + "step": 6057 + }, + { + "epoch": 25.2375, + "loss": 0.04267742484807968, + "loss_ce": 1.3848333765054122e-05, + "loss_iou": 0.314453125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 552869104, + "step": 6057 + }, + { + "epoch": 25.241666666666667, + "grad_norm": 1.6329504772202257, + "learning_rate": 5e-05, + "loss": 0.0224, + "num_input_tokens_seen": 552960884, + "step": 6058 + }, + { + "epoch": 25.241666666666667, + "loss": 0.017983609810471535, + "loss_ce": 7.74200598243624e-05, + "loss_iou": 0.1875, + "loss_num": 0.0035858154296875, + "loss_xval": 0.0179443359375, + "num_input_tokens_seen": 552960884, + "step": 6058 + }, + { + "epoch": 25.245833333333334, + "grad_norm": 1.629977688834735, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 553052332, + "step": 6059 + }, + { + "epoch": 25.245833333333334, + "loss": 0.02765512838959694, + "loss_ce": 6.202018084877636e-06, + "loss_iou": 0.1650390625, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 553052332, + "step": 6059 + }, + { + "epoch": 25.25, + "grad_norm": 1.151649146290314, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 553143824, + "step": 6060 + }, + { + "epoch": 25.25, + "loss": 0.04415666684508324, + "loss_ce": 1.2988108210265636e-05, + "loss_iou": 0.203125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 553143824, + "step": 6060 + }, + { + "epoch": 25.254166666666666, + "grad_norm": 2.573206958944099, + "learning_rate": 5e-05, + "loss": 0.0265, + "num_input_tokens_seen": 553235640, + "step": 6061 + }, + { + "epoch": 25.254166666666666, + "loss": 0.022646624594926834, + "loss_ce": 0.00011702188930939883, + "loss_iou": 0.228515625, + "loss_num": 0.0045166015625, + "loss_xval": 0.0225830078125, + "num_input_tokens_seen": 553235640, + "step": 6061 + }, + { + "epoch": 25.258333333333333, + "grad_norm": 2.543205432218472, + "learning_rate": 5e-05, + "loss": 0.0283, + "num_input_tokens_seen": 553327176, + "step": 6062 + }, + { + "epoch": 25.258333333333333, + "loss": 0.031073156744241714, + "loss_ce": 6.262220722419443e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.0062255859375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 553327176, + "step": 6062 + }, + { + "epoch": 25.2625, + "grad_norm": 1.7539115692603051, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 553417908, + "step": 6063 + }, + { + "epoch": 25.2625, + "loss": 0.0423419363796711, + "loss_ce": 2.9313752747839317e-05, + "loss_iou": 0.142578125, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 553417908, + "step": 6063 + }, + { + "epoch": 25.266666666666666, + "grad_norm": 2.6240582297613866, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 553509024, + "step": 6064 + }, + { + "epoch": 25.266666666666666, + "loss": 0.04294588416814804, + "loss_ce": 2.2912352505954914e-05, + "loss_iou": 0.220703125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 553509024, + "step": 6064 + }, + { + "epoch": 25.270833333333332, + "grad_norm": 3.4733995884437077, + "learning_rate": 5e-05, + "loss": 0.0284, + "num_input_tokens_seen": 553600624, + "step": 6065 + }, + { + "epoch": 25.270833333333332, + "loss": 0.02730099856853485, + "loss_ce": 3.0246562801039545e-06, + "loss_iou": 0.26953125, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 553600624, + "step": 6065 + }, + { + "epoch": 25.275, + "grad_norm": 2.3389696239677797, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 553692356, + "step": 6066 + }, + { + "epoch": 25.275, + "loss": 0.036377716809511185, + "loss_ce": 8.394465112360194e-06, + "loss_iou": 0.23828125, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 553692356, + "step": 6066 + }, + { + "epoch": 25.279166666666665, + "grad_norm": 1.6033628000150022, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 553784324, + "step": 6067 + }, + { + "epoch": 25.279166666666665, + "loss": 0.026155997067689896, + "loss_ce": 4.630144394468516e-05, + "loss_iou": 0.248046875, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 553784324, + "step": 6067 + }, + { + "epoch": 25.283333333333335, + "grad_norm": 2.9244443998332263, + "learning_rate": 5e-05, + "loss": 0.044, + "num_input_tokens_seen": 553875540, + "step": 6068 + }, + { + "epoch": 25.283333333333335, + "loss": 0.06535577028989792, + "loss_ce": 2.377549662924139e-06, + "loss_iou": 0.23828125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 553875540, + "step": 6068 + }, + { + "epoch": 25.2875, + "grad_norm": 3.807395834154097, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 553966832, + "step": 6069 + }, + { + "epoch": 25.2875, + "loss": 0.05497179552912712, + "loss_ce": 9.639084055379499e-06, + "loss_iou": 0.23046875, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 553966832, + "step": 6069 + }, + { + "epoch": 25.291666666666668, + "grad_norm": 2.834562012780839, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 554058148, + "step": 6070 + }, + { + "epoch": 25.291666666666668, + "loss": 0.029552895575761795, + "loss_ce": 1.1879415978910401e-05, + "loss_iou": 0.216796875, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 554058148, + "step": 6070 + }, + { + "epoch": 25.295833333333334, + "grad_norm": 2.661763152696126, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 554149580, + "step": 6071 + }, + { + "epoch": 25.295833333333334, + "loss": 0.02328696846961975, + "loss_ce": 3.257199205108918e-05, + "loss_iou": 0.21484375, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 554149580, + "step": 6071 + }, + { + "epoch": 25.3, + "grad_norm": 2.5368975839965318, + "learning_rate": 5e-05, + "loss": 0.0576, + "num_input_tokens_seen": 554241028, + "step": 6072 + }, + { + "epoch": 25.3, + "loss": 0.04571257531642914, + "loss_ce": 0.00034056510776281357, + "loss_iou": 0.2431640625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 554241028, + "step": 6072 + }, + { + "epoch": 25.304166666666667, + "grad_norm": 2.9454011017858925, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 554332176, + "step": 6073 + }, + { + "epoch": 25.304166666666667, + "loss": 0.038939282298088074, + "loss_ce": 0.00022773287491872907, + "loss_iou": 0.150390625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 554332176, + "step": 6073 + }, + { + "epoch": 25.308333333333334, + "grad_norm": 2.6148875530533044, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 554424136, + "step": 6074 + }, + { + "epoch": 25.308333333333334, + "loss": 0.04254477471113205, + "loss_ce": 3.2680629828973906e-06, + "loss_iou": 0.2265625, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 554424136, + "step": 6074 + }, + { + "epoch": 25.3125, + "grad_norm": 1.9303923916065067, + "learning_rate": 5e-05, + "loss": 0.0236, + "num_input_tokens_seen": 554515640, + "step": 6075 + }, + { + "epoch": 25.3125, + "loss": 0.022905535995960236, + "loss_ce": 2.0937043245794484e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 554515640, + "step": 6075 + }, + { + "epoch": 25.316666666666666, + "grad_norm": 4.4026124210329085, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 554607388, + "step": 6076 + }, + { + "epoch": 25.316666666666666, + "loss": 0.01897761970758438, + "loss_ce": 3.31442925016745e-06, + "loss_iou": 0.2578125, + "loss_num": 0.0037994384765625, + "loss_xval": 0.0189208984375, + "num_input_tokens_seen": 554607388, + "step": 6076 + }, + { + "epoch": 25.320833333333333, + "grad_norm": 2.605943019448658, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 554698436, + "step": 6077 + }, + { + "epoch": 25.320833333333333, + "loss": 0.036523886024951935, + "loss_ce": 1.9757746940740617e-06, + "loss_iou": 0.185546875, + "loss_num": 0.007293701171875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 554698436, + "step": 6077 + }, + { + "epoch": 25.325, + "grad_norm": 1.573186431228264, + "learning_rate": 5e-05, + "loss": 0.0208, + "num_input_tokens_seen": 554789692, + "step": 6078 + }, + { + "epoch": 25.325, + "loss": 0.020063627511262894, + "loss_ce": 6.698464130749926e-05, + "loss_iou": 0.158203125, + "loss_num": 0.003997802734375, + "loss_xval": 0.02001953125, + "num_input_tokens_seen": 554789692, + "step": 6078 + }, + { + "epoch": 25.329166666666666, + "grad_norm": 2.068659930680847, + "learning_rate": 5e-05, + "loss": 0.0248, + "num_input_tokens_seen": 554881188, + "step": 6079 + }, + { + "epoch": 25.329166666666666, + "loss": 0.02518697828054428, + "loss_ce": 9.976995897886809e-06, + "loss_iou": 0.302734375, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 554881188, + "step": 6079 + }, + { + "epoch": 25.333333333333332, + "grad_norm": 2.5877747783948086, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 554971984, + "step": 6080 + }, + { + "epoch": 25.333333333333332, + "loss": 0.045886993408203125, + "loss_ce": 3.8162011151143815e-06, + "loss_iou": 0.23828125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 554971984, + "step": 6080 + }, + { + "epoch": 25.3375, + "grad_norm": 2.0839909687644758, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 555063872, + "step": 6081 + }, + { + "epoch": 25.3375, + "loss": 0.04413619637489319, + "loss_ce": 1.5406214515678585e-05, + "loss_iou": 0.138671875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 555063872, + "step": 6081 + }, + { + "epoch": 25.341666666666665, + "grad_norm": 2.2894444802931857, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 555156132, + "step": 6082 + }, + { + "epoch": 25.341666666666665, + "loss": 0.029500527307391167, + "loss_ce": 5.286502528178971e-06, + "loss_iou": 0.1650390625, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 555156132, + "step": 6082 + }, + { + "epoch": 25.345833333333335, + "grad_norm": 2.591850664136599, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 555247880, + "step": 6083 + }, + { + "epoch": 25.345833333333335, + "loss": 0.08285599946975708, + "loss_ce": 8.408465873799287e-06, + "loss_iou": 0.314453125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 555247880, + "step": 6083 + }, + { + "epoch": 25.35, + "grad_norm": 2.980730081723208, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 555339520, + "step": 6084 + }, + { + "epoch": 25.35, + "loss": 0.04531401768326759, + "loss_ce": 1.0674537406885065e-05, + "loss_iou": 0.341796875, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 555339520, + "step": 6084 + }, + { + "epoch": 25.354166666666668, + "grad_norm": 2.0120800401632333, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 555431580, + "step": 6085 + }, + { + "epoch": 25.354166666666668, + "loss": 0.03285948187112808, + "loss_ce": 7.307754458452109e-06, + "loss_iou": 0.2734375, + "loss_num": 0.006561279296875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 555431580, + "step": 6085 + }, + { + "epoch": 25.358333333333334, + "grad_norm": 1.744347188648586, + "learning_rate": 5e-05, + "loss": 0.0535, + "num_input_tokens_seen": 555522828, + "step": 6086 + }, + { + "epoch": 25.358333333333334, + "loss": 0.03260708227753639, + "loss_ce": 1.4308277059171814e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 555522828, + "step": 6086 + }, + { + "epoch": 25.3625, + "grad_norm": 2.099605791813135, + "learning_rate": 5e-05, + "loss": 0.029, + "num_input_tokens_seen": 555614424, + "step": 6087 + }, + { + "epoch": 25.3625, + "loss": 0.02494313381612301, + "loss_ce": 4.84204210806638e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 555614424, + "step": 6087 + }, + { + "epoch": 25.366666666666667, + "grad_norm": 2.571578714794204, + "learning_rate": 5e-05, + "loss": 0.057, + "num_input_tokens_seen": 555705584, + "step": 6088 + }, + { + "epoch": 25.366666666666667, + "loss": 0.03919106349349022, + "loss_ce": 6.493094133475097e-06, + "loss_iou": 0.232421875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 555705584, + "step": 6088 + }, + { + "epoch": 25.370833333333334, + "grad_norm": 4.240971299263898, + "learning_rate": 5e-05, + "loss": 0.0352, + "num_input_tokens_seen": 555796600, + "step": 6089 + }, + { + "epoch": 25.370833333333334, + "loss": 0.030032845214009285, + "loss_ce": 3.5493658288032748e-06, + "loss_iou": 0.2353515625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 555796600, + "step": 6089 + }, + { + "epoch": 25.375, + "grad_norm": 2.91714991685141, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 555888220, + "step": 6090 + }, + { + "epoch": 25.375, + "loss": 0.024421460926532745, + "loss_ce": 7.397808531095507e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 555888220, + "step": 6090 + }, + { + "epoch": 25.379166666666666, + "grad_norm": 2.6817740223888205, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 555978744, + "step": 6091 + }, + { + "epoch": 25.379166666666666, + "loss": 0.04826442897319794, + "loss_ce": 0.0008095953380689025, + "loss_iou": 0.32421875, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 555978744, + "step": 6091 + }, + { + "epoch": 25.383333333333333, + "grad_norm": 2.820622322141332, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 556071016, + "step": 6092 + }, + { + "epoch": 25.383333333333333, + "loss": 0.04822583124041557, + "loss_ce": 8.055085345404223e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 556071016, + "step": 6092 + }, + { + "epoch": 25.3875, + "grad_norm": 2.7407924939090265, + "learning_rate": 5e-05, + "loss": 0.045, + "num_input_tokens_seen": 556161896, + "step": 6093 + }, + { + "epoch": 25.3875, + "loss": 0.03777515888214111, + "loss_ce": 2.491514533176087e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.007537841796875, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 556161896, + "step": 6093 + }, + { + "epoch": 25.391666666666666, + "grad_norm": 3.111564298227178, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 556253152, + "step": 6094 + }, + { + "epoch": 25.391666666666666, + "loss": 0.07930988818407059, + "loss_ce": 0.00014728563837707043, + "loss_iou": 0.1943359375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 556253152, + "step": 6094 + }, + { + "epoch": 25.395833333333332, + "grad_norm": 2.469716527508771, + "learning_rate": 5e-05, + "loss": 0.0292, + "num_input_tokens_seen": 556344880, + "step": 6095 + }, + { + "epoch": 25.395833333333332, + "loss": 0.029712753370404243, + "loss_ce": 1.1521155101945624e-05, + "loss_iou": 0.208984375, + "loss_num": 0.005950927734375, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 556344880, + "step": 6095 + }, + { + "epoch": 25.4, + "grad_norm": 2.9204098755484718, + "learning_rate": 5e-05, + "loss": 0.0423, + "num_input_tokens_seen": 556436112, + "step": 6096 + }, + { + "epoch": 25.4, + "loss": 0.06473027169704437, + "loss_ce": 2.4858775304892333e-06, + "loss_iou": 0.3046875, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 556436112, + "step": 6096 + }, + { + "epoch": 25.404166666666665, + "grad_norm": 3.7665925035988774, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 556527744, + "step": 6097 + }, + { + "epoch": 25.404166666666665, + "loss": 0.06511792540550232, + "loss_ce": 0.0021601621992886066, + "loss_iou": 0.37890625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 556527744, + "step": 6097 + }, + { + "epoch": 25.408333333333335, + "grad_norm": 2.260592190063528, + "learning_rate": 5e-05, + "loss": 0.0291, + "num_input_tokens_seen": 556618672, + "step": 6098 + }, + { + "epoch": 25.408333333333335, + "loss": 0.03642168268561363, + "loss_ce": 0.0002125762403011322, + "loss_iou": 0.2412109375, + "loss_num": 0.00726318359375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 556618672, + "step": 6098 + }, + { + "epoch": 25.4125, + "grad_norm": 3.1675027303796948, + "learning_rate": 5e-05, + "loss": 0.0384, + "num_input_tokens_seen": 556710084, + "step": 6099 + }, + { + "epoch": 25.4125, + "loss": 0.0574367418885231, + "loss_ce": 2.656482593010878e-06, + "loss_iou": 0.294921875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 556710084, + "step": 6099 + }, + { + "epoch": 25.416666666666668, + "grad_norm": 2.648084156187906, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 556801032, + "step": 6100 + }, + { + "epoch": 25.416666666666668, + "loss": 0.08385223150253296, + "loss_ce": 5.189658622839488e-06, + "loss_iou": 0.2353515625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 556801032, + "step": 6100 + }, + { + "epoch": 25.420833333333334, + "grad_norm": 1.7032329247033198, + "learning_rate": 5e-05, + "loss": 0.0318, + "num_input_tokens_seen": 556892808, + "step": 6101 + }, + { + "epoch": 25.420833333333334, + "loss": 0.03905937820672989, + "loss_ce": 2.739548472163733e-05, + "loss_iou": 0.3125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 556892808, + "step": 6101 + }, + { + "epoch": 25.425, + "grad_norm": 5.489724193926814, + "learning_rate": 5e-05, + "loss": 0.0485, + "num_input_tokens_seen": 556984324, + "step": 6102 + }, + { + "epoch": 25.425, + "loss": 0.025776395574212074, + "loss_ce": 4.301471562939696e-06, + "loss_iou": 0.2890625, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 556984324, + "step": 6102 + }, + { + "epoch": 25.429166666666667, + "grad_norm": 1.882860573439344, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 557075480, + "step": 6103 + }, + { + "epoch": 25.429166666666667, + "loss": 0.028099657967686653, + "loss_ce": 8.226294994528871e-06, + "loss_iou": 0.23828125, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 557075480, + "step": 6103 + }, + { + "epoch": 25.433333333333334, + "grad_norm": 3.51761967547373, + "learning_rate": 5e-05, + "loss": 0.027, + "num_input_tokens_seen": 557166728, + "step": 6104 + }, + { + "epoch": 25.433333333333334, + "loss": 0.02423533797264099, + "loss_ce": 4.3809313865494914e-06, + "loss_iou": 0.1533203125, + "loss_num": 0.004852294921875, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 557166728, + "step": 6104 + }, + { + "epoch": 25.4375, + "grad_norm": 25.872497182724743, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 557258312, + "step": 6105 + }, + { + "epoch": 25.4375, + "loss": 0.059171389788389206, + "loss_ce": 1.3064649465377443e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 557258312, + "step": 6105 + }, + { + "epoch": 25.441666666666666, + "grad_norm": 33.64559528321087, + "learning_rate": 5e-05, + "loss": 0.06, + "num_input_tokens_seen": 557349740, + "step": 6106 + }, + { + "epoch": 25.441666666666666, + "loss": 0.03349640592932701, + "loss_ce": 3.364375970704714e-06, + "loss_iou": 0.291015625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 557349740, + "step": 6106 + }, + { + "epoch": 25.445833333333333, + "grad_norm": 2.288998411873553, + "learning_rate": 5e-05, + "loss": 0.0412, + "num_input_tokens_seen": 557440816, + "step": 6107 + }, + { + "epoch": 25.445833333333333, + "loss": 0.027652215212583542, + "loss_ce": 3.2900277346925577e-06, + "loss_iou": 0.1533203125, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 557440816, + "step": 6107 + }, + { + "epoch": 25.45, + "grad_norm": 0.9314464545262887, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 557532080, + "step": 6108 + }, + { + "epoch": 25.45, + "loss": 0.07703755050897598, + "loss_ce": 7.985006959643215e-05, + "loss_iou": 0.201171875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 557532080, + "step": 6108 + }, + { + "epoch": 25.454166666666666, + "grad_norm": 1.241327751825131, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 557623024, + "step": 6109 + }, + { + "epoch": 25.454166666666666, + "loss": 0.06884145736694336, + "loss_ce": 9.057611350726802e-06, + "loss_iou": 0.1455078125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 557623024, + "step": 6109 + }, + { + "epoch": 25.458333333333332, + "grad_norm": 1.1212944439629025, + "learning_rate": 5e-05, + "loss": 0.0198, + "num_input_tokens_seen": 557714656, + "step": 6110 + }, + { + "epoch": 25.458333333333332, + "loss": 0.02408221922814846, + "loss_ce": 1.147904731624294e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 557714656, + "step": 6110 + }, + { + "epoch": 25.4625, + "grad_norm": 2.017701233508004, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 557805492, + "step": 6111 + }, + { + "epoch": 25.4625, + "loss": 0.03724497929215431, + "loss_ce": 5.9048184084531385e-06, + "loss_iou": 0.21484375, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 557805492, + "step": 6111 + }, + { + "epoch": 25.466666666666665, + "grad_norm": 2.0441244319648897, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 557896420, + "step": 6112 + }, + { + "epoch": 25.466666666666665, + "loss": 0.03492957353591919, + "loss_ce": 2.2076055756770074e-06, + "loss_iou": 0.150390625, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 557896420, + "step": 6112 + }, + { + "epoch": 25.470833333333335, + "grad_norm": 2.491763401469673, + "learning_rate": 5e-05, + "loss": 0.0295, + "num_input_tokens_seen": 557988276, + "step": 6113 + }, + { + "epoch": 25.470833333333335, + "loss": 0.028048336505889893, + "loss_ce": 2.5569899662514217e-05, + "loss_iou": 0.177734375, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 557988276, + "step": 6113 + }, + { + "epoch": 25.475, + "grad_norm": 2.5447415831955773, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 558079432, + "step": 6114 + }, + { + "epoch": 25.475, + "loss": 0.02955949306488037, + "loss_ce": 1.8475300748832524e-05, + "loss_iou": 0.26953125, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 558079432, + "step": 6114 + }, + { + "epoch": 25.479166666666668, + "grad_norm": 3.3400385296586834, + "learning_rate": 5e-05, + "loss": 0.04, + "num_input_tokens_seen": 558170584, + "step": 6115 + }, + { + "epoch": 25.479166666666668, + "loss": 0.036854639649391174, + "loss_ce": 4.66698429590906e-06, + "loss_iou": 0.275390625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 558170584, + "step": 6115 + }, + { + "epoch": 25.483333333333334, + "grad_norm": 3.265928169946302, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 558261288, + "step": 6116 + }, + { + "epoch": 25.483333333333334, + "loss": 0.047640688717365265, + "loss_ce": 2.7500962005433394e-06, + "loss_iou": 0.181640625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 558261288, + "step": 6116 + }, + { + "epoch": 25.4875, + "grad_norm": 2.7706822553364066, + "learning_rate": 5e-05, + "loss": 0.0565, + "num_input_tokens_seen": 558352716, + "step": 6117 + }, + { + "epoch": 25.4875, + "loss": 0.08690313994884491, + "loss_ce": 4.333661308919545e-06, + "loss_iou": 0.1953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 558352716, + "step": 6117 + }, + { + "epoch": 25.491666666666667, + "grad_norm": 2.24556863927375, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 558443444, + "step": 6118 + }, + { + "epoch": 25.491666666666667, + "loss": 0.040499813854694366, + "loss_ce": 2.989433141920017e-06, + "loss_iou": 0.30078125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 558443444, + "step": 6118 + }, + { + "epoch": 25.495833333333334, + "grad_norm": 1.6439354079658433, + "learning_rate": 5e-05, + "loss": 0.0424, + "num_input_tokens_seen": 558535240, + "step": 6119 + }, + { + "epoch": 25.495833333333334, + "loss": 0.048288244754076004, + "loss_ce": 9.436404070584103e-06, + "loss_iou": 0.34375, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 558535240, + "step": 6119 + }, + { + "epoch": 25.5, + "grad_norm": 3.24354781076264, + "learning_rate": 5e-05, + "loss": 0.0423, + "num_input_tokens_seen": 558626272, + "step": 6120 + }, + { + "epoch": 25.5, + "loss": 0.05987918749451637, + "loss_ce": 3.6996714243286988e-06, + "loss_iou": 0.1494140625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 558626272, + "step": 6120 + }, + { + "epoch": 25.504166666666666, + "grad_norm": 2.5912436325314285, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 558717976, + "step": 6121 + }, + { + "epoch": 25.504166666666666, + "loss": 0.05477771908044815, + "loss_ce": 2.1548537915805355e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 558717976, + "step": 6121 + }, + { + "epoch": 25.508333333333333, + "grad_norm": 2.347266316565128, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 558808952, + "step": 6122 + }, + { + "epoch": 25.508333333333333, + "loss": 0.028351813554763794, + "loss_ce": 1.6242007404798642e-05, + "loss_iou": 0.29296875, + "loss_num": 0.00567626953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 558808952, + "step": 6122 + }, + { + "epoch": 25.5125, + "grad_norm": 1.9858400015423128, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 558899452, + "step": 6123 + }, + { + "epoch": 25.5125, + "loss": 0.06100527197122574, + "loss_ce": 1.5890123904682696e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 558899452, + "step": 6123 + }, + { + "epoch": 25.516666666666666, + "grad_norm": 2.3835819394228754, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 558990088, + "step": 6124 + }, + { + "epoch": 25.516666666666666, + "loss": 0.02575359307229519, + "loss_ce": 4.386562977742869e-06, + "loss_iou": 0.26171875, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 558990088, + "step": 6124 + }, + { + "epoch": 25.520833333333332, + "grad_norm": 3.904938479971484, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 559081656, + "step": 6125 + }, + { + "epoch": 25.520833333333332, + "loss": 0.04114597290754318, + "loss_ce": 8.280196198029444e-06, + "loss_iou": 0.28125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 559081656, + "step": 6125 + }, + { + "epoch": 25.525, + "grad_norm": 2.9375503556599205, + "learning_rate": 5e-05, + "loss": 0.0808, + "num_input_tokens_seen": 559173288, + "step": 6126 + }, + { + "epoch": 25.525, + "loss": 0.11002198606729507, + "loss_ce": 0.0004333627293817699, + "loss_iou": 0.22265625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 559173288, + "step": 6126 + }, + { + "epoch": 25.529166666666665, + "grad_norm": 2.037901143825145, + "learning_rate": 5e-05, + "loss": 0.0602, + "num_input_tokens_seen": 559264852, + "step": 6127 + }, + { + "epoch": 25.529166666666665, + "loss": 0.042439281940460205, + "loss_ce": 4.587895546137588e-06, + "loss_iou": 0.189453125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 559264852, + "step": 6127 + }, + { + "epoch": 25.533333333333335, + "grad_norm": 2.7313235323653307, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 559356024, + "step": 6128 + }, + { + "epoch": 25.533333333333335, + "loss": 0.04620426520705223, + "loss_ce": 7.69436446717009e-05, + "loss_iou": 0.251953125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 559356024, + "step": 6128 + }, + { + "epoch": 25.5375, + "grad_norm": 3.677637384356609, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 559445656, + "step": 6129 + }, + { + "epoch": 25.5375, + "loss": 0.0315367616713047, + "loss_ce": 1.2105560927011538e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 559445656, + "step": 6129 + }, + { + "epoch": 25.541666666666668, + "grad_norm": 1.4822979443891784, + "learning_rate": 5e-05, + "loss": 0.026, + "num_input_tokens_seen": 559536732, + "step": 6130 + }, + { + "epoch": 25.541666666666668, + "loss": 0.029575761407613754, + "loss_ce": 4.225951670377981e-06, + "loss_iou": 0.201171875, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 559536732, + "step": 6130 + }, + { + "epoch": 25.545833333333334, + "grad_norm": 1.8855107184609745, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 559628444, + "step": 6131 + }, + { + "epoch": 25.545833333333334, + "loss": 0.056017693132162094, + "loss_ce": 2.6805405468621757e-06, + "loss_iou": 0.27734375, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 559628444, + "step": 6131 + }, + { + "epoch": 25.55, + "grad_norm": 2.651588365035582, + "learning_rate": 5e-05, + "loss": 0.0519, + "num_input_tokens_seen": 559719208, + "step": 6132 + }, + { + "epoch": 25.55, + "loss": 0.04326911270618439, + "loss_ce": 2.819902192641166e-06, + "loss_iou": 0.2138671875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 559719208, + "step": 6132 + }, + { + "epoch": 25.554166666666667, + "grad_norm": 2.445094593769028, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 559810484, + "step": 6133 + }, + { + "epoch": 25.554166666666667, + "loss": 0.0506831593811512, + "loss_ce": 8.721475751372054e-06, + "loss_iou": 0.294921875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 559810484, + "step": 6133 + }, + { + "epoch": 25.558333333333334, + "grad_norm": 3.094808076272301, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 559900268, + "step": 6134 + }, + { + "epoch": 25.558333333333334, + "loss": 0.034136466681957245, + "loss_ce": 2.554569846324739e-06, + "loss_iou": 0.201171875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 559900268, + "step": 6134 + }, + { + "epoch": 25.5625, + "grad_norm": 3.0796429838124317, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 559991272, + "step": 6135 + }, + { + "epoch": 25.5625, + "loss": 0.027941647917032242, + "loss_ce": 2.805052190524293e-06, + "loss_iou": 0.3359375, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 559991272, + "step": 6135 + }, + { + "epoch": 25.566666666666666, + "grad_norm": 2.9306883834033335, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 560082028, + "step": 6136 + }, + { + "epoch": 25.566666666666666, + "loss": 0.046127140522003174, + "loss_ce": 0.0001371528342133388, + "loss_iou": 0.21875, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 560082028, + "step": 6136 + }, + { + "epoch": 25.570833333333333, + "grad_norm": 12.48278292680775, + "learning_rate": 5e-05, + "loss": 0.0317, + "num_input_tokens_seen": 560172916, + "step": 6137 + }, + { + "epoch": 25.570833333333333, + "loss": 0.03275076672434807, + "loss_ce": 5.406980108091375e-06, + "loss_iou": 0.171875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 560172916, + "step": 6137 + }, + { + "epoch": 25.575, + "grad_norm": 3.4255258269354933, + "learning_rate": 5e-05, + "loss": 0.0504, + "num_input_tokens_seen": 560264344, + "step": 6138 + }, + { + "epoch": 25.575, + "loss": 0.05948089435696602, + "loss_ce": 2.1331347852537874e-06, + "loss_iou": 0.224609375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 560264344, + "step": 6138 + }, + { + "epoch": 25.579166666666666, + "grad_norm": 6.176161240214384, + "learning_rate": 5e-05, + "loss": 0.0672, + "num_input_tokens_seen": 560355508, + "step": 6139 + }, + { + "epoch": 25.579166666666666, + "loss": 0.08296459913253784, + "loss_ce": 2.5523606836941326e-06, + "loss_iou": 0.19140625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 560355508, + "step": 6139 + }, + { + "epoch": 25.583333333333332, + "grad_norm": 14.835425161934097, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 560446256, + "step": 6140 + }, + { + "epoch": 25.583333333333332, + "loss": 0.08755681663751602, + "loss_ce": 9.343799320049584e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 560446256, + "step": 6140 + }, + { + "epoch": 25.5875, + "grad_norm": 205.04027595026457, + "learning_rate": 5e-05, + "loss": 0.2514, + "num_input_tokens_seen": 560537824, + "step": 6141 + }, + { + "epoch": 25.5875, + "loss": 0.28212207555770874, + "loss_ce": 0.08082813024520874, + "loss_iou": 0.34375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 560537824, + "step": 6141 + }, + { + "epoch": 25.591666666666665, + "grad_norm": 7.055879885382561, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 560629236, + "step": 6142 + }, + { + "epoch": 25.591666666666665, + "loss": 0.08724255859851837, + "loss_ce": 0.04560131952166557, + "loss_iou": 0.23046875, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 560629236, + "step": 6142 + }, + { + "epoch": 25.595833333333335, + "grad_norm": 1.1593042300951693, + "learning_rate": 5e-05, + "loss": 0.0636, + "num_input_tokens_seen": 560720168, + "step": 6143 + }, + { + "epoch": 25.595833333333335, + "loss": 0.035621605813503265, + "loss_ce": 3.0483281079796143e-05, + "loss_iou": 0.208984375, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 560720168, + "step": 6143 + }, + { + "epoch": 25.6, + "grad_norm": 6.720734426976948, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 560811132, + "step": 6144 + }, + { + "epoch": 25.6, + "loss": 0.03479313105344772, + "loss_ce": 3.0915059596736683e-06, + "loss_iou": 0.27734375, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 560811132, + "step": 6144 + }, + { + "epoch": 25.604166666666668, + "grad_norm": 2.518419210158942, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 560901332, + "step": 6145 + }, + { + "epoch": 25.604166666666668, + "loss": 0.02495124191045761, + "loss_ce": 3.1241229407896753e-06, + "loss_iou": 0.2021484375, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 560901332, + "step": 6145 + }, + { + "epoch": 25.608333333333334, + "grad_norm": 2.8052451512041237, + "learning_rate": 5e-05, + "loss": 0.1045, + "num_input_tokens_seen": 560993196, + "step": 6146 + }, + { + "epoch": 25.608333333333334, + "loss": 0.04913078621029854, + "loss_ce": 5.116238298796816e-06, + "loss_iou": 0.255859375, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 560993196, + "step": 6146 + }, + { + "epoch": 25.6125, + "grad_norm": 2.9103962904260117, + "learning_rate": 5e-05, + "loss": 0.0764, + "num_input_tokens_seen": 561084644, + "step": 6147 + }, + { + "epoch": 25.6125, + "loss": 0.07760877907276154, + "loss_ce": 9.412600775249302e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 561084644, + "step": 6147 + }, + { + "epoch": 25.616666666666667, + "grad_norm": 2.704512380849956, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 561176220, + "step": 6148 + }, + { + "epoch": 25.616666666666667, + "loss": 0.036154501140117645, + "loss_ce": 6.4274645410478115e-06, + "loss_iou": 0.21875, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 561176220, + "step": 6148 + }, + { + "epoch": 25.620833333333334, + "grad_norm": 7.236808509675872, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 561267356, + "step": 6149 + }, + { + "epoch": 25.620833333333334, + "loss": 0.03460079804062843, + "loss_ce": 9.121634320763405e-06, + "loss_iou": 0.33203125, + "loss_num": 0.00689697265625, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 561267356, + "step": 6149 + }, + { + "epoch": 25.625, + "grad_norm": 4.561622903085843, + "learning_rate": 5e-05, + "loss": 0.0583, + "num_input_tokens_seen": 561359028, + "step": 6150 + }, + { + "epoch": 25.625, + "loss": 0.07709389925003052, + "loss_ce": 0.00018960244779009372, + "loss_iou": 0.3046875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 561359028, + "step": 6150 + }, + { + "epoch": 25.629166666666666, + "grad_norm": 2.8734458229378474, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 561450104, + "step": 6151 + }, + { + "epoch": 25.629166666666666, + "loss": 0.04495877027511597, + "loss_ce": 6.373582436935976e-06, + "loss_iou": 0.1591796875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 561450104, + "step": 6151 + }, + { + "epoch": 25.633333333333333, + "grad_norm": 3.25924726062024, + "learning_rate": 5e-05, + "loss": 0.0375, + "num_input_tokens_seen": 561541564, + "step": 6152 + }, + { + "epoch": 25.633333333333333, + "loss": 0.05050988495349884, + "loss_ce": 3.3811782486736774e-05, + "loss_iou": 0.26171875, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 561541564, + "step": 6152 + }, + { + "epoch": 25.6375, + "grad_norm": 2.6116361066075933, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 561632796, + "step": 6153 + }, + { + "epoch": 25.6375, + "loss": 0.09388189762830734, + "loss_ce": 3.271274545113556e-05, + "loss_iou": 0.208984375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 561632796, + "step": 6153 + }, + { + "epoch": 25.641666666666666, + "grad_norm": 2.3179977721097003, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 561723784, + "step": 6154 + }, + { + "epoch": 25.641666666666666, + "loss": 0.03187521547079086, + "loss_ce": 7.235814791783923e-06, + "loss_iou": 0.19140625, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 561723784, + "step": 6154 + }, + { + "epoch": 25.645833333333332, + "grad_norm": 2.778303299620632, + "learning_rate": 5e-05, + "loss": 0.0492, + "num_input_tokens_seen": 561814648, + "step": 6155 + }, + { + "epoch": 25.645833333333332, + "loss": 0.04797649383544922, + "loss_ce": 7.915434980532154e-05, + "loss_iou": 0.1064453125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 561814648, + "step": 6155 + }, + { + "epoch": 25.65, + "grad_norm": 1.7432156361042102, + "learning_rate": 5e-05, + "loss": 0.0887, + "num_input_tokens_seen": 561906228, + "step": 6156 + }, + { + "epoch": 25.65, + "loss": 0.1472359001636505, + "loss_ce": 9.540006431052461e-05, + "loss_iou": 0.1875, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 561906228, + "step": 6156 + }, + { + "epoch": 25.654166666666665, + "grad_norm": 2.163326086254311, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 561997536, + "step": 6157 + }, + { + "epoch": 25.654166666666665, + "loss": 0.03649410605430603, + "loss_ce": 9.426492761122063e-05, + "loss_iou": 0.255859375, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 561997536, + "step": 6157 + }, + { + "epoch": 25.658333333333335, + "grad_norm": 2.576559069006906, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 562088964, + "step": 6158 + }, + { + "epoch": 25.658333333333335, + "loss": 0.041357457637786865, + "loss_ce": 7.098697824403644e-05, + "loss_iou": 0.232421875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 562088964, + "step": 6158 + }, + { + "epoch": 25.6625, + "grad_norm": 1.94564116676901, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 562180896, + "step": 6159 + }, + { + "epoch": 25.6625, + "loss": 0.03156707063317299, + "loss_ce": 4.264139533916023e-06, + "loss_iou": 0.228515625, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 562180896, + "step": 6159 + }, + { + "epoch": 25.666666666666668, + "grad_norm": 2.60825665054094, + "learning_rate": 5e-05, + "loss": 0.0499, + "num_input_tokens_seen": 562272180, + "step": 6160 + }, + { + "epoch": 25.666666666666668, + "loss": 0.0645618662238121, + "loss_ce": 3.2445161195937544e-05, + "loss_iou": 0.28515625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 562272180, + "step": 6160 + }, + { + "epoch": 25.670833333333334, + "grad_norm": 3.697479041946092, + "learning_rate": 5e-05, + "loss": 0.0708, + "num_input_tokens_seen": 562363832, + "step": 6161 + }, + { + "epoch": 25.670833333333334, + "loss": 0.03595633804798126, + "loss_ce": 5.240726750344038e-05, + "loss_iou": 0.31640625, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 562363832, + "step": 6161 + }, + { + "epoch": 25.675, + "grad_norm": 2.079205454212805, + "learning_rate": 5e-05, + "loss": 0.0347, + "num_input_tokens_seen": 562455596, + "step": 6162 + }, + { + "epoch": 25.675, + "loss": 0.031726814806461334, + "loss_ce": 3.79521907234448e-06, + "loss_iou": 0.1875, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 562455596, + "step": 6162 + }, + { + "epoch": 25.679166666666667, + "grad_norm": 5.529298553751564, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 562547416, + "step": 6163 + }, + { + "epoch": 25.679166666666667, + "loss": 0.029071012511849403, + "loss_ce": 7.168351294239983e-05, + "loss_iou": 0.16796875, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 562547416, + "step": 6163 + }, + { + "epoch": 25.683333333333334, + "grad_norm": 2.1032150384919324, + "learning_rate": 5e-05, + "loss": 0.0501, + "num_input_tokens_seen": 562638784, + "step": 6164 + }, + { + "epoch": 25.683333333333334, + "loss": 0.030691668391227722, + "loss_ce": 6.2420622271019965e-06, + "loss_iou": 0.169921875, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 562638784, + "step": 6164 + }, + { + "epoch": 25.6875, + "grad_norm": 2.820384299390893, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 562730376, + "step": 6165 + }, + { + "epoch": 25.6875, + "loss": 0.04048352688550949, + "loss_ce": 1.9593010165408487e-06, + "loss_iou": 0.22265625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 562730376, + "step": 6165 + }, + { + "epoch": 25.691666666666666, + "grad_norm": 3.114629293616458, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 562821668, + "step": 6166 + }, + { + "epoch": 25.691666666666666, + "loss": 0.032954927533864975, + "loss_ce": 1.1202602763660252e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 562821668, + "step": 6166 + }, + { + "epoch": 25.695833333333333, + "grad_norm": 2.8132563422955816, + "learning_rate": 5e-05, + "loss": 0.0376, + "num_input_tokens_seen": 562912528, + "step": 6167 + }, + { + "epoch": 25.695833333333333, + "loss": 0.033153094351291656, + "loss_ce": 1.1004223779309541e-05, + "loss_iou": 0.197265625, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 562912528, + "step": 6167 + }, + { + "epoch": 25.7, + "grad_norm": 2.430857235582789, + "learning_rate": 5e-05, + "loss": 0.0649, + "num_input_tokens_seen": 563004012, + "step": 6168 + }, + { + "epoch": 25.7, + "loss": 0.04591874033212662, + "loss_ce": 5.042638804297894e-06, + "loss_iou": 0.34375, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 563004012, + "step": 6168 + }, + { + "epoch": 25.704166666666666, + "grad_norm": 4.638590187533068, + "learning_rate": 5e-05, + "loss": 0.0362, + "num_input_tokens_seen": 563095140, + "step": 6169 + }, + { + "epoch": 25.704166666666666, + "loss": 0.043398939073085785, + "loss_ce": 2.9414709388220217e-06, + "loss_iou": 0.294921875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 563095140, + "step": 6169 + }, + { + "epoch": 25.708333333333332, + "grad_norm": 3.3078042298822665, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 563187308, + "step": 6170 + }, + { + "epoch": 25.708333333333332, + "loss": 0.03523440659046173, + "loss_ce": 4.001004344900139e-05, + "loss_iou": 0.1474609375, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 563187308, + "step": 6170 + }, + { + "epoch": 25.7125, + "grad_norm": 2.2406118879558137, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 563277300, + "step": 6171 + }, + { + "epoch": 25.7125, + "loss": 0.09529478847980499, + "loss_ce": 3.6481064853433054e-06, + "loss_iou": 0.23828125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 563277300, + "step": 6171 + }, + { + "epoch": 25.716666666666665, + "grad_norm": 3.0833788667937045, + "learning_rate": 5e-05, + "loss": 0.0242, + "num_input_tokens_seen": 563368680, + "step": 6172 + }, + { + "epoch": 25.716666666666665, + "loss": 0.026963843032717705, + "loss_ce": 1.5623777471773792e-06, + "loss_iou": 0.2578125, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 563368680, + "step": 6172 + }, + { + "epoch": 25.720833333333335, + "grad_norm": 3.338505833691027, + "learning_rate": 5e-05, + "loss": 0.0694, + "num_input_tokens_seen": 563459932, + "step": 6173 + }, + { + "epoch": 25.720833333333335, + "loss": 0.04239106550812721, + "loss_ce": 3.266503699705936e-05, + "loss_iou": 0.365234375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 563459932, + "step": 6173 + }, + { + "epoch": 25.725, + "grad_norm": 3.225510461718473, + "learning_rate": 5e-05, + "loss": 0.029, + "num_input_tokens_seen": 563551464, + "step": 6174 + }, + { + "epoch": 25.725, + "loss": 0.02353103831410408, + "loss_ce": 0.0001545738341519609, + "loss_iou": 0.26171875, + "loss_num": 0.004669189453125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 563551464, + "step": 6174 + }, + { + "epoch": 25.729166666666668, + "grad_norm": 3.6078941333414374, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 563643004, + "step": 6175 + }, + { + "epoch": 25.729166666666668, + "loss": 0.026356343179941177, + "loss_ce": 8.230143976106774e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.005279541015625, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 563643004, + "step": 6175 + }, + { + "epoch": 25.733333333333334, + "grad_norm": 1.0670316467237433, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 563734168, + "step": 6176 + }, + { + "epoch": 25.733333333333334, + "loss": 0.027280237525701523, + "loss_ce": 3.567018939065747e-05, + "loss_iou": 0.2265625, + "loss_num": 0.00543212890625, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 563734168, + "step": 6176 + }, + { + "epoch": 25.7375, + "grad_norm": 2.103919541031617, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 563824572, + "step": 6177 + }, + { + "epoch": 25.7375, + "loss": 0.04883137717843056, + "loss_ce": 3.2511102290300187e-06, + "loss_iou": 0.1962890625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 563824572, + "step": 6177 + }, + { + "epoch": 25.741666666666667, + "grad_norm": 1.0159686485119894, + "learning_rate": 5e-05, + "loss": 0.0187, + "num_input_tokens_seen": 563915732, + "step": 6178 + }, + { + "epoch": 25.741666666666667, + "loss": 0.020426098257303238, + "loss_ce": 2.5097266188822687e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.00408935546875, + "loss_xval": 0.0203857421875, + "num_input_tokens_seen": 563915732, + "step": 6178 + }, + { + "epoch": 25.745833333333334, + "grad_norm": 0.6865861724508531, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 564006540, + "step": 6179 + }, + { + "epoch": 25.745833333333334, + "loss": 0.032504454255104065, + "loss_ce": 3.2329619443771662e-06, + "loss_iou": 0.203125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 564006540, + "step": 6179 + }, + { + "epoch": 25.75, + "grad_norm": 1.2746881020338765, + "learning_rate": 5e-05, + "loss": 0.0592, + "num_input_tokens_seen": 564098576, + "step": 6180 + }, + { + "epoch": 25.75, + "loss": 0.09174899756908417, + "loss_ce": 5.5285390772041865e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 564098576, + "step": 6180 + }, + { + "epoch": 25.754166666666666, + "grad_norm": 1.161104001659042, + "learning_rate": 5e-05, + "loss": 0.0302, + "num_input_tokens_seen": 564190040, + "step": 6181 + }, + { + "epoch": 25.754166666666666, + "loss": 0.03920216113328934, + "loss_ce": 2.3326715563598555e-06, + "loss_iou": 0.154296875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 564190040, + "step": 6181 + }, + { + "epoch": 25.758333333333333, + "grad_norm": 2.635958304628402, + "learning_rate": 5e-05, + "loss": 0.0554, + "num_input_tokens_seen": 564281212, + "step": 6182 + }, + { + "epoch": 25.758333333333333, + "loss": 0.059660643339157104, + "loss_ce": 0.007712096907198429, + "loss_iou": 0.212890625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 564281212, + "step": 6182 + }, + { + "epoch": 25.7625, + "grad_norm": 1.8956833933864967, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 564371556, + "step": 6183 + }, + { + "epoch": 25.7625, + "loss": 0.03747256100177765, + "loss_ce": 4.60345563624287e-06, + "loss_iou": 0.2353515625, + "loss_num": 0.00750732421875, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 564371556, + "step": 6183 + }, + { + "epoch": 25.766666666666666, + "grad_norm": 1.7525944786849836, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 564463512, + "step": 6184 + }, + { + "epoch": 25.766666666666666, + "loss": 0.06269238889217377, + "loss_ce": 0.0001160952597274445, + "loss_iou": 0.28515625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 564463512, + "step": 6184 + }, + { + "epoch": 25.770833333333332, + "grad_norm": 0.8645786560215175, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 564554408, + "step": 6185 + }, + { + "epoch": 25.770833333333332, + "loss": 0.050404638051986694, + "loss_ce": 2.0119014152442105e-05, + "loss_iou": 0.109375, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 564554408, + "step": 6185 + }, + { + "epoch": 25.775, + "grad_norm": 0.8226319288973017, + "learning_rate": 5e-05, + "loss": 0.0224, + "num_input_tokens_seen": 564645524, + "step": 6186 + }, + { + "epoch": 25.775, + "loss": 0.01581304706633091, + "loss_ce": 3.16454897983931e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.0031585693359375, + "loss_xval": 0.0157470703125, + "num_input_tokens_seen": 564645524, + "step": 6186 + }, + { + "epoch": 25.779166666666665, + "grad_norm": 1.7741157810285912, + "learning_rate": 5e-05, + "loss": 0.0605, + "num_input_tokens_seen": 564736176, + "step": 6187 + }, + { + "epoch": 25.779166666666665, + "loss": 0.0682372897863388, + "loss_ce": 3.050072155019734e-05, + "loss_iou": 0.216796875, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 564736176, + "step": 6187 + }, + { + "epoch": 25.783333333333335, + "grad_norm": 2.002310127769731, + "learning_rate": 5e-05, + "loss": 0.0449, + "num_input_tokens_seen": 564827528, + "step": 6188 + }, + { + "epoch": 25.783333333333335, + "loss": 0.03747815638780594, + "loss_ce": 2.5684003048809245e-06, + "loss_iou": 0.2373046875, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 564827528, + "step": 6188 + }, + { + "epoch": 25.7875, + "grad_norm": 3.5379552754793355, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 564918892, + "step": 6189 + }, + { + "epoch": 25.7875, + "loss": 0.03478563204407692, + "loss_ce": 3.2234866012004204e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 564918892, + "step": 6189 + }, + { + "epoch": 25.791666666666668, + "grad_norm": 2.139259743330136, + "learning_rate": 5e-05, + "loss": 0.0293, + "num_input_tokens_seen": 565010512, + "step": 6190 + }, + { + "epoch": 25.791666666666668, + "loss": 0.042409226298332214, + "loss_ce": 5.050466825196054e-06, + "loss_iou": 0.1728515625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 565010512, + "step": 6190 + }, + { + "epoch": 25.795833333333334, + "grad_norm": 2.9062476205288945, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 565101640, + "step": 6191 + }, + { + "epoch": 25.795833333333334, + "loss": 0.030232472345232964, + "loss_ce": 4.8102770051627886e-06, + "loss_iou": 0.1650390625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 565101640, + "step": 6191 + }, + { + "epoch": 25.8, + "grad_norm": 2.621436298671246, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 565192556, + "step": 6192 + }, + { + "epoch": 25.8, + "loss": 0.05496486276388168, + "loss_ce": 2.707063913476304e-06, + "loss_iou": 0.28515625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 565192556, + "step": 6192 + }, + { + "epoch": 25.804166666666667, + "grad_norm": 2.9015879210449724, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 565283468, + "step": 6193 + }, + { + "epoch": 25.804166666666667, + "loss": 0.045189596712589264, + "loss_ce": 0.00015328428708016872, + "loss_iou": 0.21484375, + "loss_num": 0.009033203125, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 565283468, + "step": 6193 + }, + { + "epoch": 25.808333333333334, + "grad_norm": 2.981056736907888, + "learning_rate": 5e-05, + "loss": 0.0482, + "num_input_tokens_seen": 565373624, + "step": 6194 + }, + { + "epoch": 25.808333333333334, + "loss": 0.0601491704583168, + "loss_ce": 2.954133196908515e-05, + "loss_iou": 0.197265625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 565373624, + "step": 6194 + }, + { + "epoch": 25.8125, + "grad_norm": 3.100211608761263, + "learning_rate": 5e-05, + "loss": 0.0325, + "num_input_tokens_seen": 565464568, + "step": 6195 + }, + { + "epoch": 25.8125, + "loss": 0.03338789939880371, + "loss_ce": 1.6691501514287665e-06, + "loss_iou": 0.337890625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 565464568, + "step": 6195 + }, + { + "epoch": 25.816666666666666, + "grad_norm": 2.7975115834447113, + "learning_rate": 5e-05, + "loss": 0.0768, + "num_input_tokens_seen": 565555988, + "step": 6196 + }, + { + "epoch": 25.816666666666666, + "loss": 0.03894903510808945, + "loss_ce": 2.3860455257818103e-05, + "loss_iou": 0.234375, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 565555988, + "step": 6196 + }, + { + "epoch": 25.820833333333333, + "grad_norm": 6.683605039191462, + "learning_rate": 5e-05, + "loss": 0.0619, + "num_input_tokens_seen": 565646712, + "step": 6197 + }, + { + "epoch": 25.820833333333333, + "loss": 0.06854942440986633, + "loss_ce": 3.746480797417462e-05, + "loss_iou": 0.279296875, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 565646712, + "step": 6197 + }, + { + "epoch": 25.825, + "grad_norm": 2.939345412767463, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 565737608, + "step": 6198 + }, + { + "epoch": 25.825, + "loss": 0.07456640899181366, + "loss_ce": 4.3337663555576e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 565737608, + "step": 6198 + }, + { + "epoch": 25.829166666666666, + "grad_norm": 2.796323197936319, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 565828744, + "step": 6199 + }, + { + "epoch": 25.829166666666666, + "loss": 0.032312601804733276, + "loss_ce": 1.3562228559749201e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 565828744, + "step": 6199 + }, + { + "epoch": 25.833333333333332, + "grad_norm": 2.300145429427322, + "learning_rate": 5e-05, + "loss": 0.0283, + "num_input_tokens_seen": 565920076, + "step": 6200 + }, + { + "epoch": 25.833333333333332, + "loss": 0.028814151883125305, + "loss_ce": 2.0819143173866905e-05, + "loss_iou": 0.3359375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 565920076, + "step": 6200 + }, + { + "epoch": 25.8375, + "grad_norm": 2.158863167829128, + "learning_rate": 5e-05, + "loss": 0.025, + "num_input_tokens_seen": 566011876, + "step": 6201 + }, + { + "epoch": 25.8375, + "loss": 0.022084590047597885, + "loss_ce": 0.0007451724377460778, + "loss_iou": 0.1904296875, + "loss_num": 0.0042724609375, + "loss_xval": 0.0213623046875, + "num_input_tokens_seen": 566011876, + "step": 6201 + }, + { + "epoch": 25.841666666666665, + "grad_norm": 3.0032125907318417, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 566103524, + "step": 6202 + }, + { + "epoch": 25.841666666666665, + "loss": 0.05159597098827362, + "loss_ce": 1.363731917081168e-05, + "loss_iou": 0.203125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 566103524, + "step": 6202 + }, + { + "epoch": 25.845833333333335, + "grad_norm": 2.3648260691935685, + "learning_rate": 5e-05, + "loss": 0.0239, + "num_input_tokens_seen": 566195456, + "step": 6203 + }, + { + "epoch": 25.845833333333335, + "loss": 0.025610897690057755, + "loss_ce": 0.0005178189603611827, + "loss_iou": 0.2412109375, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 566195456, + "step": 6203 + }, + { + "epoch": 25.85, + "grad_norm": 2.4481790838852797, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 566286808, + "step": 6204 + }, + { + "epoch": 25.85, + "loss": 0.040507905185222626, + "loss_ce": 3.44900968229922e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 566286808, + "step": 6204 + }, + { + "epoch": 25.854166666666668, + "grad_norm": 3.4786102615434094, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 566378044, + "step": 6205 + }, + { + "epoch": 25.854166666666668, + "loss": 0.032995712012052536, + "loss_ce": 2.1468926206580363e-05, + "loss_iou": 0.333984375, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 566378044, + "step": 6205 + }, + { + "epoch": 25.858333333333334, + "grad_norm": 3.5042248706895145, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 566469372, + "step": 6206 + }, + { + "epoch": 25.858333333333334, + "loss": 0.03962497413158417, + "loss_ce": 2.8418700821930543e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 566469372, + "step": 6206 + }, + { + "epoch": 25.8625, + "grad_norm": 2.8921605118927682, + "learning_rate": 5e-05, + "loss": 0.0404, + "num_input_tokens_seen": 566560928, + "step": 6207 + }, + { + "epoch": 25.8625, + "loss": 0.022944016382098198, + "loss_ce": 5.5832915677456185e-05, + "loss_iou": 0.158203125, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 566560928, + "step": 6207 + }, + { + "epoch": 25.866666666666667, + "grad_norm": 2.585905652175534, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 566652676, + "step": 6208 + }, + { + "epoch": 25.866666666666667, + "loss": 0.05306173861026764, + "loss_ce": 6.93045058142161e-06, + "loss_iou": 0.271484375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 566652676, + "step": 6208 + }, + { + "epoch": 25.870833333333334, + "grad_norm": 2.253935206946212, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 566744724, + "step": 6209 + }, + { + "epoch": 25.870833333333334, + "loss": 0.027485482394695282, + "loss_ce": 4.402094873512397e-06, + "loss_iou": 0.19921875, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 566744724, + "step": 6209 + }, + { + "epoch": 25.875, + "grad_norm": 1.1610223925097085, + "learning_rate": 5e-05, + "loss": 0.0528, + "num_input_tokens_seen": 566835900, + "step": 6210 + }, + { + "epoch": 25.875, + "loss": 0.02255455031991005, + "loss_ce": 0.00010887056851061061, + "loss_iou": 0.1552734375, + "loss_num": 0.004486083984375, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 566835900, + "step": 6210 + }, + { + "epoch": 25.879166666666666, + "grad_norm": 2.0502710078555793, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 566927672, + "step": 6211 + }, + { + "epoch": 25.879166666666666, + "loss": 0.07474754750728607, + "loss_ce": 0.0003685862757265568, + "loss_iou": 0.18359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 566927672, + "step": 6211 + }, + { + "epoch": 25.883333333333333, + "grad_norm": 1.1012079323531696, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 567019052, + "step": 6212 + }, + { + "epoch": 25.883333333333333, + "loss": 0.030489172786474228, + "loss_ce": 2.1120893052284373e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 567019052, + "step": 6212 + }, + { + "epoch": 25.8875, + "grad_norm": 1.690526939465067, + "learning_rate": 5e-05, + "loss": 0.0267, + "num_input_tokens_seen": 567110820, + "step": 6213 + }, + { + "epoch": 25.8875, + "loss": 0.03049873746931553, + "loss_ce": 4.047382390126586e-06, + "loss_iou": 0.38671875, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 567110820, + "step": 6213 + }, + { + "epoch": 25.891666666666666, + "grad_norm": 2.027924223311171, + "learning_rate": 5e-05, + "loss": 0.0241, + "num_input_tokens_seen": 567201736, + "step": 6214 + }, + { + "epoch": 25.891666666666666, + "loss": 0.02165631204843521, + "loss_ce": 4.0901650208979845e-06, + "loss_iou": 0.146484375, + "loss_num": 0.00433349609375, + "loss_xval": 0.0216064453125, + "num_input_tokens_seen": 567201736, + "step": 6214 + }, + { + "epoch": 25.895833333333332, + "grad_norm": 2.786822197862997, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 567292892, + "step": 6215 + }, + { + "epoch": 25.895833333333332, + "loss": 0.03339173272252083, + "loss_ce": 5.50091135664843e-06, + "loss_iou": 0.328125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 567292892, + "step": 6215 + }, + { + "epoch": 25.9, + "grad_norm": 2.767579093122714, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 567383720, + "step": 6216 + }, + { + "epoch": 25.9, + "loss": 0.03694453462958336, + "loss_ce": 3.0083456294960342e-06, + "loss_iou": 0.275390625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 567383720, + "step": 6216 + }, + { + "epoch": 25.904166666666665, + "grad_norm": 2.249322127371412, + "learning_rate": 5e-05, + "loss": 0.0521, + "num_input_tokens_seen": 567475000, + "step": 6217 + }, + { + "epoch": 25.904166666666665, + "loss": 0.06515315920114517, + "loss_ce": 2.8645601560128853e-05, + "loss_iou": 0.154296875, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 567475000, + "step": 6217 + }, + { + "epoch": 25.908333333333335, + "grad_norm": 1.176764330390045, + "learning_rate": 5e-05, + "loss": 0.0713, + "num_input_tokens_seen": 567564508, + "step": 6218 + }, + { + "epoch": 25.908333333333335, + "loss": 0.10813853144645691, + "loss_ce": 3.0015964512131177e-05, + "loss_iou": 0.12109375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 567564508, + "step": 6218 + }, + { + "epoch": 25.9125, + "grad_norm": 1.8127313337376398, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 567655664, + "step": 6219 + }, + { + "epoch": 25.9125, + "loss": 0.05346290022134781, + "loss_ce": 3.7308441278582904e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 567655664, + "step": 6219 + }, + { + "epoch": 25.916666666666668, + "grad_norm": 2.8901642512214525, + "learning_rate": 5e-05, + "loss": 0.0623, + "num_input_tokens_seen": 567746800, + "step": 6220 + }, + { + "epoch": 25.916666666666668, + "loss": 0.03645596653223038, + "loss_ce": 2.721512146308669e-06, + "loss_iou": 0.2021484375, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 567746800, + "step": 6220 + }, + { + "epoch": 25.920833333333334, + "grad_norm": 3.0671081042196895, + "learning_rate": 5e-05, + "loss": 0.0527, + "num_input_tokens_seen": 567838300, + "step": 6221 + }, + { + "epoch": 25.920833333333334, + "loss": 0.03754575550556183, + "loss_ce": 0.0017791553400456905, + "loss_iou": 0.25, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 567838300, + "step": 6221 + }, + { + "epoch": 25.925, + "grad_norm": 2.816814043901978, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 567929616, + "step": 6222 + }, + { + "epoch": 25.925, + "loss": 0.04085657373070717, + "loss_ce": 0.00011560738494154066, + "loss_iou": 0.189453125, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 567929616, + "step": 6222 + }, + { + "epoch": 25.929166666666667, + "grad_norm": 2.8523635840464197, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 568020912, + "step": 6223 + }, + { + "epoch": 25.929166666666667, + "loss": 0.03213977813720703, + "loss_ce": 4.769712177221663e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.00640869140625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 568020912, + "step": 6223 + }, + { + "epoch": 25.933333333333334, + "grad_norm": 3.0148122840929386, + "learning_rate": 5e-05, + "loss": 0.0788, + "num_input_tokens_seen": 568112240, + "step": 6224 + }, + { + "epoch": 25.933333333333334, + "loss": 0.05899741128087044, + "loss_ce": 0.0003578842442948371, + "loss_iou": 0.18359375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 568112240, + "step": 6224 + }, + { + "epoch": 25.9375, + "grad_norm": 3.2862347670356793, + "learning_rate": 5e-05, + "loss": 0.0309, + "num_input_tokens_seen": 568201736, + "step": 6225 + }, + { + "epoch": 25.9375, + "loss": 0.02853585034608841, + "loss_ce": 1.9140070435241796e-06, + "loss_iou": 0.1875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 568201736, + "step": 6225 + }, + { + "epoch": 25.941666666666666, + "grad_norm": 2.8599250102557328, + "learning_rate": 5e-05, + "loss": 0.0553, + "num_input_tokens_seen": 568293380, + "step": 6226 + }, + { + "epoch": 25.941666666666666, + "loss": 0.07472589612007141, + "loss_ce": 3.602154720283579e-06, + "loss_iou": 0.26953125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 568293380, + "step": 6226 + }, + { + "epoch": 25.945833333333333, + "grad_norm": 1.9691023829632905, + "learning_rate": 5e-05, + "loss": 0.0403, + "num_input_tokens_seen": 568382916, + "step": 6227 + }, + { + "epoch": 25.945833333333333, + "loss": 0.028636876493692398, + "loss_ce": 3.7587974475172814e-06, + "loss_iou": 0.201171875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 568382916, + "step": 6227 + }, + { + "epoch": 25.95, + "grad_norm": 2.237243818037928, + "learning_rate": 5e-05, + "loss": 0.0374, + "num_input_tokens_seen": 568474528, + "step": 6228 + }, + { + "epoch": 25.95, + "loss": 0.034794896841049194, + "loss_ce": 2.393420618318487e-05, + "loss_iou": 0.080078125, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 568474528, + "step": 6228 + }, + { + "epoch": 25.954166666666666, + "grad_norm": 3.5637477409096627, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 568565956, + "step": 6229 + }, + { + "epoch": 25.954166666666666, + "loss": 0.04442165791988373, + "loss_ce": 0.00047634501243010163, + "loss_iou": 0.2451171875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 568565956, + "step": 6229 + }, + { + "epoch": 25.958333333333332, + "grad_norm": 3.015889661191772, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 568657444, + "step": 6230 + }, + { + "epoch": 25.958333333333332, + "loss": 0.03149028494954109, + "loss_ce": 0.004558522719889879, + "loss_iou": 0.15625, + "loss_num": 0.00537109375, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 568657444, + "step": 6230 + }, + { + "epoch": 25.9625, + "grad_norm": 2.338261968838323, + "learning_rate": 5e-05, + "loss": 0.0369, + "num_input_tokens_seen": 568748604, + "step": 6231 + }, + { + "epoch": 25.9625, + "loss": 0.031379248946905136, + "loss_ce": 7.17790180715383e-06, + "loss_iou": 0.32421875, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 568748604, + "step": 6231 + }, + { + "epoch": 25.966666666666665, + "grad_norm": 2.6931507635597693, + "learning_rate": 5e-05, + "loss": 0.0827, + "num_input_tokens_seen": 568840140, + "step": 6232 + }, + { + "epoch": 25.966666666666665, + "loss": 0.0415530651807785, + "loss_ce": 3.384055844435352e-06, + "loss_iou": 0.263671875, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 568840140, + "step": 6232 + }, + { + "epoch": 25.970833333333335, + "grad_norm": 3.348812641350004, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 568931560, + "step": 6233 + }, + { + "epoch": 25.970833333333335, + "loss": 0.06727465242147446, + "loss_ce": 2.916773155448027e-05, + "loss_iou": 0.2890625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 568931560, + "step": 6233 + }, + { + "epoch": 25.975, + "grad_norm": 2.3705319876111672, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 569023052, + "step": 6234 + }, + { + "epoch": 25.975, + "loss": 0.05192911997437477, + "loss_ce": 3.4578806662466377e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 569023052, + "step": 6234 + }, + { + "epoch": 25.979166666666668, + "grad_norm": 2.140631008113906, + "learning_rate": 5e-05, + "loss": 0.0641, + "num_input_tokens_seen": 569114472, + "step": 6235 + }, + { + "epoch": 25.979166666666668, + "loss": 0.05225517600774765, + "loss_ce": 2.4343242330360226e-05, + "loss_iou": 0.271484375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 569114472, + "step": 6235 + }, + { + "epoch": 25.983333333333334, + "grad_norm": 2.5581902161791437, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 569205756, + "step": 6236 + }, + { + "epoch": 25.983333333333334, + "loss": 0.02487356960773468, + "loss_ce": 0.000566318107303232, + "loss_iou": 0.240234375, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 569205756, + "step": 6236 + }, + { + "epoch": 25.9875, + "grad_norm": 2.234012344346653, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 569297040, + "step": 6237 + }, + { + "epoch": 25.9875, + "loss": 0.03664017841219902, + "loss_ce": 3.825194653472863e-06, + "loss_iou": 0.28515625, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 569297040, + "step": 6237 + }, + { + "epoch": 25.991666666666667, + "grad_norm": 2.5095885007986345, + "learning_rate": 5e-05, + "loss": 0.0493, + "num_input_tokens_seen": 569388200, + "step": 6238 + }, + { + "epoch": 25.991666666666667, + "loss": 0.057438794523477554, + "loss_ce": 1.2340997272985987e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 569388200, + "step": 6238 + }, + { + "epoch": 25.995833333333334, + "grad_norm": 6.921083419486904, + "learning_rate": 5e-05, + "loss": 0.08, + "num_input_tokens_seen": 569479296, + "step": 6239 + }, + { + "epoch": 25.995833333333334, + "loss": 0.12009778618812561, + "loss_ce": 1.1113231266790535e-05, + "loss_iou": 0.28125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 569479296, + "step": 6239 + }, + { + "epoch": 26.0, + "grad_norm": 2.987832781000782, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 569570620, + "step": 6240 + }, + { + "epoch": 26.0, + "loss": 0.04104957729578018, + "loss_ce": 3.4333083931414876e-06, + "loss_iou": 0.28125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 569570620, + "step": 6240 + }, + { + "epoch": 26.004166666666666, + "grad_norm": 3.1303272391838255, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 569662492, + "step": 6241 + }, + { + "epoch": 26.004166666666666, + "loss": 0.035025689750909805, + "loss_ce": 1.4398500752577092e-05, + "loss_iou": 0.255859375, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 569662492, + "step": 6241 + }, + { + "epoch": 26.008333333333333, + "grad_norm": 3.5971941895441724, + "learning_rate": 5e-05, + "loss": 0.0318, + "num_input_tokens_seen": 569753680, + "step": 6242 + }, + { + "epoch": 26.008333333333333, + "loss": 0.027462609112262726, + "loss_ce": 4.41880456492072e-06, + "loss_iou": 0.201171875, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 569753680, + "step": 6242 + }, + { + "epoch": 26.0125, + "grad_norm": 1.9901081175986914, + "learning_rate": 5e-05, + "loss": 0.0611, + "num_input_tokens_seen": 569844260, + "step": 6243 + }, + { + "epoch": 26.0125, + "loss": 0.04240645468235016, + "loss_ce": 2.2796084522269666e-06, + "loss_iou": 0.173828125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 569844260, + "step": 6243 + }, + { + "epoch": 26.016666666666666, + "grad_norm": 2.5666725125305967, + "learning_rate": 5e-05, + "loss": 0.0385, + "num_input_tokens_seen": 569935652, + "step": 6244 + }, + { + "epoch": 26.016666666666666, + "loss": 0.04415284842252731, + "loss_ce": 9.170468729280401e-06, + "loss_iou": 0.2578125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 569935652, + "step": 6244 + }, + { + "epoch": 26.020833333333332, + "grad_norm": 2.4139493327373662, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 570027156, + "step": 6245 + }, + { + "epoch": 26.020833333333332, + "loss": 0.06275545805692673, + "loss_ce": 3.6852998164249584e-06, + "loss_iou": 0.2490234375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 570027156, + "step": 6245 + }, + { + "epoch": 26.025, + "grad_norm": 2.447243831182181, + "learning_rate": 5e-05, + "loss": 0.0332, + "num_input_tokens_seen": 570118944, + "step": 6246 + }, + { + "epoch": 26.025, + "loss": 0.029815129935741425, + "loss_ce": 7.086601272021653e-06, + "loss_iou": 0.341796875, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 570118944, + "step": 6246 + }, + { + "epoch": 26.029166666666665, + "grad_norm": 2.016318243284664, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 570210268, + "step": 6247 + }, + { + "epoch": 26.029166666666665, + "loss": 0.11593090742826462, + "loss_ce": 2.5144749088212848e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 570210268, + "step": 6247 + }, + { + "epoch": 26.033333333333335, + "grad_norm": 2.0813449498283854, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 570300968, + "step": 6248 + }, + { + "epoch": 26.033333333333335, + "loss": 0.03726140409708023, + "loss_ce": 7.0692808549210895e-06, + "loss_iou": 0.212890625, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 570300968, + "step": 6248 + }, + { + "epoch": 26.0375, + "grad_norm": 3.053951564871621, + "learning_rate": 5e-05, + "loss": 0.0325, + "num_input_tokens_seen": 570391888, + "step": 6249 + }, + { + "epoch": 26.0375, + "loss": 0.025143064558506012, + "loss_ce": 1.1838392310892232e-05, + "loss_iou": 0.287109375, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 570391888, + "step": 6249 + }, + { + "epoch": 26.041666666666668, + "grad_norm": 3.493859109783065, + "learning_rate": 5e-05, + "loss": 0.064, + "num_input_tokens_seen": 570482944, + "step": 6250 + }, + { + "epoch": 26.041666666666668, + "eval_seeclick_CIoU": 0.19633438810706139, + "eval_seeclick_GIoU": 0.18025581538677216, + "eval_seeclick_IoU": 0.31648438423871994, + "eval_seeclick_MAE_all": 0.11852361261844635, + "eval_seeclick_MAE_h": 0.10228492319583893, + "eval_seeclick_MAE_w": 0.255145899951458, + "eval_seeclick_MAE_x_boxes": 0.26643601059913635, + "eval_seeclick_MAE_y_boxes": 0.10366765409708023, + "eval_seeclick_NUM_probability": 0.9999975264072418, + "eval_seeclick_inside_bbox": 0.5255681872367859, + "eval_seeclick_loss": 0.6100429892539978, + "eval_seeclick_loss_ce": 0.10555814579129219, + "eval_seeclick_loss_iou": 0.46240234375, + "eval_seeclick_loss_num": 0.094268798828125, + "eval_seeclick_loss_xval": 0.471435546875, + "eval_seeclick_runtime": 76.4862, + "eval_seeclick_samples_per_second": 0.562, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 570482944, + "step": 6250 + }, + { + "epoch": 26.041666666666668, + "eval_icons_CIoU": 0.2770818769931793, + "eval_icons_GIoU": 0.24754273891448975, + "eval_icons_IoU": 0.37252339720726013, + "eval_icons_MAE_all": 0.0877491757273674, + "eval_icons_MAE_h": 0.1594034805893898, + "eval_icons_MAE_w": 0.14346452057361603, + "eval_icons_MAE_x_boxes": 0.14612339437007904, + "eval_icons_MAE_y_boxes": 0.16382309794425964, + "eval_icons_NUM_probability": 0.9999905824661255, + "eval_icons_inside_bbox": 0.546875, + "eval_icons_loss": 0.43681633472442627, + "eval_icons_loss_ce": 0.0001863774232333526, + "eval_icons_loss_iou": 0.2720947265625, + "eval_icons_loss_num": 0.0906219482421875, + "eval_icons_loss_xval": 0.45294189453125, + "eval_icons_runtime": 99.8018, + "eval_icons_samples_per_second": 0.501, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 570482944, + "step": 6250 + }, + { + "epoch": 26.041666666666668, + "eval_screenspot_CIoU": 0.37342973550160724, + "eval_screenspot_GIoU": 0.36550194025039673, + "eval_screenspot_IoU": 0.4399256110191345, + "eval_screenspot_MAE_all": 0.09415490676959355, + "eval_screenspot_MAE_h": 0.08269187062978745, + "eval_screenspot_MAE_w": 0.21002002557118735, + "eval_screenspot_MAE_x_boxes": 0.1757701834042867, + "eval_screenspot_MAE_y_boxes": 0.08223061760266621, + "eval_screenspot_NUM_probability": 0.9999955495198568, + "eval_screenspot_inside_bbox": 0.7279166579246521, + "eval_screenspot_loss": 0.4741321802139282, + "eval_screenspot_loss_ce": 0.0008951277413871139, + "eval_screenspot_loss_iou": 0.3416341145833333, + "eval_screenspot_loss_num": 0.09652201334635417, + "eval_screenspot_loss_xval": 0.4826253255208333, + "eval_screenspot_runtime": 161.2618, + "eval_screenspot_samples_per_second": 0.552, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 570482944, + "step": 6250 + }, + { + "epoch": 26.041666666666668, + "eval_compot_CIoU": 0.5062353014945984, + "eval_compot_GIoU": 0.496506005525589, + "eval_compot_IoU": 0.5829664170742035, + "eval_compot_MAE_all": 0.0551037210971117, + "eval_compot_MAE_h": 0.06339871324598789, + "eval_compot_MAE_w": 0.14107514172792435, + "eval_compot_MAE_x_boxes": 0.14240705966949463, + "eval_compot_MAE_y_boxes": 0.06185857765376568, + "eval_compot_NUM_probability": 0.999993234872818, + "eval_compot_inside_bbox": 0.7326388955116272, + "eval_compot_loss": 0.32540085911750793, + "eval_compot_loss_ce": 0.04954234138131142, + "eval_compot_loss_iou": 0.323486328125, + "eval_compot_loss_num": 0.05290985107421875, + "eval_compot_loss_xval": 0.264495849609375, + "eval_compot_runtime": 89.7456, + "eval_compot_samples_per_second": 0.557, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 570482944, + "step": 6250 + }, + { + "epoch": 26.041666666666668, + "loss": 0.3000350594520569, + "loss_ce": 0.048509202897548676, + "loss_iou": 0.3203125, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 570482944, + "step": 6250 + }, + { + "epoch": 26.045833333333334, + "grad_norm": 2.3315837202573615, + "learning_rate": 5e-05, + "loss": 0.0598, + "num_input_tokens_seen": 570574088, + "step": 6251 + }, + { + "epoch": 26.045833333333334, + "loss": 0.0314057320356369, + "loss_ce": 3.1418903745361604e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 570574088, + "step": 6251 + }, + { + "epoch": 26.05, + "grad_norm": 2.7485465532597466, + "learning_rate": 5e-05, + "loss": 0.0209, + "num_input_tokens_seen": 570665532, + "step": 6252 + }, + { + "epoch": 26.05, + "loss": 0.02470763400197029, + "loss_ce": 3.6557296425598906e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 570665532, + "step": 6252 + }, + { + "epoch": 26.054166666666667, + "grad_norm": 2.328907127752638, + "learning_rate": 5e-05, + "loss": 0.0372, + "num_input_tokens_seen": 570756560, + "step": 6253 + }, + { + "epoch": 26.054166666666667, + "loss": 0.026493418961763382, + "loss_ce": 4.162418463238282e-06, + "loss_iou": 0.26171875, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 570756560, + "step": 6253 + }, + { + "epoch": 26.058333333333334, + "grad_norm": 2.9153475674165636, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 570847504, + "step": 6254 + }, + { + "epoch": 26.058333333333334, + "loss": 0.04204103723168373, + "loss_ce": 3.0710834835190326e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 570847504, + "step": 6254 + }, + { + "epoch": 26.0625, + "grad_norm": 2.8012764743833807, + "learning_rate": 5e-05, + "loss": 0.0416, + "num_input_tokens_seen": 570938912, + "step": 6255 + }, + { + "epoch": 26.0625, + "loss": 0.03852350637316704, + "loss_ce": 7.135752093745396e-05, + "loss_iou": 0.359375, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 570938912, + "step": 6255 + }, + { + "epoch": 26.066666666666666, + "grad_norm": 2.1701406378525587, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 571030588, + "step": 6256 + }, + { + "epoch": 26.066666666666666, + "loss": 0.028773188591003418, + "loss_ce": 1.0371810276410542e-05, + "loss_iou": 0.24609375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 571030588, + "step": 6256 + }, + { + "epoch": 26.070833333333333, + "grad_norm": 2.0690085239834533, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 571120480, + "step": 6257 + }, + { + "epoch": 26.070833333333333, + "loss": 0.06280811876058578, + "loss_ce": 2.944461357401451e-06, + "loss_iou": 0.1865234375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 571120480, + "step": 6257 + }, + { + "epoch": 26.075, + "grad_norm": 2.3099640086091737, + "learning_rate": 5e-05, + "loss": 0.0691, + "num_input_tokens_seen": 571211992, + "step": 6258 + }, + { + "epoch": 26.075, + "loss": 0.044280171394348145, + "loss_ce": 6.795500667067245e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 571211992, + "step": 6258 + }, + { + "epoch": 26.079166666666666, + "grad_norm": 2.8956852834842914, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 571302604, + "step": 6259 + }, + { + "epoch": 26.079166666666666, + "loss": 0.07299505174160004, + "loss_ce": 0.001866208971478045, + "loss_iou": 0.1796875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 571302604, + "step": 6259 + }, + { + "epoch": 26.083333333333332, + "grad_norm": 2.0520974878683633, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 571394020, + "step": 6260 + }, + { + "epoch": 26.083333333333332, + "loss": 0.024094898253679276, + "loss_ce": 8.897854058886878e-06, + "loss_iou": 0.265625, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 571394020, + "step": 6260 + }, + { + "epoch": 26.0875, + "grad_norm": 1.2220130617000615, + "learning_rate": 5e-05, + "loss": 0.0409, + "num_input_tokens_seen": 571484956, + "step": 6261 + }, + { + "epoch": 26.0875, + "loss": 0.04031633958220482, + "loss_ce": 2.6189673008047976e-06, + "loss_iou": 0.185546875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 571484956, + "step": 6261 + }, + { + "epoch": 26.091666666666665, + "grad_norm": 1.8787374771295302, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 571576136, + "step": 6262 + }, + { + "epoch": 26.091666666666665, + "loss": 0.028536062687635422, + "loss_ce": 1.738655373628717e-05, + "loss_iou": 0.255859375, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 571576136, + "step": 6262 + }, + { + "epoch": 26.095833333333335, + "grad_norm": 2.368319643306598, + "learning_rate": 5e-05, + "loss": 0.0259, + "num_input_tokens_seen": 571667516, + "step": 6263 + }, + { + "epoch": 26.095833333333335, + "loss": 0.02410757541656494, + "loss_ce": 6.353846401907504e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.004791259765625, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 571667516, + "step": 6263 + }, + { + "epoch": 26.1, + "grad_norm": 2.9416620233734836, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 571759092, + "step": 6264 + }, + { + "epoch": 26.1, + "loss": 0.02723521925508976, + "loss_ce": 5.9099975260323845e-06, + "loss_iou": 0.357421875, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 571759092, + "step": 6264 + }, + { + "epoch": 26.104166666666668, + "grad_norm": 2.9230697977898177, + "learning_rate": 5e-05, + "loss": 0.0451, + "num_input_tokens_seen": 571850480, + "step": 6265 + }, + { + "epoch": 26.104166666666668, + "loss": 0.03306296467781067, + "loss_ce": 0.00019553500169422477, + "loss_iou": 0.2373046875, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 571850480, + "step": 6265 + }, + { + "epoch": 26.108333333333334, + "grad_norm": 2.8379543798087665, + "learning_rate": 5e-05, + "loss": 0.0476, + "num_input_tokens_seen": 571941896, + "step": 6266 + }, + { + "epoch": 26.108333333333334, + "loss": 0.05199863761663437, + "loss_ce": 4.316520517022582e-06, + "loss_iou": 0.1552734375, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 571941896, + "step": 6266 + }, + { + "epoch": 26.1125, + "grad_norm": 2.4718131067011773, + "learning_rate": 5e-05, + "loss": 0.0545, + "num_input_tokens_seen": 572033372, + "step": 6267 + }, + { + "epoch": 26.1125, + "loss": 0.03578196465969086, + "loss_ce": 0.00019083707593381405, + "loss_iou": 0.1259765625, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 572033372, + "step": 6267 + }, + { + "epoch": 26.116666666666667, + "grad_norm": 2.5265657710953526, + "learning_rate": 5e-05, + "loss": 0.0382, + "num_input_tokens_seen": 572124444, + "step": 6268 + }, + { + "epoch": 26.116666666666667, + "loss": 0.021869119256734848, + "loss_ce": 3.2737359560996993e-06, + "loss_iou": 0.365234375, + "loss_num": 0.004364013671875, + "loss_xval": 0.0218505859375, + "num_input_tokens_seen": 572124444, + "step": 6268 + }, + { + "epoch": 26.120833333333334, + "grad_norm": 3.850485848251067, + "learning_rate": 5e-05, + "loss": 0.067, + "num_input_tokens_seen": 572216048, + "step": 6269 + }, + { + "epoch": 26.120833333333334, + "loss": 0.06515754759311676, + "loss_ce": 2.5184165224345634e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 572216048, + "step": 6269 + }, + { + "epoch": 26.125, + "grad_norm": 2.0856751596103673, + "learning_rate": 5e-05, + "loss": 0.0281, + "num_input_tokens_seen": 572307184, + "step": 6270 + }, + { + "epoch": 26.125, + "loss": 0.042046919465065, + "loss_ce": 1.277327010029694e-05, + "loss_iou": 0.236328125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 572307184, + "step": 6270 + }, + { + "epoch": 26.129166666666666, + "grad_norm": 2.547013880080754, + "learning_rate": 5e-05, + "loss": 0.0245, + "num_input_tokens_seen": 572398924, + "step": 6271 + }, + { + "epoch": 26.129166666666666, + "loss": 0.026323389261960983, + "loss_ce": 1.9775347936956678e-06, + "loss_iou": 0.216796875, + "loss_num": 0.0052490234375, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 572398924, + "step": 6271 + }, + { + "epoch": 26.133333333333333, + "grad_norm": 2.0702680246502148, + "learning_rate": 5e-05, + "loss": 0.0358, + "num_input_tokens_seen": 572490196, + "step": 6272 + }, + { + "epoch": 26.133333333333333, + "loss": 0.024699455127120018, + "loss_ce": 3.105150426563341e-06, + "loss_iou": 0.193359375, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 572490196, + "step": 6272 + }, + { + "epoch": 26.1375, + "grad_norm": 1.8665974664534275, + "learning_rate": 5e-05, + "loss": 0.021, + "num_input_tokens_seen": 572581772, + "step": 6273 + }, + { + "epoch": 26.1375, + "loss": 0.021007981151342392, + "loss_ce": 2.7147021683049388e-05, + "loss_iou": 0.23046875, + "loss_num": 0.004180908203125, + "loss_xval": 0.02099609375, + "num_input_tokens_seen": 572581772, + "step": 6273 + }, + { + "epoch": 26.141666666666666, + "grad_norm": 2.569508785889838, + "learning_rate": 5e-05, + "loss": 0.0484, + "num_input_tokens_seen": 572673364, + "step": 6274 + }, + { + "epoch": 26.141666666666666, + "loss": 0.03979034721851349, + "loss_ce": 6.409214984159917e-05, + "loss_iou": 0.099609375, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 572673364, + "step": 6274 + }, + { + "epoch": 26.145833333333332, + "grad_norm": 3.3867004929729982, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 572765160, + "step": 6275 + }, + { + "epoch": 26.145833333333332, + "loss": 0.04471275210380554, + "loss_ce": 4.498059752222616e-06, + "loss_iou": 0.271484375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 572765160, + "step": 6275 + }, + { + "epoch": 26.15, + "grad_norm": 3.9647607647830805, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 572856588, + "step": 6276 + }, + { + "epoch": 26.15, + "loss": 0.07329382747411728, + "loss_ce": 3.6381457903189585e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 572856588, + "step": 6276 + }, + { + "epoch": 26.154166666666665, + "grad_norm": 2.9639328826519242, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 572947872, + "step": 6277 + }, + { + "epoch": 26.154166666666665, + "loss": 0.03356025740504265, + "loss_ce": 2.1438656403915957e-05, + "loss_iou": 0.1572265625, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 572947872, + "step": 6277 + }, + { + "epoch": 26.158333333333335, + "grad_norm": 5.329458968613203, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 573038684, + "step": 6278 + }, + { + "epoch": 26.158333333333335, + "loss": 0.031451016664505005, + "loss_ce": 2.653630872373469e-06, + "loss_iou": 0.296875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 573038684, + "step": 6278 + }, + { + "epoch": 26.1625, + "grad_norm": 2.430329436830701, + "learning_rate": 5e-05, + "loss": 0.0466, + "num_input_tokens_seen": 573129732, + "step": 6279 + }, + { + "epoch": 26.1625, + "loss": 0.05689224228262901, + "loss_ce": 7.477737199224066e-06, + "loss_iou": 0.216796875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 573129732, + "step": 6279 + }, + { + "epoch": 26.166666666666668, + "grad_norm": 7.444355132302191, + "learning_rate": 5e-05, + "loss": 0.0208, + "num_input_tokens_seen": 573221080, + "step": 6280 + }, + { + "epoch": 26.166666666666668, + "loss": 0.021823428571224213, + "loss_ce": 3.359195943630766e-06, + "loss_iou": 0.19921875, + "loss_num": 0.004364013671875, + "loss_xval": 0.0218505859375, + "num_input_tokens_seen": 573221080, + "step": 6280 + }, + { + "epoch": 26.170833333333334, + "grad_norm": 2.063519178543893, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 573312500, + "step": 6281 + }, + { + "epoch": 26.170833333333334, + "loss": 0.0332389771938324, + "loss_ce": 5.330958629201632e-06, + "loss_iou": 0.23828125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 573312500, + "step": 6281 + }, + { + "epoch": 26.175, + "grad_norm": 2.478197828967112, + "learning_rate": 5e-05, + "loss": 0.0251, + "num_input_tokens_seen": 573403948, + "step": 6282 + }, + { + "epoch": 26.175, + "loss": 0.018054649233818054, + "loss_ce": 1.1132175131933764e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.00360107421875, + "loss_xval": 0.01806640625, + "num_input_tokens_seen": 573403948, + "step": 6282 + }, + { + "epoch": 26.179166666666667, + "grad_norm": 2.0409826501703314, + "learning_rate": 5e-05, + "loss": 0.0304, + "num_input_tokens_seen": 573495656, + "step": 6283 + }, + { + "epoch": 26.179166666666667, + "loss": 0.026428505778312683, + "loss_ce": 0.002380652353167534, + "loss_iou": 0.1728515625, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 573495656, + "step": 6283 + }, + { + "epoch": 26.183333333333334, + "grad_norm": 2.12562414575699, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 573586556, + "step": 6284 + }, + { + "epoch": 26.183333333333334, + "loss": 0.04275288060307503, + "loss_ce": 0.00014271096733864397, + "loss_iou": 0.2255859375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 573586556, + "step": 6284 + }, + { + "epoch": 26.1875, + "grad_norm": 2.3256192561189386, + "learning_rate": 5e-05, + "loss": 0.0291, + "num_input_tokens_seen": 573677892, + "step": 6285 + }, + { + "epoch": 26.1875, + "loss": 0.0218992717564106, + "loss_ce": 2.910128387156874e-06, + "loss_iou": 0.287109375, + "loss_num": 0.004364013671875, + "loss_xval": 0.0218505859375, + "num_input_tokens_seen": 573677892, + "step": 6285 + }, + { + "epoch": 26.191666666666666, + "grad_norm": 2.2700532782155487, + "learning_rate": 5e-05, + "loss": 0.0244, + "num_input_tokens_seen": 573769264, + "step": 6286 + }, + { + "epoch": 26.191666666666666, + "loss": 0.0227479487657547, + "loss_ce": 1.2353164493106306e-05, + "loss_iou": 0.2890625, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 573769264, + "step": 6286 + }, + { + "epoch": 26.195833333333333, + "grad_norm": 2.497051277580666, + "learning_rate": 5e-05, + "loss": 0.0348, + "num_input_tokens_seen": 573861032, + "step": 6287 + }, + { + "epoch": 26.195833333333333, + "loss": 0.02247200906276703, + "loss_ce": 3.440377668084693e-06, + "loss_iou": 0.3125, + "loss_num": 0.004486083984375, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 573861032, + "step": 6287 + }, + { + "epoch": 26.2, + "grad_norm": 2.4665341618284793, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 573952516, + "step": 6288 + }, + { + "epoch": 26.2, + "loss": 0.03499322384595871, + "loss_ce": 4.820210961042903e-06, + "loss_iou": 0.23046875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 573952516, + "step": 6288 + }, + { + "epoch": 26.204166666666666, + "grad_norm": 2.5178799536284404, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 574043548, + "step": 6289 + }, + { + "epoch": 26.204166666666666, + "loss": 0.025905869901180267, + "loss_ce": 4.075403012393508e-06, + "loss_iou": 0.248046875, + "loss_num": 0.00518798828125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 574043548, + "step": 6289 + }, + { + "epoch": 26.208333333333332, + "grad_norm": 2.815248669719499, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 574134964, + "step": 6290 + }, + { + "epoch": 26.208333333333332, + "loss": 0.04833405464887619, + "loss_ce": 0.009218152612447739, + "loss_iou": 0.173828125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 574134964, + "step": 6290 + }, + { + "epoch": 26.2125, + "grad_norm": 1.2196878914329312, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 574226544, + "step": 6291 + }, + { + "epoch": 26.2125, + "loss": 0.035638727247714996, + "loss_ce": 9.454719474888407e-06, + "loss_iou": 0.263671875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 574226544, + "step": 6291 + }, + { + "epoch": 26.216666666666665, + "grad_norm": 5.70504657339675, + "learning_rate": 5e-05, + "loss": 0.0539, + "num_input_tokens_seen": 574317544, + "step": 6292 + }, + { + "epoch": 26.216666666666665, + "loss": 0.02251153253018856, + "loss_ce": 4.818836259801174e-06, + "loss_iou": 0.3046875, + "loss_num": 0.004486083984375, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 574317544, + "step": 6292 + }, + { + "epoch": 26.220833333333335, + "grad_norm": 16.549963830690942, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 574408668, + "step": 6293 + }, + { + "epoch": 26.220833333333335, + "loss": 0.03274759650230408, + "loss_ce": 1.749454349919688e-05, + "loss_iou": 0.076171875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 574408668, + "step": 6293 + }, + { + "epoch": 26.225, + "grad_norm": 1.98771914841208, + "learning_rate": 5e-05, + "loss": 0.0313, + "num_input_tokens_seen": 574499588, + "step": 6294 + }, + { + "epoch": 26.225, + "loss": 0.04000755771994591, + "loss_ce": 1.4269931853050366e-05, + "loss_iou": 0.232421875, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 574499588, + "step": 6294 + }, + { + "epoch": 26.229166666666668, + "grad_norm": 2.8155593274898787, + "learning_rate": 5e-05, + "loss": 0.0246, + "num_input_tokens_seen": 574590840, + "step": 6295 + }, + { + "epoch": 26.229166666666668, + "loss": 0.021502085030078888, + "loss_ce": 2.4502253381797345e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.004302978515625, + "loss_xval": 0.021484375, + "num_input_tokens_seen": 574590840, + "step": 6295 + }, + { + "epoch": 26.233333333333334, + "grad_norm": 3.603509176942159, + "learning_rate": 5e-05, + "loss": 0.0272, + "num_input_tokens_seen": 574681876, + "step": 6296 + }, + { + "epoch": 26.233333333333334, + "loss": 0.0193068515509367, + "loss_ce": 4.483481461647898e-06, + "loss_iou": 0.25390625, + "loss_num": 0.0038604736328125, + "loss_xval": 0.019287109375, + "num_input_tokens_seen": 574681876, + "step": 6296 + }, + { + "epoch": 26.2375, + "grad_norm": 5.566119614228712, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 574773600, + "step": 6297 + }, + { + "epoch": 26.2375, + "loss": 0.03268130123615265, + "loss_ce": 0.00016482088540215045, + "loss_iou": 0.294921875, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 574773600, + "step": 6297 + }, + { + "epoch": 26.241666666666667, + "grad_norm": 3.6229640604552147, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 574865264, + "step": 6298 + }, + { + "epoch": 26.241666666666667, + "loss": 0.03571357578039169, + "loss_ce": 8.007896212802734e-06, + "loss_iou": 0.040283203125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 574865264, + "step": 6298 + }, + { + "epoch": 26.245833333333334, + "grad_norm": 1.0912757230321628, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 574956888, + "step": 6299 + }, + { + "epoch": 26.245833333333334, + "loss": 0.06911545991897583, + "loss_ce": 0.0002525450545363128, + "loss_iou": 0.2890625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 574956888, + "step": 6299 + }, + { + "epoch": 26.25, + "grad_norm": 1.5003465757889274, + "learning_rate": 5e-05, + "loss": 0.0497, + "num_input_tokens_seen": 575048208, + "step": 6300 + }, + { + "epoch": 26.25, + "loss": 0.07881193608045578, + "loss_ce": 2.3175707610789686e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 575048208, + "step": 6300 + }, + { + "epoch": 26.254166666666666, + "grad_norm": 1.4826211105944096, + "learning_rate": 5e-05, + "loss": 0.0266, + "num_input_tokens_seen": 575140140, + "step": 6301 + }, + { + "epoch": 26.254166666666666, + "loss": 0.032630644738674164, + "loss_ce": 0.002044401131570339, + "loss_iou": 0.19140625, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 575140140, + "step": 6301 + }, + { + "epoch": 26.258333333333333, + "grad_norm": 2.0612668053861007, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 575231776, + "step": 6302 + }, + { + "epoch": 26.258333333333333, + "loss": 0.02026584930717945, + "loss_ce": 1.7435413610655814e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.004058837890625, + "loss_xval": 0.020263671875, + "num_input_tokens_seen": 575231776, + "step": 6302 + }, + { + "epoch": 26.2625, + "grad_norm": 2.461279254233241, + "learning_rate": 5e-05, + "loss": 0.0292, + "num_input_tokens_seen": 575323000, + "step": 6303 + }, + { + "epoch": 26.2625, + "loss": 0.03142453730106354, + "loss_ce": 6.692681381537113e-06, + "loss_iou": 0.1875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 575323000, + "step": 6303 + }, + { + "epoch": 26.266666666666666, + "grad_norm": 4.378948959388897, + "learning_rate": 5e-05, + "loss": 0.0852, + "num_input_tokens_seen": 575414968, + "step": 6304 + }, + { + "epoch": 26.266666666666666, + "loss": 0.14499732851982117, + "loss_ce": 3.8841390050947666e-05, + "loss_iou": 0.125, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 575414968, + "step": 6304 + }, + { + "epoch": 26.270833333333332, + "grad_norm": 2.4258684782840754, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 575506536, + "step": 6305 + }, + { + "epoch": 26.270833333333332, + "loss": 0.029087748378515244, + "loss_ce": 4.495202119869646e-06, + "loss_iou": 0.20703125, + "loss_num": 0.005828857421875, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 575506536, + "step": 6305 + }, + { + "epoch": 26.275, + "grad_norm": 3.2960106053065346, + "learning_rate": 5e-05, + "loss": 0.068, + "num_input_tokens_seen": 575598072, + "step": 6306 + }, + { + "epoch": 26.275, + "loss": 0.06410195678472519, + "loss_ce": 3.03038668789668e-05, + "loss_iou": 0.130859375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 575598072, + "step": 6306 + }, + { + "epoch": 26.279166666666665, + "grad_norm": 4.672763395873947, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 575689892, + "step": 6307 + }, + { + "epoch": 26.279166666666665, + "loss": 0.04166632145643234, + "loss_ce": 0.00018530177476350218, + "loss_iou": 0.193359375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 575689892, + "step": 6307 + }, + { + "epoch": 26.283333333333335, + "grad_norm": 1.8578568597790612, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 575781492, + "step": 6308 + }, + { + "epoch": 26.283333333333335, + "loss": 0.05019602179527283, + "loss_ce": 9.861505532171577e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 575781492, + "step": 6308 + }, + { + "epoch": 26.2875, + "grad_norm": 2.1385111114018813, + "learning_rate": 5e-05, + "loss": 0.0316, + "num_input_tokens_seen": 575872756, + "step": 6309 + }, + { + "epoch": 26.2875, + "loss": 0.0381060428917408, + "loss_ce": 4.844471732212696e-06, + "loss_iou": 0.1796875, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 575872756, + "step": 6309 + }, + { + "epoch": 26.291666666666668, + "grad_norm": 1.8730805481238693, + "learning_rate": 5e-05, + "loss": 0.055, + "num_input_tokens_seen": 575964732, + "step": 6310 + }, + { + "epoch": 26.291666666666668, + "loss": 0.06005624681711197, + "loss_ce": 8.920710388338193e-05, + "loss_iou": 0.2734375, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 575964732, + "step": 6310 + }, + { + "epoch": 26.295833333333334, + "grad_norm": 2.188500937318792, + "learning_rate": 5e-05, + "loss": 0.0325, + "num_input_tokens_seen": 576055952, + "step": 6311 + }, + { + "epoch": 26.295833333333334, + "loss": 0.04608750343322754, + "loss_ce": 5.960510861768853e-06, + "loss_iou": 0.1728515625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 576055952, + "step": 6311 + }, + { + "epoch": 26.3, + "grad_norm": 3.995286971857075, + "learning_rate": 5e-05, + "loss": 0.0524, + "num_input_tokens_seen": 576147220, + "step": 6312 + }, + { + "epoch": 26.3, + "loss": 0.05525440722703934, + "loss_ce": 2.332332314836094e-06, + "loss_iou": 0.08984375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 576147220, + "step": 6312 + }, + { + "epoch": 26.304166666666667, + "grad_norm": 3.0070715232170655, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 576238912, + "step": 6313 + }, + { + "epoch": 26.304166666666667, + "loss": 0.04728226363658905, + "loss_ce": 1.0534803550399374e-05, + "loss_iou": 0.294921875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 576238912, + "step": 6313 + }, + { + "epoch": 26.308333333333334, + "grad_norm": 3.2493945466049796, + "learning_rate": 5e-05, + "loss": 0.03, + "num_input_tokens_seen": 576329824, + "step": 6314 + }, + { + "epoch": 26.308333333333334, + "loss": 0.021796412765979767, + "loss_ce": 6.862643203930929e-06, + "loss_iou": 0.267578125, + "loss_num": 0.004364013671875, + "loss_xval": 0.021728515625, + "num_input_tokens_seen": 576329824, + "step": 6314 + }, + { + "epoch": 26.3125, + "grad_norm": 2.8164148728586356, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 576420952, + "step": 6315 + }, + { + "epoch": 26.3125, + "loss": 0.04578210785984993, + "loss_ce": 5.741334007325349e-06, + "loss_iou": 0.31640625, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 576420952, + "step": 6315 + }, + { + "epoch": 26.316666666666666, + "grad_norm": 2.950784137828782, + "learning_rate": 5e-05, + "loss": 0.0651, + "num_input_tokens_seen": 576511960, + "step": 6316 + }, + { + "epoch": 26.316666666666666, + "loss": 0.1039830893278122, + "loss_ce": 2.070630216621794e-06, + "loss_iou": 0.30078125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 576511960, + "step": 6316 + }, + { + "epoch": 26.320833333333333, + "grad_norm": 3.0424274172526546, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 576603444, + "step": 6317 + }, + { + "epoch": 26.320833333333333, + "loss": 0.025478117167949677, + "loss_ce": 3.568322881619679e-06, + "loss_iou": 0.259765625, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 576603444, + "step": 6317 + }, + { + "epoch": 26.325, + "grad_norm": 2.8406572357861637, + "learning_rate": 5e-05, + "loss": 0.0503, + "num_input_tokens_seen": 576694496, + "step": 6318 + }, + { + "epoch": 26.325, + "loss": 0.05386997014284134, + "loss_ce": 6.443972324632341e-06, + "loss_iou": 0.1591796875, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 576694496, + "step": 6318 + }, + { + "epoch": 26.329166666666666, + "grad_norm": 3.658124062989085, + "learning_rate": 5e-05, + "loss": 0.0561, + "num_input_tokens_seen": 576786176, + "step": 6319 + }, + { + "epoch": 26.329166666666666, + "loss": 0.040701497346162796, + "loss_ce": 6.306990599114215e-06, + "loss_iou": 0.234375, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 576786176, + "step": 6319 + }, + { + "epoch": 26.333333333333332, + "grad_norm": 4.215985552079258, + "learning_rate": 5e-05, + "loss": 0.0513, + "num_input_tokens_seen": 576877596, + "step": 6320 + }, + { + "epoch": 26.333333333333332, + "loss": 0.06873767077922821, + "loss_ce": 4.454630470718257e-06, + "loss_iou": 0.1923828125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 576877596, + "step": 6320 + }, + { + "epoch": 26.3375, + "grad_norm": 2.5018603380379165, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 576968136, + "step": 6321 + }, + { + "epoch": 26.3375, + "loss": 0.05684886872768402, + "loss_ce": 2.248891405542963e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 576968136, + "step": 6321 + }, + { + "epoch": 26.341666666666665, + "grad_norm": 2.5013332338694054, + "learning_rate": 5e-05, + "loss": 0.023, + "num_input_tokens_seen": 577058988, + "step": 6322 + }, + { + "epoch": 26.341666666666665, + "loss": 0.025415629148483276, + "loss_ce": 2.114283233822789e-06, + "loss_iou": 0.31640625, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 577058988, + "step": 6322 + }, + { + "epoch": 26.345833333333335, + "grad_norm": 3.110519419619886, + "learning_rate": 5e-05, + "loss": 0.0454, + "num_input_tokens_seen": 577149672, + "step": 6323 + }, + { + "epoch": 26.345833333333335, + "loss": 0.06356197595596313, + "loss_ce": 1.491544935561251e-06, + "loss_iou": 0.1904296875, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 577149672, + "step": 6323 + }, + { + "epoch": 26.35, + "grad_norm": 3.010348438294041, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 577241132, + "step": 6324 + }, + { + "epoch": 26.35, + "loss": 0.04298759251832962, + "loss_ce": 3.583397756301565e-06, + "loss_iou": 0.232421875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 577241132, + "step": 6324 + }, + { + "epoch": 26.354166666666668, + "grad_norm": 2.0317761923647613, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 577333176, + "step": 6325 + }, + { + "epoch": 26.354166666666668, + "loss": 0.038431257009506226, + "loss_ce": 9.62726062425645e-06, + "loss_iou": 0.19921875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 577333176, + "step": 6325 + }, + { + "epoch": 26.358333333333334, + "grad_norm": 2.2036163389130863, + "learning_rate": 5e-05, + "loss": 0.0407, + "num_input_tokens_seen": 577424236, + "step": 6326 + }, + { + "epoch": 26.358333333333334, + "loss": 0.06302201747894287, + "loss_ce": 3.2206221476371866e-06, + "loss_iou": 0.361328125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 577424236, + "step": 6326 + }, + { + "epoch": 26.3625, + "grad_norm": 5.343683713846114, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 577514744, + "step": 6327 + }, + { + "epoch": 26.3625, + "loss": 0.024122852832078934, + "loss_ce": 6.335745638352819e-06, + "loss_iou": 0.224609375, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 577514744, + "step": 6327 + }, + { + "epoch": 26.366666666666667, + "grad_norm": 2.091313578216484, + "learning_rate": 5e-05, + "loss": 0.0274, + "num_input_tokens_seen": 577606116, + "step": 6328 + }, + { + "epoch": 26.366666666666667, + "loss": 0.025190196931362152, + "loss_ce": 1.3193935956223868e-05, + "loss_iou": 0.29296875, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 577606116, + "step": 6328 + }, + { + "epoch": 26.370833333333334, + "grad_norm": 2.7810069202307286, + "learning_rate": 5e-05, + "loss": 0.0403, + "num_input_tokens_seen": 577697536, + "step": 6329 + }, + { + "epoch": 26.370833333333334, + "loss": 0.03468858078122139, + "loss_ce": 1.2982125554117374e-05, + "loss_iou": 0.27734375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 577697536, + "step": 6329 + }, + { + "epoch": 26.375, + "grad_norm": 3.9993272580255472, + "learning_rate": 5e-05, + "loss": 0.0879, + "num_input_tokens_seen": 577788520, + "step": 6330 + }, + { + "epoch": 26.375, + "loss": 0.135927215218544, + "loss_ce": 1.926666300278157e-06, + "loss_iou": 0.259765625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 577788520, + "step": 6330 + }, + { + "epoch": 26.379166666666666, + "grad_norm": 2.601935095295643, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 577880416, + "step": 6331 + }, + { + "epoch": 26.379166666666666, + "loss": 0.02961255982518196, + "loss_ce": 1.8137467122869566e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 577880416, + "step": 6331 + }, + { + "epoch": 26.383333333333333, + "grad_norm": 1.3863780156038121, + "learning_rate": 5e-05, + "loss": 0.0208, + "num_input_tokens_seen": 577972412, + "step": 6332 + }, + { + "epoch": 26.383333333333333, + "loss": 0.015614290721714497, + "loss_ce": 8.847277786117047e-05, + "loss_iou": 0.162109375, + "loss_num": 0.00311279296875, + "loss_xval": 0.0155029296875, + "num_input_tokens_seen": 577972412, + "step": 6332 + }, + { + "epoch": 26.3875, + "grad_norm": 1.5277325908460835, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 578064024, + "step": 6333 + }, + { + "epoch": 26.3875, + "loss": 0.05949154123663902, + "loss_ce": 1.2783336387656163e-05, + "loss_iou": 0.326171875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 578064024, + "step": 6333 + }, + { + "epoch": 26.391666666666666, + "grad_norm": 1.580821654790845, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 578155216, + "step": 6334 + }, + { + "epoch": 26.391666666666666, + "loss": 0.026848390698432922, + "loss_ce": 8.180058102880139e-06, + "loss_iou": 0.1494140625, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 578155216, + "step": 6334 + }, + { + "epoch": 26.395833333333332, + "grad_norm": 2.8275278403687216, + "learning_rate": 5e-05, + "loss": 0.0223, + "num_input_tokens_seen": 578246792, + "step": 6335 + }, + { + "epoch": 26.395833333333332, + "loss": 0.018020860850811005, + "loss_ce": 6.126628431957215e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0035858154296875, + "loss_xval": 0.0179443359375, + "num_input_tokens_seen": 578246792, + "step": 6335 + }, + { + "epoch": 26.4, + "grad_norm": 2.8463810027193905, + "learning_rate": 5e-05, + "loss": 0.0732, + "num_input_tokens_seen": 578338724, + "step": 6336 + }, + { + "epoch": 26.4, + "loss": 0.07648120820522308, + "loss_ce": 4.160474873060593e-06, + "loss_iou": 0.255859375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 578338724, + "step": 6336 + }, + { + "epoch": 26.404166666666665, + "grad_norm": 3.459044094398339, + "learning_rate": 5e-05, + "loss": 0.0341, + "num_input_tokens_seen": 578429780, + "step": 6337 + }, + { + "epoch": 26.404166666666665, + "loss": 0.046463415026664734, + "loss_ce": 8.034942766244058e-06, + "loss_iou": 0.1728515625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 578429780, + "step": 6337 + }, + { + "epoch": 26.408333333333335, + "grad_norm": 2.8430893757458002, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 578520908, + "step": 6338 + }, + { + "epoch": 26.408333333333335, + "loss": 0.06003076583147049, + "loss_ce": 2.689038410608191e-06, + "loss_iou": 0.27734375, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 578520908, + "step": 6338 + }, + { + "epoch": 26.4125, + "grad_norm": 2.2231752423437774, + "learning_rate": 5e-05, + "loss": 0.0303, + "num_input_tokens_seen": 578611796, + "step": 6339 + }, + { + "epoch": 26.4125, + "loss": 0.04048309847712517, + "loss_ce": 1.5290997907868586e-06, + "loss_iou": 0.279296875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 578611796, + "step": 6339 + }, + { + "epoch": 26.416666666666668, + "grad_norm": 2.7015743485243275, + "learning_rate": 5e-05, + "loss": 0.03, + "num_input_tokens_seen": 578703224, + "step": 6340 + }, + { + "epoch": 26.416666666666668, + "loss": 0.024205388501286507, + "loss_ce": 4.949035428580828e-06, + "loss_iou": 0.37109375, + "loss_num": 0.004852294921875, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 578703224, + "step": 6340 + }, + { + "epoch": 26.420833333333334, + "grad_norm": 3.4984107307336005, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 578794864, + "step": 6341 + }, + { + "epoch": 26.420833333333334, + "loss": 0.03814494609832764, + "loss_ce": 2.849268821591977e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 578794864, + "step": 6341 + }, + { + "epoch": 26.425, + "grad_norm": 2.6017879252865743, + "learning_rate": 5e-05, + "loss": 0.0577, + "num_input_tokens_seen": 578886592, + "step": 6342 + }, + { + "epoch": 26.425, + "loss": 0.04673183336853981, + "loss_ce": 0.00112331158015877, + "loss_iou": 0.2314453125, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 578886592, + "step": 6342 + }, + { + "epoch": 26.429166666666667, + "grad_norm": 2.2730921916039786, + "learning_rate": 5e-05, + "loss": 0.0265, + "num_input_tokens_seen": 578977840, + "step": 6343 + }, + { + "epoch": 26.429166666666667, + "loss": 0.025332368910312653, + "loss_ce": 1.8036354958894663e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 578977840, + "step": 6343 + }, + { + "epoch": 26.433333333333334, + "grad_norm": 2.074076894002716, + "learning_rate": 5e-05, + "loss": 0.0252, + "num_input_tokens_seen": 579069376, + "step": 6344 + }, + { + "epoch": 26.433333333333334, + "loss": 0.02509579062461853, + "loss_ce": 0.0005596562987193465, + "loss_iou": 0.201171875, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 579069376, + "step": 6344 + }, + { + "epoch": 26.4375, + "grad_norm": 2.98508876389629, + "learning_rate": 5e-05, + "loss": 0.0258, + "num_input_tokens_seen": 579160768, + "step": 6345 + }, + { + "epoch": 26.4375, + "loss": 0.026991160586476326, + "loss_ce": 7.465577073162422e-05, + "loss_iou": 0.271484375, + "loss_num": 0.005401611328125, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 579160768, + "step": 6345 + }, + { + "epoch": 26.441666666666666, + "grad_norm": 2.852654715793294, + "learning_rate": 5e-05, + "loss": 0.0542, + "num_input_tokens_seen": 579251572, + "step": 6346 + }, + { + "epoch": 26.441666666666666, + "loss": 0.07384663075208664, + "loss_ce": 1.7213085357070668e-06, + "loss_iou": 0.2470703125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 579251572, + "step": 6346 + }, + { + "epoch": 26.445833333333333, + "grad_norm": 4.067748408031735, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 579342196, + "step": 6347 + }, + { + "epoch": 26.445833333333333, + "loss": 0.04042934253811836, + "loss_ce": 8.811045518086758e-06, + "loss_iou": 0.30078125, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 579342196, + "step": 6347 + }, + { + "epoch": 26.45, + "grad_norm": 2.060944078779385, + "learning_rate": 5e-05, + "loss": 0.0286, + "num_input_tokens_seen": 579433508, + "step": 6348 + }, + { + "epoch": 26.45, + "loss": 0.026566024869680405, + "loss_ce": 8.101601451926399e-06, + "loss_iou": 0.208984375, + "loss_num": 0.00531005859375, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 579433508, + "step": 6348 + }, + { + "epoch": 26.454166666666666, + "grad_norm": 1.4420374872645958, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 579524268, + "step": 6349 + }, + { + "epoch": 26.454166666666666, + "loss": 0.05209437757730484, + "loss_ce": 8.689075912116095e-07, + "loss_iou": 0.236328125, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 579524268, + "step": 6349 + }, + { + "epoch": 26.458333333333332, + "grad_norm": 1.372894539742087, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 579615144, + "step": 6350 + }, + { + "epoch": 26.458333333333332, + "loss": 0.03426942229270935, + "loss_ce": 1.3438528185361065e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 579615144, + "step": 6350 + }, + { + "epoch": 26.4625, + "grad_norm": 2.619487082696982, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 579706576, + "step": 6351 + }, + { + "epoch": 26.4625, + "loss": 0.02325865998864174, + "loss_ce": 5.004097693017684e-05, + "loss_iou": 0.296875, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 579706576, + "step": 6351 + }, + { + "epoch": 26.466666666666665, + "grad_norm": 2.9126440330534806, + "learning_rate": 5e-05, + "loss": 0.0487, + "num_input_tokens_seen": 579798132, + "step": 6352 + }, + { + "epoch": 26.466666666666665, + "loss": 0.030383776873350143, + "loss_ce": 3.5252160159870982e-06, + "loss_iou": 0.1201171875, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 579798132, + "step": 6352 + }, + { + "epoch": 26.470833333333335, + "grad_norm": 2.2802737333967307, + "learning_rate": 5e-05, + "loss": 0.058, + "num_input_tokens_seen": 579889192, + "step": 6353 + }, + { + "epoch": 26.470833333333335, + "loss": 0.05479633808135986, + "loss_ce": 2.024693003477296e-06, + "loss_iou": 0.232421875, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 579889192, + "step": 6353 + }, + { + "epoch": 26.475, + "grad_norm": 3.3087152312604875, + "learning_rate": 5e-05, + "loss": 0.0261, + "num_input_tokens_seen": 579981088, + "step": 6354 + }, + { + "epoch": 26.475, + "loss": 0.024331307038664818, + "loss_ce": 6.220270734047517e-05, + "loss_iou": 0.181640625, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 579981088, + "step": 6354 + }, + { + "epoch": 26.479166666666668, + "grad_norm": 2.636841214464431, + "learning_rate": 5e-05, + "loss": 0.0712, + "num_input_tokens_seen": 580071936, + "step": 6355 + }, + { + "epoch": 26.479166666666668, + "loss": 0.11911989003419876, + "loss_ce": 2.156376694983919e-06, + "loss_iou": 0.302734375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 580071936, + "step": 6355 + }, + { + "epoch": 26.483333333333334, + "grad_norm": 2.4018988890415365, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 580162684, + "step": 6356 + }, + { + "epoch": 26.483333333333334, + "loss": 0.05590973049402237, + "loss_ce": 1.5258759731295868e-06, + "loss_iou": 0.197265625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 580162684, + "step": 6356 + }, + { + "epoch": 26.4875, + "grad_norm": 2.111625423943705, + "learning_rate": 5e-05, + "loss": 0.0229, + "num_input_tokens_seen": 580254320, + "step": 6357 + }, + { + "epoch": 26.4875, + "loss": 0.01646145060658455, + "loss_ce": 1.2477152267820202e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0032958984375, + "loss_xval": 0.0164794921875, + "num_input_tokens_seen": 580254320, + "step": 6357 + }, + { + "epoch": 26.491666666666667, + "grad_norm": 1.7982741445816284, + "learning_rate": 5e-05, + "loss": 0.0271, + "num_input_tokens_seen": 580345336, + "step": 6358 + }, + { + "epoch": 26.491666666666667, + "loss": 0.028727242723107338, + "loss_ce": 2.5719239147292683e-06, + "loss_iou": 0.19140625, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 580345336, + "step": 6358 + }, + { + "epoch": 26.495833333333334, + "grad_norm": 2.4180720667866877, + "learning_rate": 5e-05, + "loss": 0.0495, + "num_input_tokens_seen": 580436756, + "step": 6359 + }, + { + "epoch": 26.495833333333334, + "loss": 0.07168925553560257, + "loss_ce": 3.4634629173524445e-06, + "loss_iou": 0.2890625, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 580436756, + "step": 6359 + }, + { + "epoch": 26.5, + "grad_norm": 4.37380711018086, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 580527860, + "step": 6360 + }, + { + "epoch": 26.5, + "loss": 0.05018797516822815, + "loss_ce": 1.8191740309703164e-06, + "loss_iou": 0.201171875, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 580527860, + "step": 6360 + }, + { + "epoch": 26.504166666666666, + "grad_norm": 3.3143864690576725, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 580619316, + "step": 6361 + }, + { + "epoch": 26.504166666666666, + "loss": 0.06650644540786743, + "loss_ce": 7.730662036919966e-05, + "loss_iou": 0.2734375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 580619316, + "step": 6361 + }, + { + "epoch": 26.508333333333333, + "grad_norm": 3.126583552244286, + "learning_rate": 5e-05, + "loss": 0.0646, + "num_input_tokens_seen": 580710312, + "step": 6362 + }, + { + "epoch": 26.508333333333333, + "loss": 0.027590272948145866, + "loss_ce": 2.3825505195418373e-06, + "loss_iou": 0.1640625, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 580710312, + "step": 6362 + }, + { + "epoch": 26.5125, + "grad_norm": 1.5455366033668991, + "learning_rate": 5e-05, + "loss": 0.078, + "num_input_tokens_seen": 580801956, + "step": 6363 + }, + { + "epoch": 26.5125, + "loss": 0.023525547236204147, + "loss_ce": 3.464097244432196e-05, + "loss_iou": 0.263671875, + "loss_num": 0.00469970703125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 580801956, + "step": 6363 + }, + { + "epoch": 26.516666666666666, + "grad_norm": 2.2025538317255884, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 580893244, + "step": 6364 + }, + { + "epoch": 26.516666666666666, + "loss": 0.04680035263299942, + "loss_ce": 1.6449587292299839e-06, + "loss_iou": 0.287109375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 580893244, + "step": 6364 + }, + { + "epoch": 26.520833333333332, + "grad_norm": 1.977698351598379, + "learning_rate": 5e-05, + "loss": 0.0262, + "num_input_tokens_seen": 580984300, + "step": 6365 + }, + { + "epoch": 26.520833333333332, + "loss": 0.033968620002269745, + "loss_ce": 2.556229446781799e-06, + "loss_iou": 0.29296875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 580984300, + "step": 6365 + }, + { + "epoch": 26.525, + "grad_norm": 2.769782273195563, + "learning_rate": 5e-05, + "loss": 0.0251, + "num_input_tokens_seen": 581074768, + "step": 6366 + }, + { + "epoch": 26.525, + "loss": 0.02291957288980484, + "loss_ce": 8.502931450493634e-06, + "loss_iou": 0.1572265625, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 581074768, + "step": 6366 + }, + { + "epoch": 26.529166666666665, + "grad_norm": 2.812135989180566, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 581166188, + "step": 6367 + }, + { + "epoch": 26.529166666666665, + "loss": 0.07515604794025421, + "loss_ce": 6.513767857541097e-06, + "loss_iou": 0.208984375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 581166188, + "step": 6367 + }, + { + "epoch": 26.533333333333335, + "grad_norm": 3.291469743063748, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 581257644, + "step": 6368 + }, + { + "epoch": 26.533333333333335, + "loss": 0.04181433096528053, + "loss_ce": 5.247154149401467e-06, + "loss_iou": 0.3046875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 581257644, + "step": 6368 + }, + { + "epoch": 26.5375, + "grad_norm": 2.6630209253197212, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 581348840, + "step": 6369 + }, + { + "epoch": 26.5375, + "loss": 0.043280940502882004, + "loss_ce": 7.01455519447336e-06, + "loss_iou": 0.3359375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 581348840, + "step": 6369 + }, + { + "epoch": 26.541666666666668, + "grad_norm": 2.165904526625043, + "learning_rate": 5e-05, + "loss": 0.0259, + "num_input_tokens_seen": 581440356, + "step": 6370 + }, + { + "epoch": 26.541666666666668, + "loss": 0.030488599091768265, + "loss_ce": 1.679923116171267e-05, + "loss_iou": 0.193359375, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 581440356, + "step": 6370 + }, + { + "epoch": 26.545833333333334, + "grad_norm": 2.1519854563459013, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 581531112, + "step": 6371 + }, + { + "epoch": 26.545833333333334, + "loss": 0.04378194361925125, + "loss_ce": 4.480289135244675e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 581531112, + "step": 6371 + }, + { + "epoch": 26.55, + "grad_norm": 2.032262603192787, + "learning_rate": 5e-05, + "loss": 0.0631, + "num_input_tokens_seen": 581622908, + "step": 6372 + }, + { + "epoch": 26.55, + "loss": 0.07665525376796722, + "loss_ce": 5.612863606074825e-05, + "loss_iou": 0.115234375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 581622908, + "step": 6372 + }, + { + "epoch": 26.554166666666667, + "grad_norm": 2.916450194533043, + "learning_rate": 5e-05, + "loss": 0.0258, + "num_input_tokens_seen": 581715240, + "step": 6373 + }, + { + "epoch": 26.554166666666667, + "loss": 0.02922218292951584, + "loss_ce": 1.6016360859794077e-06, + "loss_iou": 0.26953125, + "loss_num": 0.005859375, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 581715240, + "step": 6373 + }, + { + "epoch": 26.558333333333334, + "grad_norm": 2.7777091959970948, + "learning_rate": 5e-05, + "loss": 0.0617, + "num_input_tokens_seen": 581806232, + "step": 6374 + }, + { + "epoch": 26.558333333333334, + "loss": 0.08162251114845276, + "loss_ce": 3.246418145863572e-06, + "loss_iou": 0.33203125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 581806232, + "step": 6374 + }, + { + "epoch": 26.5625, + "grad_norm": 2.759082928506338, + "learning_rate": 5e-05, + "loss": 0.0317, + "num_input_tokens_seen": 581897896, + "step": 6375 + }, + { + "epoch": 26.5625, + "loss": 0.022695370018482208, + "loss_ce": 5.550998139369767e-06, + "loss_iou": 0.2421875, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 581897896, + "step": 6375 + }, + { + "epoch": 26.566666666666666, + "grad_norm": 2.8094302276322103, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 581989280, + "step": 6376 + }, + { + "epoch": 26.566666666666666, + "loss": 0.04536880552768707, + "loss_ce": 4.423782229423523e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 581989280, + "step": 6376 + }, + { + "epoch": 26.570833333333333, + "grad_norm": 2.3794042855729716, + "learning_rate": 5e-05, + "loss": 0.0648, + "num_input_tokens_seen": 582079520, + "step": 6377 + }, + { + "epoch": 26.570833333333333, + "loss": 0.06763634085655212, + "loss_ce": 1.760738086886704e-06, + "loss_iou": 0.1904296875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 582079520, + "step": 6377 + }, + { + "epoch": 26.575, + "grad_norm": 2.490674979187668, + "learning_rate": 5e-05, + "loss": 0.054, + "num_input_tokens_seen": 582170564, + "step": 6378 + }, + { + "epoch": 26.575, + "loss": 0.025903530418872833, + "loss_ce": 9.36424476094544e-06, + "loss_iou": 0.2294921875, + "loss_num": 0.00518798828125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 582170564, + "step": 6378 + }, + { + "epoch": 26.579166666666666, + "grad_norm": 2.6831178349896794, + "learning_rate": 5e-05, + "loss": 0.0582, + "num_input_tokens_seen": 582261796, + "step": 6379 + }, + { + "epoch": 26.579166666666666, + "loss": 0.04030474275350571, + "loss_ce": 6.28036877969862e-06, + "loss_iou": 0.3203125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 582261796, + "step": 6379 + }, + { + "epoch": 26.583333333333332, + "grad_norm": 2.9254635506782654, + "learning_rate": 5e-05, + "loss": 0.0395, + "num_input_tokens_seen": 582352716, + "step": 6380 + }, + { + "epoch": 26.583333333333332, + "loss": 0.030702892690896988, + "loss_ce": 2.2086460376158357e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 582352716, + "step": 6380 + }, + { + "epoch": 26.5875, + "grad_norm": 3.318806426993543, + "learning_rate": 5e-05, + "loss": 0.0284, + "num_input_tokens_seen": 582444340, + "step": 6381 + }, + { + "epoch": 26.5875, + "loss": 0.027070969343185425, + "loss_ce": 1.7136593669420108e-05, + "loss_iou": 0.3359375, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 582444340, + "step": 6381 + }, + { + "epoch": 26.591666666666665, + "grad_norm": 1.533350298247467, + "learning_rate": 5e-05, + "loss": 0.0294, + "num_input_tokens_seen": 582535436, + "step": 6382 + }, + { + "epoch": 26.591666666666665, + "loss": 0.04427720606327057, + "loss_ce": 3.434727477724664e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 582535436, + "step": 6382 + }, + { + "epoch": 26.595833333333335, + "grad_norm": 1.2006372851725466, + "learning_rate": 5e-05, + "loss": 0.0526, + "num_input_tokens_seen": 582626420, + "step": 6383 + }, + { + "epoch": 26.595833333333335, + "loss": 0.06297853589057922, + "loss_ce": 0.0001886187819764018, + "loss_iou": 0.259765625, + "loss_num": 0.01251220703125, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 582626420, + "step": 6383 + }, + { + "epoch": 26.6, + "grad_norm": 3.043951801189853, + "learning_rate": 5e-05, + "loss": 0.0563, + "num_input_tokens_seen": 582717916, + "step": 6384 + }, + { + "epoch": 26.6, + "loss": 0.03581683710217476, + "loss_ce": 4.460815489437664e-06, + "loss_iou": 0.306640625, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 582717916, + "step": 6384 + }, + { + "epoch": 26.604166666666668, + "grad_norm": 3.1192619091196265, + "learning_rate": 5e-05, + "loss": 0.0818, + "num_input_tokens_seen": 582807888, + "step": 6385 + }, + { + "epoch": 26.604166666666668, + "loss": 0.03427360951900482, + "loss_ce": 1.762903411872685e-05, + "loss_iou": 0.1787109375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 582807888, + "step": 6385 + }, + { + "epoch": 26.608333333333334, + "grad_norm": 1.3891490390759393, + "learning_rate": 5e-05, + "loss": 0.0233, + "num_input_tokens_seen": 582899608, + "step": 6386 + }, + { + "epoch": 26.608333333333334, + "loss": 0.027261588722467422, + "loss_ce": 1.7636632492212811e-06, + "loss_iou": 0.2373046875, + "loss_num": 0.00543212890625, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 582899608, + "step": 6386 + }, + { + "epoch": 26.6125, + "grad_norm": 1.9439048481514238, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 582991388, + "step": 6387 + }, + { + "epoch": 26.6125, + "loss": 0.0331236831843853, + "loss_ce": 4.482254098547855e-06, + "loss_iou": 0.2890625, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 582991388, + "step": 6387 + }, + { + "epoch": 26.616666666666667, + "grad_norm": 2.601169162882571, + "learning_rate": 5e-05, + "loss": 0.0338, + "num_input_tokens_seen": 583082652, + "step": 6388 + }, + { + "epoch": 26.616666666666667, + "loss": 0.03648631274700165, + "loss_ce": 2.5509375518595334e-06, + "loss_iou": 0.265625, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 583082652, + "step": 6388 + }, + { + "epoch": 26.620833333333334, + "grad_norm": 3.4201171748095054, + "learning_rate": 5e-05, + "loss": 0.0724, + "num_input_tokens_seen": 583173792, + "step": 6389 + }, + { + "epoch": 26.620833333333334, + "loss": 0.04953838512301445, + "loss_ce": 8.357676961168181e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 583173792, + "step": 6389 + }, + { + "epoch": 26.625, + "grad_norm": 3.2177538717532537, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 583265528, + "step": 6390 + }, + { + "epoch": 26.625, + "loss": 0.02730938419699669, + "loss_ce": 1.1408922546252143e-05, + "loss_iou": 0.24609375, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 583265528, + "step": 6390 + }, + { + "epoch": 26.629166666666666, + "grad_norm": 2.6080596153005744, + "learning_rate": 5e-05, + "loss": 0.0702, + "num_input_tokens_seen": 583356632, + "step": 6391 + }, + { + "epoch": 26.629166666666666, + "loss": 0.1096821278333664, + "loss_ce": 1.9534661532816244e-06, + "loss_iou": 0.2197265625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 583356632, + "step": 6391 + }, + { + "epoch": 26.633333333333333, + "grad_norm": 3.3712503495952557, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 583447960, + "step": 6392 + }, + { + "epoch": 26.633333333333333, + "loss": 0.05872820317745209, + "loss_ce": 0.003735528327524662, + "loss_iou": 0.259765625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 583447960, + "step": 6392 + }, + { + "epoch": 26.6375, + "grad_norm": 2.7712620943981388, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 583539624, + "step": 6393 + }, + { + "epoch": 26.6375, + "loss": 0.035634495317935944, + "loss_ce": 5.2247432904550806e-06, + "loss_iou": 0.330078125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 583539624, + "step": 6393 + }, + { + "epoch": 26.641666666666666, + "grad_norm": 3.1583934392875848, + "learning_rate": 5e-05, + "loss": 0.042, + "num_input_tokens_seen": 583630692, + "step": 6394 + }, + { + "epoch": 26.641666666666666, + "loss": 0.03953661769628525, + "loss_ce": 4.909299605060369e-06, + "loss_iou": 0.28125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 583630692, + "step": 6394 + }, + { + "epoch": 26.645833333333332, + "grad_norm": 2.3011474265121303, + "learning_rate": 5e-05, + "loss": 0.0445, + "num_input_tokens_seen": 583722228, + "step": 6395 + }, + { + "epoch": 26.645833333333332, + "loss": 0.05926249548792839, + "loss_ce": 1.2617707398021594e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 583722228, + "step": 6395 + }, + { + "epoch": 26.65, + "grad_norm": 2.3120902553161304, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 583813768, + "step": 6396 + }, + { + "epoch": 26.65, + "loss": 0.0495050773024559, + "loss_ce": 5.567444532061927e-06, + "loss_iou": 0.150390625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 583813768, + "step": 6396 + }, + { + "epoch": 26.654166666666665, + "grad_norm": 4.047422590953184, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 583904500, + "step": 6397 + }, + { + "epoch": 26.654166666666665, + "loss": 0.06963618844747543, + "loss_ce": 2.5595696570235305e-05, + "loss_iou": 0.30859375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 583904500, + "step": 6397 + }, + { + "epoch": 26.658333333333335, + "grad_norm": 2.0502554889475375, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 583995700, + "step": 6398 + }, + { + "epoch": 26.658333333333335, + "loss": 0.03861871361732483, + "loss_ce": 1.3975381079944782e-05, + "loss_iou": 0.314453125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 583995700, + "step": 6398 + }, + { + "epoch": 26.6625, + "grad_norm": 2.8899846505666806, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 584086472, + "step": 6399 + }, + { + "epoch": 26.6625, + "loss": 0.044085074216127396, + "loss_ce": 2.4305013539560605e-06, + "loss_iou": 0.302734375, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 584086472, + "step": 6399 + }, + { + "epoch": 26.666666666666668, + "grad_norm": 5.051323947719037, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 584177612, + "step": 6400 + }, + { + "epoch": 26.666666666666668, + "loss": 0.06541875004768372, + "loss_ce": 4.321193500800291e-06, + "loss_iou": 0.318359375, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 584177612, + "step": 6400 + }, + { + "epoch": 26.670833333333334, + "grad_norm": 1.8595043235990891, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 584268900, + "step": 6401 + }, + { + "epoch": 26.670833333333334, + "loss": 0.05805381387472153, + "loss_ce": 1.7489541050963453e-06, + "loss_iou": 0.1845703125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 584268900, + "step": 6401 + }, + { + "epoch": 26.675, + "grad_norm": 2.1412284194971734, + "learning_rate": 5e-05, + "loss": 0.0208, + "num_input_tokens_seen": 584360708, + "step": 6402 + }, + { + "epoch": 26.675, + "loss": 0.020786207169294357, + "loss_ce": 3.735653081093915e-06, + "loss_iou": 0.15234375, + "loss_num": 0.004150390625, + "loss_xval": 0.020751953125, + "num_input_tokens_seen": 584360708, + "step": 6402 + }, + { + "epoch": 26.679166666666667, + "grad_norm": 6.353105232087393, + "learning_rate": 5e-05, + "loss": 0.0475, + "num_input_tokens_seen": 584451928, + "step": 6403 + }, + { + "epoch": 26.679166666666667, + "loss": 0.04798254743218422, + "loss_ce": 2.417149516986683e-05, + "loss_iou": 0.1484375, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 584451928, + "step": 6403 + }, + { + "epoch": 26.683333333333334, + "grad_norm": 2.3612110352957045, + "learning_rate": 5e-05, + "loss": 0.048, + "num_input_tokens_seen": 584543328, + "step": 6404 + }, + { + "epoch": 26.683333333333334, + "loss": 0.03970205783843994, + "loss_ce": 2.9204951715655625e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 584543328, + "step": 6404 + }, + { + "epoch": 26.6875, + "grad_norm": 2.4044976294078038, + "learning_rate": 5e-05, + "loss": 0.0696, + "num_input_tokens_seen": 584634444, + "step": 6405 + }, + { + "epoch": 26.6875, + "loss": 0.052463434636592865, + "loss_ce": 3.719230335264001e-06, + "loss_iou": 0.28515625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 584634444, + "step": 6405 + }, + { + "epoch": 26.691666666666666, + "grad_norm": 1.9056035326436946, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 584726220, + "step": 6406 + }, + { + "epoch": 26.691666666666666, + "loss": 0.029926294460892677, + "loss_ce": 0.0003395025269128382, + "loss_iou": 0.17578125, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 584726220, + "step": 6406 + }, + { + "epoch": 26.695833333333333, + "grad_norm": 3.4191147212881625, + "learning_rate": 5e-05, + "loss": 0.0478, + "num_input_tokens_seen": 584817984, + "step": 6407 + }, + { + "epoch": 26.695833333333333, + "loss": 0.02571401745080948, + "loss_ce": 2.957248625534703e-06, + "loss_iou": 0.2421875, + "loss_num": 0.005126953125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 584817984, + "step": 6407 + }, + { + "epoch": 26.7, + "grad_norm": 3.549124781623612, + "learning_rate": 5e-05, + "loss": 0.024, + "num_input_tokens_seen": 584909328, + "step": 6408 + }, + { + "epoch": 26.7, + "loss": 0.022335490211844444, + "loss_ce": 4.252930921211373e-06, + "loss_iou": 0.24609375, + "loss_num": 0.00445556640625, + "loss_xval": 0.0223388671875, + "num_input_tokens_seen": 584909328, + "step": 6408 + }, + { + "epoch": 26.704166666666666, + "grad_norm": 8.95269863880642, + "learning_rate": 5e-05, + "loss": 0.0697, + "num_input_tokens_seen": 585000660, + "step": 6409 + }, + { + "epoch": 26.704166666666666, + "loss": 0.026591291651129723, + "loss_ce": 0.0002546220493968576, + "loss_iou": 0.216796875, + "loss_num": 0.005279541015625, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 585000660, + "step": 6409 + }, + { + "epoch": 26.708333333333332, + "grad_norm": 1.7099945501510865, + "learning_rate": 5e-05, + "loss": 0.0671, + "num_input_tokens_seen": 585092012, + "step": 6410 + }, + { + "epoch": 26.708333333333332, + "loss": 0.03264483064413071, + "loss_ce": 1.3908905202697497e-05, + "loss_iou": 0.298828125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 585092012, + "step": 6410 + }, + { + "epoch": 26.7125, + "grad_norm": 2.8318015894004454, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 585183520, + "step": 6411 + }, + { + "epoch": 26.7125, + "loss": 0.08012676239013672, + "loss_ce": 0.0002622563042677939, + "loss_iou": 0.1875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 585183520, + "step": 6411 + }, + { + "epoch": 26.716666666666665, + "grad_norm": 0.9956697448339735, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 585274760, + "step": 6412 + }, + { + "epoch": 26.716666666666665, + "loss": 0.028292525559663773, + "loss_ce": 2.729700099735055e-06, + "loss_iou": 0.2275390625, + "loss_num": 0.005645751953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 585274760, + "step": 6412 + }, + { + "epoch": 26.720833333333335, + "grad_norm": 0.9206675696449096, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 585365972, + "step": 6413 + }, + { + "epoch": 26.720833333333335, + "loss": 0.04103298857808113, + "loss_ce": 1.736307967803441e-05, + "loss_iou": 0.1884765625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 585365972, + "step": 6413 + }, + { + "epoch": 26.725, + "grad_norm": 1.5571004714894323, + "learning_rate": 5e-05, + "loss": 0.0624, + "num_input_tokens_seen": 585457120, + "step": 6414 + }, + { + "epoch": 26.725, + "loss": 0.05378871411085129, + "loss_ce": 1.4819468105997657e-06, + "loss_iou": 0.265625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 585457120, + "step": 6414 + }, + { + "epoch": 26.729166666666668, + "grad_norm": 16.903678598398205, + "learning_rate": 5e-05, + "loss": 0.0506, + "num_input_tokens_seen": 585548840, + "step": 6415 + }, + { + "epoch": 26.729166666666668, + "loss": 0.06305142492055893, + "loss_ce": 2.106048668792937e-06, + "loss_iou": 0.1162109375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 585548840, + "step": 6415 + }, + { + "epoch": 26.733333333333334, + "grad_norm": 3.6405479201013327, + "learning_rate": 5e-05, + "loss": 0.0627, + "num_input_tokens_seen": 585640304, + "step": 6416 + }, + { + "epoch": 26.733333333333334, + "loss": 0.10354401171207428, + "loss_ce": 5.503875399881508e-06, + "loss_iou": 0.353515625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 585640304, + "step": 6416 + }, + { + "epoch": 26.7375, + "grad_norm": 5.075702941057741, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 585731188, + "step": 6417 + }, + { + "epoch": 26.7375, + "loss": 0.05039920285344124, + "loss_ce": 0.0008539170958101749, + "loss_iou": 0.2080078125, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 585731188, + "step": 6417 + }, + { + "epoch": 26.741666666666667, + "grad_norm": 2.945129074205288, + "learning_rate": 5e-05, + "loss": 0.0661, + "num_input_tokens_seen": 585822472, + "step": 6418 + }, + { + "epoch": 26.741666666666667, + "loss": 0.04696325957775116, + "loss_ce": 3.485221532173455e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 585822472, + "step": 6418 + }, + { + "epoch": 26.745833333333334, + "grad_norm": 2.7674485302683824, + "learning_rate": 5e-05, + "loss": 0.0568, + "num_input_tokens_seen": 585913776, + "step": 6419 + }, + { + "epoch": 26.745833333333334, + "loss": 0.05188199505209923, + "loss_ce": 2.112512902385788e-06, + "loss_iou": 0.28125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 585913776, + "step": 6419 + }, + { + "epoch": 26.75, + "grad_norm": 2.306503631062339, + "learning_rate": 5e-05, + "loss": 0.0455, + "num_input_tokens_seen": 586004924, + "step": 6420 + }, + { + "epoch": 26.75, + "loss": 0.031033311039209366, + "loss_ce": 0.002835067454725504, + "loss_iou": 0.173828125, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 586004924, + "step": 6420 + }, + { + "epoch": 26.754166666666666, + "grad_norm": 2.7235878325854195, + "learning_rate": 5e-05, + "loss": 0.0315, + "num_input_tokens_seen": 586094432, + "step": 6421 + }, + { + "epoch": 26.754166666666666, + "loss": 0.025530759245157242, + "loss_ce": 2.805929398164153e-06, + "loss_iou": 0.265625, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 586094432, + "step": 6421 + }, + { + "epoch": 26.758333333333333, + "grad_norm": 4.879312451738563, + "learning_rate": 5e-05, + "loss": 0.0679, + "num_input_tokens_seen": 586185332, + "step": 6422 + }, + { + "epoch": 26.758333333333333, + "loss": 0.0576663538813591, + "loss_ce": 1.1019059456884861e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 586185332, + "step": 6422 + }, + { + "epoch": 26.7625, + "grad_norm": 2.0444074012690945, + "learning_rate": 5e-05, + "loss": 0.0556, + "num_input_tokens_seen": 586276412, + "step": 6423 + }, + { + "epoch": 26.7625, + "loss": 0.049933046102523804, + "loss_ce": 6.287096312007634e-06, + "loss_iou": 0.06103515625, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 586276412, + "step": 6423 + }, + { + "epoch": 26.766666666666666, + "grad_norm": 1.433993243634629, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 586367844, + "step": 6424 + }, + { + "epoch": 26.766666666666666, + "loss": 0.039614349603652954, + "loss_ce": 1.7794123778003268e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 586367844, + "step": 6424 + }, + { + "epoch": 26.770833333333332, + "grad_norm": 2.1161370072800074, + "learning_rate": 5e-05, + "loss": 0.0551, + "num_input_tokens_seen": 586459180, + "step": 6425 + }, + { + "epoch": 26.770833333333332, + "loss": 0.08504265546798706, + "loss_ce": 0.0005547403707168996, + "loss_iou": 0.1630859375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 586459180, + "step": 6425 + }, + { + "epoch": 26.775, + "grad_norm": 1.494492169237464, + "learning_rate": 5e-05, + "loss": 0.052, + "num_input_tokens_seen": 586550300, + "step": 6426 + }, + { + "epoch": 26.775, + "loss": 0.04698227345943451, + "loss_ce": 5.3866875532548875e-05, + "loss_iou": 0.162109375, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 586550300, + "step": 6426 + }, + { + "epoch": 26.779166666666665, + "grad_norm": 1.446415825474081, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 586641508, + "step": 6427 + }, + { + "epoch": 26.779166666666665, + "loss": 0.0393327996134758, + "loss_ce": 0.0003465966146904975, + "loss_iou": 0.28125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 586641508, + "step": 6427 + }, + { + "epoch": 26.783333333333335, + "grad_norm": 2.0137081030703836, + "learning_rate": 5e-05, + "loss": 0.0283, + "num_input_tokens_seen": 586733016, + "step": 6428 + }, + { + "epoch": 26.783333333333335, + "loss": 0.029314683750271797, + "loss_ce": 0.0001169908355223015, + "loss_iou": 0.1904296875, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 586733016, + "step": 6428 + }, + { + "epoch": 26.7875, + "grad_norm": 4.094413968281199, + "learning_rate": 5e-05, + "loss": 0.0259, + "num_input_tokens_seen": 586824192, + "step": 6429 + }, + { + "epoch": 26.7875, + "loss": 0.029474452137947083, + "loss_ce": 9.729870726005174e-06, + "loss_iou": 0.09619140625, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 586824192, + "step": 6429 + }, + { + "epoch": 26.791666666666668, + "grad_norm": 15.593829077174021, + "learning_rate": 5e-05, + "loss": 0.0458, + "num_input_tokens_seen": 586915608, + "step": 6430 + }, + { + "epoch": 26.791666666666668, + "loss": 0.04587894305586815, + "loss_ce": 3.3958101539610652e-06, + "loss_iou": 0.306640625, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 586915608, + "step": 6430 + }, + { + "epoch": 26.795833333333334, + "grad_norm": 2.352842928826233, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 587006468, + "step": 6431 + }, + { + "epoch": 26.795833333333334, + "loss": 0.0529550164937973, + "loss_ce": 0.0015023784944787621, + "loss_iou": 0.1923828125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 587006468, + "step": 6431 + }, + { + "epoch": 26.8, + "grad_norm": 3.895723652699884, + "learning_rate": 5e-05, + "loss": 0.0265, + "num_input_tokens_seen": 587098508, + "step": 6432 + }, + { + "epoch": 26.8, + "loss": 0.028584472835063934, + "loss_ce": 3.527989247231744e-05, + "loss_iou": 0.283203125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 587098508, + "step": 6432 + }, + { + "epoch": 26.804166666666667, + "grad_norm": 3.2476906357415154, + "learning_rate": 5e-05, + "loss": 0.0523, + "num_input_tokens_seen": 587188992, + "step": 6433 + }, + { + "epoch": 26.804166666666667, + "loss": 0.026140259578824043, + "loss_ce": 1.953450009750668e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 587188992, + "step": 6433 + }, + { + "epoch": 26.808333333333334, + "grad_norm": 4.413858954256823, + "learning_rate": 5e-05, + "loss": 0.0468, + "num_input_tokens_seen": 587280204, + "step": 6434 + }, + { + "epoch": 26.808333333333334, + "loss": 0.0530477836728096, + "loss_ce": 0.0002828882134053856, + "loss_iou": 0.212890625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 587280204, + "step": 6434 + }, + { + "epoch": 26.8125, + "grad_norm": 12.91505568211801, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 587371684, + "step": 6435 + }, + { + "epoch": 26.8125, + "loss": 0.0907832682132721, + "loss_ce": 0.0005962011055089533, + "loss_iou": 0.2373046875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 587371684, + "step": 6435 + }, + { + "epoch": 26.816666666666666, + "grad_norm": 3.991737329495078, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 587463276, + "step": 6436 + }, + { + "epoch": 26.816666666666666, + "loss": 0.03305169939994812, + "loss_ce": 1.2606242307811044e-05, + "loss_iou": 0.2275390625, + "loss_num": 0.006622314453125, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 587463276, + "step": 6436 + }, + { + "epoch": 26.820833333333333, + "grad_norm": 1.8788612397637245, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 587554432, + "step": 6437 + }, + { + "epoch": 26.820833333333333, + "loss": 0.02601810172200203, + "loss_ce": 1.8665964489628095e-06, + "loss_iou": 0.27734375, + "loss_num": 0.005218505859375, + "loss_xval": 0.0260009765625, + "num_input_tokens_seen": 587554432, + "step": 6437 + }, + { + "epoch": 26.825, + "grad_norm": 1.6852604433469536, + "learning_rate": 5e-05, + "loss": 0.0589, + "num_input_tokens_seen": 587646084, + "step": 6438 + }, + { + "epoch": 26.825, + "loss": 0.031329937279224396, + "loss_ce": 1.8900527720688842e-05, + "loss_iou": 0.224609375, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 587646084, + "step": 6438 + }, + { + "epoch": 26.829166666666666, + "grad_norm": 2.610942859817052, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 587737264, + "step": 6439 + }, + { + "epoch": 26.829166666666666, + "loss": 0.05527624860405922, + "loss_ce": 0.0020078180823475122, + "loss_iou": 0.326171875, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 587737264, + "step": 6439 + }, + { + "epoch": 26.833333333333332, + "grad_norm": 3.0473913921892257, + "learning_rate": 5e-05, + "loss": 0.0711, + "num_input_tokens_seen": 587828408, + "step": 6440 + }, + { + "epoch": 26.833333333333332, + "loss": 0.022648457437753677, + "loss_ce": 0.0008932405617088079, + "loss_iou": 0.162109375, + "loss_num": 0.004364013671875, + "loss_xval": 0.021728515625, + "num_input_tokens_seen": 587828408, + "step": 6440 + }, + { + "epoch": 26.8375, + "grad_norm": 3.7468871750695363, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 587920180, + "step": 6441 + }, + { + "epoch": 26.8375, + "loss": 0.03927876800298691, + "loss_ce": 2.6419752430228982e-06, + "loss_iou": 0.310546875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 587920180, + "step": 6441 + }, + { + "epoch": 26.841666666666665, + "grad_norm": 2.8556493561688745, + "learning_rate": 5e-05, + "loss": 0.0394, + "num_input_tokens_seen": 588011052, + "step": 6442 + }, + { + "epoch": 26.841666666666665, + "loss": 0.04216247797012329, + "loss_ce": 2.4448554540867917e-06, + "loss_iou": 0.310546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 588011052, + "step": 6442 + }, + { + "epoch": 26.845833333333335, + "grad_norm": 64.27268256971784, + "learning_rate": 5e-05, + "loss": 0.0762, + "num_input_tokens_seen": 588102664, + "step": 6443 + }, + { + "epoch": 26.845833333333335, + "loss": 0.08693346381187439, + "loss_ce": 1.1767648175009526e-05, + "loss_iou": 0.216796875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 588102664, + "step": 6443 + }, + { + "epoch": 26.85, + "grad_norm": 9.652023728117497, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 588192224, + "step": 6444 + }, + { + "epoch": 26.85, + "loss": 0.05360259860754013, + "loss_ce": 2.8993539672228508e-05, + "loss_iou": 0.15234375, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 588192224, + "step": 6444 + }, + { + "epoch": 26.854166666666668, + "grad_norm": 2.628580701797132, + "learning_rate": 5e-05, + "loss": 0.0244, + "num_input_tokens_seen": 588283672, + "step": 6445 + }, + { + "epoch": 26.854166666666668, + "loss": 0.02869114838540554, + "loss_ce": 1.9882903870893642e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 588283672, + "step": 6445 + }, + { + "epoch": 26.858333333333334, + "grad_norm": 3.154952388101508, + "learning_rate": 5e-05, + "loss": 0.0306, + "num_input_tokens_seen": 588374980, + "step": 6446 + }, + { + "epoch": 26.858333333333334, + "loss": 0.028727829456329346, + "loss_ce": 3.1601703085470945e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 588374980, + "step": 6446 + }, + { + "epoch": 26.8625, + "grad_norm": 2.959396044273708, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 588466732, + "step": 6447 + }, + { + "epoch": 26.8625, + "loss": 0.031750332564115524, + "loss_ce": 0.000164640587172471, + "loss_iou": 0.2890625, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 588466732, + "step": 6447 + }, + { + "epoch": 26.866666666666667, + "grad_norm": 2.4942420944744925, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 588558308, + "step": 6448 + }, + { + "epoch": 26.866666666666667, + "loss": 0.04990419000387192, + "loss_ce": 7.953952263051178e-06, + "loss_iou": 0.162109375, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 588558308, + "step": 6448 + }, + { + "epoch": 26.870833333333334, + "grad_norm": 2.8619748945556425, + "learning_rate": 5e-05, + "loss": 0.0472, + "num_input_tokens_seen": 588649520, + "step": 6449 + }, + { + "epoch": 26.870833333333334, + "loss": 0.02948564663529396, + "loss_ce": 5.666960532835219e-06, + "loss_iou": 0.2890625, + "loss_num": 0.005889892578125, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 588649520, + "step": 6449 + }, + { + "epoch": 26.875, + "grad_norm": 3.3762100271150777, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 588740472, + "step": 6450 + }, + { + "epoch": 26.875, + "loss": 0.029242604970932007, + "loss_ce": 6.764735189790372e-06, + "loss_iou": 0.197265625, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 588740472, + "step": 6450 + }, + { + "epoch": 26.879166666666666, + "grad_norm": 2.7603635479763042, + "learning_rate": 5e-05, + "loss": 0.047, + "num_input_tokens_seen": 588832100, + "step": 6451 + }, + { + "epoch": 26.879166666666666, + "loss": 0.043041642755270004, + "loss_ce": 4.237728353473358e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 588832100, + "step": 6451 + }, + { + "epoch": 26.883333333333333, + "grad_norm": 3.299961341163053, + "learning_rate": 5e-05, + "loss": 0.0366, + "num_input_tokens_seen": 588923460, + "step": 6452 + }, + { + "epoch": 26.883333333333333, + "loss": 0.04982820153236389, + "loss_ce": 8.258573870989494e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 588923460, + "step": 6452 + }, + { + "epoch": 26.8875, + "grad_norm": 2.48388702600195, + "learning_rate": 5e-05, + "loss": 0.0346, + "num_input_tokens_seen": 589014828, + "step": 6453 + }, + { + "epoch": 26.8875, + "loss": 0.04472944512963295, + "loss_ce": 5.9347225942474324e-06, + "loss_iou": 0.2138671875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 589014828, + "step": 6453 + }, + { + "epoch": 26.891666666666666, + "grad_norm": 5.689731283328901, + "learning_rate": 5e-05, + "loss": 0.0333, + "num_input_tokens_seen": 589105608, + "step": 6454 + }, + { + "epoch": 26.891666666666666, + "loss": 0.031533852219581604, + "loss_ce": 1.5657816447856021e-06, + "loss_iou": 0.2734375, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 589105608, + "step": 6454 + }, + { + "epoch": 26.895833333333332, + "grad_norm": 3.700058361609715, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 589197044, + "step": 6455 + }, + { + "epoch": 26.895833333333332, + "loss": 0.04795718565583229, + "loss_ce": 7.510394789278507e-05, + "loss_iou": 0.205078125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 589197044, + "step": 6455 + }, + { + "epoch": 26.9, + "grad_norm": 2.330781239457574, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 589287924, + "step": 6456 + }, + { + "epoch": 26.9, + "loss": 0.05563540384173393, + "loss_ce": 1.86222086995258e-06, + "loss_iou": 0.1923828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 589287924, + "step": 6456 + }, + { + "epoch": 26.904166666666665, + "grad_norm": 2.8092294796667807, + "learning_rate": 5e-05, + "loss": 0.0297, + "num_input_tokens_seen": 589379332, + "step": 6457 + }, + { + "epoch": 26.904166666666665, + "loss": 0.034930381923913956, + "loss_ce": 3.013393325090874e-06, + "loss_iou": 0.283203125, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 589379332, + "step": 6457 + }, + { + "epoch": 26.908333333333335, + "grad_norm": 3.501297570900534, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 589470464, + "step": 6458 + }, + { + "epoch": 26.908333333333335, + "loss": 0.041064560413360596, + "loss_ce": 1.842044002842158e-05, + "loss_iou": 0.265625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 589470464, + "step": 6458 + }, + { + "epoch": 26.9125, + "grad_norm": 2.3132474254536217, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 589561848, + "step": 6459 + }, + { + "epoch": 26.9125, + "loss": 0.05403226241469383, + "loss_ce": 0.0004166927537880838, + "loss_iou": 0.0908203125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 589561848, + "step": 6459 + }, + { + "epoch": 26.916666666666668, + "grad_norm": 1.251692634650389, + "learning_rate": 5e-05, + "loss": 0.0309, + "num_input_tokens_seen": 589653132, + "step": 6460 + }, + { + "epoch": 26.916666666666668, + "loss": 0.02022678591310978, + "loss_ce": 0.00016910732665564865, + "loss_iou": 0.2109375, + "loss_num": 0.003997802734375, + "loss_xval": 0.02001953125, + "num_input_tokens_seen": 589653132, + "step": 6460 + }, + { + "epoch": 26.920833333333334, + "grad_norm": 1.7199158661604974, + "learning_rate": 5e-05, + "loss": 0.0303, + "num_input_tokens_seen": 589744456, + "step": 6461 + }, + { + "epoch": 26.920833333333334, + "loss": 0.03480079025030136, + "loss_ce": 3.1222889447235502e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 589744456, + "step": 6461 + }, + { + "epoch": 26.925, + "grad_norm": 7.6365636958705485, + "learning_rate": 5e-05, + "loss": 0.0534, + "num_input_tokens_seen": 589835476, + "step": 6462 + }, + { + "epoch": 26.925, + "loss": 0.05388407036662102, + "loss_ce": 5.286566192808095e-06, + "loss_iou": 0.24609375, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 589835476, + "step": 6462 + }, + { + "epoch": 26.929166666666667, + "grad_norm": 3.8661025523979697, + "learning_rate": 5e-05, + "loss": 0.0469, + "num_input_tokens_seen": 589927360, + "step": 6463 + }, + { + "epoch": 26.929166666666667, + "loss": 0.028550995513796806, + "loss_ce": 3.994800863438286e-05, + "loss_iou": 0.294921875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 589927360, + "step": 6463 + }, + { + "epoch": 26.933333333333334, + "grad_norm": 2.595665132144297, + "learning_rate": 5e-05, + "loss": 0.0258, + "num_input_tokens_seen": 590018304, + "step": 6464 + }, + { + "epoch": 26.933333333333334, + "loss": 0.029079755768179893, + "loss_ce": 4.132675712753553e-06, + "loss_iou": 0.25, + "loss_num": 0.005828857421875, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 590018304, + "step": 6464 + }, + { + "epoch": 26.9375, + "grad_norm": 4.17170778740505, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 590109448, + "step": 6465 + }, + { + "epoch": 26.9375, + "loss": 0.065787173807621, + "loss_ce": 6.5329086282872595e-06, + "loss_iou": 0.275390625, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 590109448, + "step": 6465 + }, + { + "epoch": 26.941666666666666, + "grad_norm": 1.5051146651283749, + "learning_rate": 5e-05, + "loss": 0.049, + "num_input_tokens_seen": 590200868, + "step": 6466 + }, + { + "epoch": 26.941666666666666, + "loss": 0.05446765571832657, + "loss_ce": 3.9554543036501855e-05, + "loss_iou": 0.173828125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 590200868, + "step": 6466 + }, + { + "epoch": 26.945833333333333, + "grad_norm": 0.8182828257810135, + "learning_rate": 5e-05, + "loss": 0.026, + "num_input_tokens_seen": 590292396, + "step": 6467 + }, + { + "epoch": 26.945833333333333, + "loss": 0.030023805797100067, + "loss_ce": 9.769059033715166e-06, + "loss_iou": 0.10009765625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 590292396, + "step": 6467 + }, + { + "epoch": 26.95, + "grad_norm": 1.8724105927178325, + "learning_rate": 5e-05, + "loss": 0.0616, + "num_input_tokens_seen": 590384416, + "step": 6468 + }, + { + "epoch": 26.95, + "loss": 0.07086817920207977, + "loss_ce": 6.361617579386802e-06, + "loss_iou": 0.1943359375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 590384416, + "step": 6468 + }, + { + "epoch": 26.954166666666666, + "grad_norm": 1.5139939648692018, + "learning_rate": 5e-05, + "loss": 0.0514, + "num_input_tokens_seen": 590475848, + "step": 6469 + }, + { + "epoch": 26.954166666666666, + "loss": 0.026034872978925705, + "loss_ce": 0.00015596569573972374, + "loss_iou": 0.2041015625, + "loss_num": 0.00518798828125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 590475848, + "step": 6469 + }, + { + "epoch": 26.958333333333332, + "grad_norm": 1.3300940044856553, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 590567184, + "step": 6470 + }, + { + "epoch": 26.958333333333332, + "loss": 0.023993268609046936, + "loss_ce": 0.00021626101806759834, + "loss_iou": 0.24609375, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 590567184, + "step": 6470 + }, + { + "epoch": 26.9625, + "grad_norm": 1.7243529092692613, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 590658468, + "step": 6471 + }, + { + "epoch": 26.9625, + "loss": 0.03004373051226139, + "loss_ce": 1.4433577234740369e-05, + "loss_iou": 0.1953125, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 590658468, + "step": 6471 + }, + { + "epoch": 26.966666666666665, + "grad_norm": 3.616339040915319, + "learning_rate": 5e-05, + "loss": 0.0536, + "num_input_tokens_seen": 590749624, + "step": 6472 + }, + { + "epoch": 26.966666666666665, + "loss": 0.06653188169002533, + "loss_ce": 3.5614871194411535e-06, + "loss_iou": 0.12890625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 590749624, + "step": 6472 + }, + { + "epoch": 26.970833333333335, + "grad_norm": 2.6145017660857124, + "learning_rate": 5e-05, + "loss": 0.0714, + "num_input_tokens_seen": 590840284, + "step": 6473 + }, + { + "epoch": 26.970833333333335, + "loss": 0.06919960677623749, + "loss_ce": 0.00012306452845223248, + "loss_iou": 0.1123046875, + "loss_num": 0.01385498046875, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 590840284, + "step": 6473 + }, + { + "epoch": 26.975, + "grad_norm": 3.0006287243722394, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 590931200, + "step": 6474 + }, + { + "epoch": 26.975, + "loss": 0.02546137198805809, + "loss_ce": 2.08112146538042e-06, + "loss_iou": 0.234375, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 590931200, + "step": 6474 + }, + { + "epoch": 26.979166666666668, + "grad_norm": 2.998171401254916, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 591023128, + "step": 6475 + }, + { + "epoch": 26.979166666666668, + "loss": 0.025605838745832443, + "loss_ce": 1.5902509176157764e-06, + "loss_iou": 0.341796875, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 591023128, + "step": 6475 + }, + { + "epoch": 26.983333333333334, + "grad_norm": 2.6153748993073815, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 591114924, + "step": 6476 + }, + { + "epoch": 26.983333333333334, + "loss": 0.03486858308315277, + "loss_ce": 0.003710137214511633, + "loss_iou": 0.265625, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 591114924, + "step": 6476 + }, + { + "epoch": 26.9875, + "grad_norm": 5.765636049482318, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 591206700, + "step": 6477 + }, + { + "epoch": 26.9875, + "loss": 0.026380009949207306, + "loss_ce": 1.2822742974094581e-05, + "loss_iou": 0.283203125, + "loss_num": 0.005279541015625, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 591206700, + "step": 6477 + }, + { + "epoch": 26.991666666666667, + "grad_norm": 1.3548250943193227, + "learning_rate": 5e-05, + "loss": 0.0638, + "num_input_tokens_seen": 591298156, + "step": 6478 + }, + { + "epoch": 26.991666666666667, + "loss": 0.03639143705368042, + "loss_ce": 1.4486518921330571e-05, + "loss_iou": 0.265625, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 591298156, + "step": 6478 + }, + { + "epoch": 26.995833333333334, + "grad_norm": 2.1892743373009442, + "learning_rate": 5e-05, + "loss": 0.0881, + "num_input_tokens_seen": 591389368, + "step": 6479 + }, + { + "epoch": 26.995833333333334, + "loss": 0.07875439524650574, + "loss_ce": 0.0001640051050344482, + "loss_iou": 0.185546875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 591389368, + "step": 6479 + }, + { + "epoch": 27.0, + "grad_norm": 2.6684763478882854, + "learning_rate": 5e-05, + "loss": 0.0381, + "num_input_tokens_seen": 591481344, + "step": 6480 + }, + { + "epoch": 27.0, + "loss": 0.021271036937832832, + "loss_ce": 2.3174412490334362e-05, + "loss_iou": 0.1015625, + "loss_num": 0.004241943359375, + "loss_xval": 0.021240234375, + "num_input_tokens_seen": 591481344, + "step": 6480 + }, + { + "epoch": 27.004166666666666, + "grad_norm": 1.955922810243266, + "learning_rate": 5e-05, + "loss": 0.0249, + "num_input_tokens_seen": 591571916, + "step": 6481 + }, + { + "epoch": 27.004166666666666, + "loss": 0.028039991855621338, + "loss_ce": 1.966868239833275e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 591571916, + "step": 6481 + }, + { + "epoch": 27.008333333333333, + "grad_norm": 2.962709243450798, + "learning_rate": 5e-05, + "loss": 0.0319, + "num_input_tokens_seen": 591662092, + "step": 6482 + }, + { + "epoch": 27.008333333333333, + "loss": 0.040201835334300995, + "loss_ce": 2.5569020181137603e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 591662092, + "step": 6482 + }, + { + "epoch": 27.0125, + "grad_norm": 2.4820166684216094, + "learning_rate": 5e-05, + "loss": 0.0287, + "num_input_tokens_seen": 591753724, + "step": 6483 + }, + { + "epoch": 27.0125, + "loss": 0.031597621738910675, + "loss_ce": 4.300906311982544e-06, + "loss_iou": 0.0654296875, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 591753724, + "step": 6483 + }, + { + "epoch": 27.016666666666666, + "grad_norm": 2.258753161775535, + "learning_rate": 5e-05, + "loss": 0.0282, + "num_input_tokens_seen": 591844740, + "step": 6484 + }, + { + "epoch": 27.016666666666666, + "loss": 0.030123792588710785, + "loss_ce": 0.002207839395850897, + "loss_iou": 0.1640625, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 591844740, + "step": 6484 + }, + { + "epoch": 27.020833333333332, + "grad_norm": 3.1742272127851017, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 591936264, + "step": 6485 + }, + { + "epoch": 27.020833333333332, + "loss": 0.03546886146068573, + "loss_ce": 0.0020978914108127356, + "loss_iou": 0.275390625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 591936264, + "step": 6485 + }, + { + "epoch": 27.025, + "grad_norm": 3.280678281674529, + "learning_rate": 5e-05, + "loss": 0.0655, + "num_input_tokens_seen": 592026920, + "step": 6486 + }, + { + "epoch": 27.025, + "loss": 0.06747589260339737, + "loss_ce": 1.5269779396476224e-06, + "loss_iou": 0.32421875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 592026920, + "step": 6486 + }, + { + "epoch": 27.029166666666665, + "grad_norm": 5.005440450340139, + "learning_rate": 5e-05, + "loss": 0.0383, + "num_input_tokens_seen": 592118488, + "step": 6487 + }, + { + "epoch": 27.029166666666665, + "loss": 0.040529705584049225, + "loss_ce": 2.362769464525627e-06, + "loss_iou": 0.2890625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 592118488, + "step": 6487 + }, + { + "epoch": 27.033333333333335, + "grad_norm": 3.3301406471708206, + "learning_rate": 5e-05, + "loss": 0.0378, + "num_input_tokens_seen": 592209508, + "step": 6488 + }, + { + "epoch": 27.033333333333335, + "loss": 0.025289881974458694, + "loss_ce": 6.0693087107210886e-06, + "loss_iou": 0.263671875, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 592209508, + "step": 6488 + }, + { + "epoch": 27.0375, + "grad_norm": 2.9558401362765756, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 592301356, + "step": 6489 + }, + { + "epoch": 27.0375, + "loss": 0.05254870653152466, + "loss_ce": 5.066004177933792e-06, + "loss_iou": 0.30078125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 592301356, + "step": 6489 + }, + { + "epoch": 27.041666666666668, + "grad_norm": 2.2463626000732617, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 592392420, + "step": 6490 + }, + { + "epoch": 27.041666666666668, + "loss": 0.04210108518600464, + "loss_ce": 1.7344702428090386e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 592392420, + "step": 6490 + }, + { + "epoch": 27.045833333333334, + "grad_norm": 1.7286917187255142, + "learning_rate": 5e-05, + "loss": 0.0588, + "num_input_tokens_seen": 592483924, + "step": 6491 + }, + { + "epoch": 27.045833333333334, + "loss": 0.026698973029851913, + "loss_ce": 3.4238990338053554e-05, + "loss_iou": 0.23046875, + "loss_num": 0.005340576171875, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 592483924, + "step": 6491 + }, + { + "epoch": 27.05, + "grad_norm": 1.945067902151619, + "learning_rate": 5e-05, + "loss": 0.0688, + "num_input_tokens_seen": 592575252, + "step": 6492 + }, + { + "epoch": 27.05, + "loss": 0.06354920566082001, + "loss_ce": 1.161103409685893e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 592575252, + "step": 6492 + }, + { + "epoch": 27.054166666666667, + "grad_norm": 1.6626659274386684, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 592666608, + "step": 6493 + }, + { + "epoch": 27.054166666666667, + "loss": 0.06014417111873627, + "loss_ce": 0.0001466095563955605, + "loss_iou": 0.22265625, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 592666608, + "step": 6493 + }, + { + "epoch": 27.058333333333334, + "grad_norm": 1.3952258022061073, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 592758172, + "step": 6494 + }, + { + "epoch": 27.058333333333334, + "loss": 0.06435603648424149, + "loss_ce": 2.091861915687332e-06, + "loss_iou": 0.21484375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 592758172, + "step": 6494 + }, + { + "epoch": 27.0625, + "grad_norm": 2.322261381202757, + "learning_rate": 5e-05, + "loss": 0.0387, + "num_input_tokens_seen": 592849552, + "step": 6495 + }, + { + "epoch": 27.0625, + "loss": 0.0410141684114933, + "loss_ce": 1.3802233297610655e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 592849552, + "step": 6495 + }, + { + "epoch": 27.066666666666666, + "grad_norm": 1.9380490050746328, + "learning_rate": 5e-05, + "loss": 0.0573, + "num_input_tokens_seen": 592941172, + "step": 6496 + }, + { + "epoch": 27.066666666666666, + "loss": 0.07027255743741989, + "loss_ce": 0.002462498378008604, + "loss_iou": 0.01123046875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 592941172, + "step": 6496 + }, + { + "epoch": 27.070833333333333, + "grad_norm": 1.9863267251115089, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 593032284, + "step": 6497 + }, + { + "epoch": 27.070833333333333, + "loss": 0.035664528608322144, + "loss_ce": 0.0006608680123463273, + "loss_iou": 0.177734375, + "loss_num": 0.00701904296875, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 593032284, + "step": 6497 + }, + { + "epoch": 27.075, + "grad_norm": 2.818201131258505, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 593123252, + "step": 6498 + }, + { + "epoch": 27.075, + "loss": 0.025768805295228958, + "loss_ce": 4.3383024603826925e-06, + "loss_iou": 0.263671875, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 593123252, + "step": 6498 + }, + { + "epoch": 27.079166666666666, + "grad_norm": 2.9105649505028994, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 593214604, + "step": 6499 + }, + { + "epoch": 27.079166666666666, + "loss": 0.020308678969740868, + "loss_ce": 1.4489427485386841e-05, + "loss_iou": 0.173828125, + "loss_num": 0.004058837890625, + "loss_xval": 0.020263671875, + "num_input_tokens_seen": 593214604, + "step": 6499 + }, + { + "epoch": 27.083333333333332, + "grad_norm": 2.359393385537779, + "learning_rate": 5e-05, + "loss": 0.0329, + "num_input_tokens_seen": 593303984, + "step": 6500 + }, + { + "epoch": 27.083333333333332, + "eval_seeclick_CIoU": 0.1611507646739483, + "eval_seeclick_GIoU": 0.12508939485996962, + "eval_seeclick_IoU": 0.28630343824625015, + "eval_seeclick_MAE_all": 0.12311594188213348, + "eval_seeclick_MAE_h": 0.1294156238436699, + "eval_seeclick_MAE_w": 0.24873895943164825, + "eval_seeclick_MAE_x_boxes": 0.2699016109108925, + "eval_seeclick_MAE_y_boxes": 0.12702525407075882, + "eval_seeclick_NUM_probability": 0.9999813139438629, + "eval_seeclick_inside_bbox": 0.4801136404275894, + "eval_seeclick_loss": 0.6404656171798706, + "eval_seeclick_loss_ce": 0.11072489991784096, + "eval_seeclick_loss_iou": 0.42852783203125, + "eval_seeclick_loss_num": 0.0981903076171875, + "eval_seeclick_loss_xval": 0.49114990234375, + "eval_seeclick_runtime": 78.1395, + "eval_seeclick_samples_per_second": 0.55, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 593303984, + "step": 6500 + }, + { + "epoch": 27.083333333333332, + "eval_icons_CIoU": 0.2290511429309845, + "eval_icons_GIoU": 0.2096809484064579, + "eval_icons_IoU": 0.3420000970363617, + "eval_icons_MAE_all": 0.0876893661916256, + "eval_icons_MAE_h": 0.17075396329164505, + "eval_icons_MAE_w": 0.1464579701423645, + "eval_icons_MAE_x_boxes": 0.1476225182414055, + "eval_icons_MAE_y_boxes": 0.17341304197907448, + "eval_icons_NUM_probability": 0.9999927282333374, + "eval_icons_inside_bbox": 0.4913194477558136, + "eval_icons_loss": 0.42048099637031555, + "eval_icons_loss_ce": 0.0010824212222360075, + "eval_icons_loss_iou": 0.19549560546875, + "eval_icons_loss_num": 0.085906982421875, + "eval_icons_loss_xval": 0.42987060546875, + "eval_icons_runtime": 90.1406, + "eval_icons_samples_per_second": 0.555, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 593303984, + "step": 6500 + }, + { + "epoch": 27.083333333333332, + "eval_screenspot_CIoU": 0.4100871682167053, + "eval_screenspot_GIoU": 0.39583032329877216, + "eval_screenspot_IoU": 0.48177264134089154, + "eval_screenspot_MAE_all": 0.09190142154693604, + "eval_screenspot_MAE_h": 0.09084974229335785, + "eval_screenspot_MAE_w": 0.18963578095038733, + "eval_screenspot_MAE_x_boxes": 0.17057538032531738, + "eval_screenspot_MAE_y_boxes": 0.08838931967814763, + "eval_screenspot_NUM_probability": 0.9996122717857361, + "eval_screenspot_inside_bbox": 0.7279166579246521, + "eval_screenspot_loss": 0.46608996391296387, + "eval_screenspot_loss_ce": 0.00031701042704905075, + "eval_screenspot_loss_iou": 0.3773193359375, + "eval_screenspot_loss_num": 0.09621938069661458, + "eval_screenspot_loss_xval": 0.481201171875, + "eval_screenspot_runtime": 151.372, + "eval_screenspot_samples_per_second": 0.588, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 593303984, + "step": 6500 + }, + { + "epoch": 27.083333333333332, + "eval_compot_CIoU": 0.5083262324333191, + "eval_compot_GIoU": 0.5026091039180756, + "eval_compot_IoU": 0.5841458737850189, + "eval_compot_MAE_all": 0.052470432594418526, + "eval_compot_MAE_h": 0.06258269213140011, + "eval_compot_MAE_w": 0.13531950861215591, + "eval_compot_MAE_x_boxes": 0.1363530457019806, + "eval_compot_MAE_y_boxes": 0.06230769865214825, + "eval_compot_NUM_probability": 0.9999942779541016, + "eval_compot_inside_bbox": 0.7604166567325592, + "eval_compot_loss": 0.3140476942062378, + "eval_compot_loss_ce": 0.042354028671979904, + "eval_compot_loss_iou": 0.31719970703125, + "eval_compot_loss_num": 0.05543327331542969, + "eval_compot_loss_xval": 0.2773284912109375, + "eval_compot_runtime": 87.6305, + "eval_compot_samples_per_second": 0.571, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 593303984, + "step": 6500 + }, + { + "epoch": 27.083333333333332, + "loss": 0.3146435022354126, + "loss_ce": 0.043616883456707, + "loss_iou": 0.33203125, + "loss_num": 0.05419921875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 593303984, + "step": 6500 + }, + { + "epoch": 27.0875, + "grad_norm": 2.1077022206848555, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 593394836, + "step": 6501 + }, + { + "epoch": 27.0875, + "loss": 0.037172507494688034, + "loss_ce": 2.0956435946573038e-06, + "loss_iou": 0.330078125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 593394836, + "step": 6501 + }, + { + "epoch": 27.091666666666665, + "grad_norm": 1.9656642718284403, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 593485112, + "step": 6502 + }, + { + "epoch": 27.091666666666665, + "loss": 0.06263962388038635, + "loss_ce": 2.301181439179345e-06, + "loss_iou": 0.33203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 593485112, + "step": 6502 + }, + { + "epoch": 27.095833333333335, + "grad_norm": 1.9131504146432234, + "learning_rate": 5e-05, + "loss": 0.0597, + "num_input_tokens_seen": 593576028, + "step": 6503 + }, + { + "epoch": 27.095833333333335, + "loss": 0.0599580779671669, + "loss_ce": 6.293254045885988e-06, + "loss_iou": 0.1513671875, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 593576028, + "step": 6503 + }, + { + "epoch": 27.1, + "grad_norm": 1.9339228752806417, + "learning_rate": 5e-05, + "loss": 0.0496, + "num_input_tokens_seen": 593667172, + "step": 6504 + }, + { + "epoch": 27.1, + "loss": 0.01530100591480732, + "loss_ce": 1.9329276256030425e-05, + "loss_iou": 0.013671875, + "loss_num": 0.0030517578125, + "loss_xval": 0.0152587890625, + "num_input_tokens_seen": 593667172, + "step": 6504 + }, + { + "epoch": 27.104166666666668, + "grad_norm": 1.9726035509891664, + "learning_rate": 5e-05, + "loss": 0.0729, + "num_input_tokens_seen": 593758656, + "step": 6505 + }, + { + "epoch": 27.104166666666668, + "loss": 0.11320458352565765, + "loss_ce": 0.004150021355599165, + "loss_iou": 0.203125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 593758656, + "step": 6505 + }, + { + "epoch": 27.108333333333334, + "grad_norm": 1.4187761290531202, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 593850620, + "step": 6506 + }, + { + "epoch": 27.108333333333334, + "loss": 0.023839503526687622, + "loss_ce": 5.276498086459469e-06, + "loss_iou": 0.216796875, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 593850620, + "step": 6506 + }, + { + "epoch": 27.1125, + "grad_norm": 3.1787292236573577, + "learning_rate": 5e-05, + "loss": 0.0343, + "num_input_tokens_seen": 593942132, + "step": 6507 + }, + { + "epoch": 27.1125, + "loss": 0.044819869101047516, + "loss_ce": 4.804438958672108e-06, + "loss_iou": 0.2421875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 593942132, + "step": 6507 + }, + { + "epoch": 27.116666666666667, + "grad_norm": 3.089052011599488, + "learning_rate": 5e-05, + "loss": 0.0488, + "num_input_tokens_seen": 594033800, + "step": 6508 + }, + { + "epoch": 27.116666666666667, + "loss": 0.06599961966276169, + "loss_ce": 5.357538157113595e-06, + "loss_iou": 0.18359375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 594033800, + "step": 6508 + }, + { + "epoch": 27.120833333333334, + "grad_norm": 2.2688935031872757, + "learning_rate": 5e-05, + "loss": 0.0289, + "num_input_tokens_seen": 594125320, + "step": 6509 + }, + { + "epoch": 27.120833333333334, + "loss": 0.03243381530046463, + "loss_ce": 0.00011569818889256567, + "loss_iou": 0.255859375, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 594125320, + "step": 6509 + }, + { + "epoch": 27.125, + "grad_norm": 2.1392485184909646, + "learning_rate": 5e-05, + "loss": 0.0431, + "num_input_tokens_seen": 594216656, + "step": 6510 + }, + { + "epoch": 27.125, + "loss": 0.03446205332875252, + "loss_ce": 0.0003281440294813365, + "loss_iou": 0.15234375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 594216656, + "step": 6510 + }, + { + "epoch": 27.129166666666666, + "grad_norm": 1.9749644119750247, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 594306504, + "step": 6511 + }, + { + "epoch": 27.129166666666666, + "loss": 0.031366996467113495, + "loss_ce": 2.5445739083806984e-05, + "loss_iou": 0.29296875, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 594306504, + "step": 6511 + }, + { + "epoch": 27.133333333333333, + "grad_norm": 1.4282145397187538, + "learning_rate": 5e-05, + "loss": 0.0525, + "num_input_tokens_seen": 594396640, + "step": 6512 + }, + { + "epoch": 27.133333333333333, + "loss": 0.03622637316584587, + "loss_ce": 2.0083837171114283e-06, + "loss_iou": 0.134765625, + "loss_num": 0.00726318359375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 594396640, + "step": 6512 + }, + { + "epoch": 27.1375, + "grad_norm": 1.3492857134776273, + "learning_rate": 5e-05, + "loss": 0.0354, + "num_input_tokens_seen": 594488128, + "step": 6513 + }, + { + "epoch": 27.1375, + "loss": 0.04107179492712021, + "loss_ce": 2.764478722383501e-06, + "loss_iou": 0.19921875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 594488128, + "step": 6513 + }, + { + "epoch": 27.141666666666666, + "grad_norm": 1.769915660912848, + "learning_rate": 5e-05, + "loss": 0.0177, + "num_input_tokens_seen": 594579368, + "step": 6514 + }, + { + "epoch": 27.141666666666666, + "loss": 0.017422253265976906, + "loss_ce": 2.7233463697484694e-05, + "loss_iou": 0.09326171875, + "loss_num": 0.00347900390625, + "loss_xval": 0.017333984375, + "num_input_tokens_seen": 594579368, + "step": 6514 + }, + { + "epoch": 27.145833333333332, + "grad_norm": 0.755243181489823, + "learning_rate": 5e-05, + "loss": 0.0435, + "num_input_tokens_seen": 594670584, + "step": 6515 + }, + { + "epoch": 27.145833333333332, + "loss": 0.05167026072740555, + "loss_ce": 4.001905836048536e-06, + "loss_iou": 0.24609375, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 594670584, + "step": 6515 + }, + { + "epoch": 27.15, + "grad_norm": 0.9541012916032766, + "learning_rate": 5e-05, + "loss": 0.0196, + "num_input_tokens_seen": 594761792, + "step": 6516 + }, + { + "epoch": 27.15, + "loss": 0.01985876075923443, + "loss_ce": 7.076235306158196e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.00396728515625, + "loss_xval": 0.0198974609375, + "num_input_tokens_seen": 594761792, + "step": 6516 + }, + { + "epoch": 27.154166666666665, + "grad_norm": 1.6180947072383487, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 594853332, + "step": 6517 + }, + { + "epoch": 27.154166666666665, + "loss": 0.031560756266117096, + "loss_ce": 2.0839312128373422e-05, + "loss_iou": 0.09716796875, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 594853332, + "step": 6517 + }, + { + "epoch": 27.158333333333335, + "grad_norm": 1.5742236895851742, + "learning_rate": 5e-05, + "loss": 0.0507, + "num_input_tokens_seen": 594944404, + "step": 6518 + }, + { + "epoch": 27.158333333333335, + "loss": 0.07269126921892166, + "loss_ce": 7.469241973012686e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 594944404, + "step": 6518 + }, + { + "epoch": 27.1625, + "grad_norm": 1.9779687504647347, + "learning_rate": 5e-05, + "loss": 0.0673, + "num_input_tokens_seen": 595034692, + "step": 6519 + }, + { + "epoch": 27.1625, + "loss": 0.07391928136348724, + "loss_ce": 5.7055276556639e-06, + "loss_iou": 0.11474609375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 595034692, + "step": 6519 + }, + { + "epoch": 27.166666666666668, + "grad_norm": 0.8543401365989585, + "learning_rate": 5e-05, + "loss": 0.0268, + "num_input_tokens_seen": 595126832, + "step": 6520 + }, + { + "epoch": 27.166666666666668, + "loss": 0.020465940237045288, + "loss_ce": 4.58647555205971e-05, + "loss_iou": 0.146484375, + "loss_num": 0.00408935546875, + "loss_xval": 0.0203857421875, + "num_input_tokens_seen": 595126832, + "step": 6520 + }, + { + "epoch": 27.170833333333334, + "grad_norm": 0.8576067334089554, + "learning_rate": 5e-05, + "loss": 0.0203, + "num_input_tokens_seen": 595218192, + "step": 6521 + }, + { + "epoch": 27.170833333333334, + "loss": 0.021774495020508766, + "loss_ce": 3.0721468647243455e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.00433349609375, + "loss_xval": 0.021728515625, + "num_input_tokens_seen": 595218192, + "step": 6521 + }, + { + "epoch": 27.175, + "grad_norm": 1.1798262388390548, + "learning_rate": 5e-05, + "loss": 0.0453, + "num_input_tokens_seen": 595310668, + "step": 6522 + }, + { + "epoch": 27.175, + "loss": 0.055068276822566986, + "loss_ce": 0.002219459041953087, + "loss_iou": 0.12890625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 595310668, + "step": 6522 + }, + { + "epoch": 27.179166666666667, + "grad_norm": 0.47659015671630084, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 595401872, + "step": 6523 + }, + { + "epoch": 27.179166666666667, + "loss": 0.02778068743646145, + "loss_ce": 2.0612305888789706e-06, + "loss_iou": 0.220703125, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 595401872, + "step": 6523 + }, + { + "epoch": 27.183333333333334, + "grad_norm": 0.753248181051116, + "learning_rate": 5e-05, + "loss": 0.0336, + "num_input_tokens_seen": 595492764, + "step": 6524 + }, + { + "epoch": 27.183333333333334, + "loss": 0.042344845831394196, + "loss_ce": 0.0001695503160590306, + "loss_iou": 0.0400390625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 595492764, + "step": 6524 + }, + { + "epoch": 27.1875, + "grad_norm": 1.3000092644957844, + "learning_rate": 5e-05, + "loss": 0.0227, + "num_input_tokens_seen": 595584444, + "step": 6525 + }, + { + "epoch": 27.1875, + "loss": 0.013810301199555397, + "loss_ce": 0.00022997862834017724, + "loss_iou": 0.11474609375, + "loss_num": 0.002716064453125, + "loss_xval": 0.0135498046875, + "num_input_tokens_seen": 595584444, + "step": 6525 + }, + { + "epoch": 27.191666666666666, + "grad_norm": 1.0670653485738448, + "learning_rate": 5e-05, + "loss": 0.0222, + "num_input_tokens_seen": 595675460, + "step": 6526 + }, + { + "epoch": 27.191666666666666, + "loss": 0.019739195704460144, + "loss_ce": 1.9522099137248006e-06, + "loss_iou": 0.158203125, + "loss_num": 0.003936767578125, + "loss_xval": 0.019775390625, + "num_input_tokens_seen": 595675460, + "step": 6526 + }, + { + "epoch": 27.195833333333333, + "grad_norm": 1.4191768284685875, + "learning_rate": 5e-05, + "loss": 0.0221, + "num_input_tokens_seen": 595766744, + "step": 6527 + }, + { + "epoch": 27.195833333333333, + "loss": 0.014562261290848255, + "loss_ce": 5.375677119445754e-06, + "loss_iou": 0.1875, + "loss_num": 0.0029144287109375, + "loss_xval": 0.0145263671875, + "num_input_tokens_seen": 595766744, + "step": 6527 + }, + { + "epoch": 27.2, + "grad_norm": 2.510616324231998, + "learning_rate": 5e-05, + "loss": 0.0373, + "num_input_tokens_seen": 595857988, + "step": 6528 + }, + { + "epoch": 27.2, + "loss": 0.03148343786597252, + "loss_ce": 4.559001354209613e-06, + "loss_iou": 0.244140625, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 595857988, + "step": 6528 + }, + { + "epoch": 27.204166666666666, + "grad_norm": 3.548223310534183, + "learning_rate": 5e-05, + "loss": 0.0418, + "num_input_tokens_seen": 595949040, + "step": 6529 + }, + { + "epoch": 27.204166666666666, + "loss": 0.04415207728743553, + "loss_ce": 8.40070060803555e-06, + "loss_iou": 0.2265625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 595949040, + "step": 6529 + }, + { + "epoch": 27.208333333333332, + "grad_norm": 3.9173736783739335, + "learning_rate": 5e-05, + "loss": 0.0599, + "num_input_tokens_seen": 596040256, + "step": 6530 + }, + { + "epoch": 27.208333333333332, + "loss": 0.03422192111611366, + "loss_ce": 0.00021007962641306221, + "loss_iou": 0.3828125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 596040256, + "step": 6530 + }, + { + "epoch": 27.2125, + "grad_norm": 2.4748804437242664, + "learning_rate": 5e-05, + "loss": 0.0362, + "num_input_tokens_seen": 596131668, + "step": 6531 + }, + { + "epoch": 27.2125, + "loss": 0.02880486100912094, + "loss_ce": 1.152791992353741e-05, + "loss_iou": 0.271484375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 596131668, + "step": 6531 + }, + { + "epoch": 27.216666666666665, + "grad_norm": 2.1199309385036447, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 596223196, + "step": 6532 + }, + { + "epoch": 27.216666666666665, + "loss": 0.07532989978790283, + "loss_ce": 0.00040924627683125436, + "loss_iou": 0.2001953125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 596223196, + "step": 6532 + }, + { + "epoch": 27.220833333333335, + "grad_norm": 3.021299338389343, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 596315184, + "step": 6533 + }, + { + "epoch": 27.220833333333335, + "loss": 0.0591164231300354, + "loss_ce": 3.877664312312845e-06, + "loss_iou": 0.34375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 596315184, + "step": 6533 + }, + { + "epoch": 27.225, + "grad_norm": 4.6485282022338446, + "learning_rate": 5e-05, + "loss": 0.0733, + "num_input_tokens_seen": 596406900, + "step": 6534 + }, + { + "epoch": 27.225, + "loss": 0.1163218691945076, + "loss_ce": 4.120826815778855e-06, + "loss_iou": 0.263671875, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 596406900, + "step": 6534 + }, + { + "epoch": 27.229166666666668, + "grad_norm": 2.9641180907333062, + "learning_rate": 5e-05, + "loss": 0.0283, + "num_input_tokens_seen": 596498008, + "step": 6535 + }, + { + "epoch": 27.229166666666668, + "loss": 0.024163711816072464, + "loss_ce": 4.7195982915582135e-05, + "loss_iou": 0.30859375, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 596498008, + "step": 6535 + }, + { + "epoch": 27.233333333333334, + "grad_norm": 3.072393946682052, + "learning_rate": 5e-05, + "loss": 0.0549, + "num_input_tokens_seen": 596588428, + "step": 6536 + }, + { + "epoch": 27.233333333333334, + "loss": 0.07454818487167358, + "loss_ce": 9.000251338875387e-06, + "loss_iou": 0.259765625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 596588428, + "step": 6536 + }, + { + "epoch": 27.2375, + "grad_norm": 2.4653601053704937, + "learning_rate": 5e-05, + "loss": 0.031, + "num_input_tokens_seen": 596678336, + "step": 6537 + }, + { + "epoch": 27.2375, + "loss": 0.03270196542143822, + "loss_ce": 0.001253600581549108, + "loss_iou": 0.244140625, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 596678336, + "step": 6537 + }, + { + "epoch": 27.241666666666667, + "grad_norm": 2.775849081662359, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 596769908, + "step": 6538 + }, + { + "epoch": 27.241666666666667, + "loss": 0.04061917960643768, + "loss_ce": 6.131500413175672e-05, + "loss_iou": 0.2734375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 596769908, + "step": 6538 + }, + { + "epoch": 27.245833333333334, + "grad_norm": 3.645464170111079, + "learning_rate": 5e-05, + "loss": 0.0402, + "num_input_tokens_seen": 596859852, + "step": 6539 + }, + { + "epoch": 27.245833333333334, + "loss": 0.05066291242837906, + "loss_ce": 3.7343652365962043e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 596859852, + "step": 6539 + }, + { + "epoch": 27.25, + "grad_norm": 3.247650890370929, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 596951336, + "step": 6540 + }, + { + "epoch": 27.25, + "loss": 0.06673350930213928, + "loss_ce": 6.826207481935853e-06, + "loss_iou": 0.1279296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 596951336, + "step": 6540 + }, + { + "epoch": 27.254166666666666, + "grad_norm": 2.689825035863558, + "learning_rate": 5e-05, + "loss": 0.0295, + "num_input_tokens_seen": 597042140, + "step": 6541 + }, + { + "epoch": 27.254166666666666, + "loss": 0.02926325984299183, + "loss_ce": 4.53209349871031e-06, + "loss_iou": 0.228515625, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 597042140, + "step": 6541 + }, + { + "epoch": 27.258333333333333, + "grad_norm": 4.583612867604823, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 597132976, + "step": 6542 + }, + { + "epoch": 27.258333333333333, + "loss": 0.0501759797334671, + "loss_ce": 3.559742253855802e-05, + "loss_iou": 0.28125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 597132976, + "step": 6542 + }, + { + "epoch": 27.2625, + "grad_norm": 3.109616886623326, + "learning_rate": 5e-05, + "loss": 0.041, + "num_input_tokens_seen": 597222868, + "step": 6543 + }, + { + "epoch": 27.2625, + "loss": 0.040508195757865906, + "loss_ce": 2.6628946216078475e-05, + "loss_iou": 0.310546875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 597222868, + "step": 6543 + }, + { + "epoch": 27.266666666666666, + "grad_norm": 2.402863282173942, + "learning_rate": 5e-05, + "loss": 0.0239, + "num_input_tokens_seen": 597313968, + "step": 6544 + }, + { + "epoch": 27.266666666666666, + "loss": 0.025843966752290726, + "loss_ce": 6.424347520805895e-05, + "loss_iou": 0.2265625, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 597313968, + "step": 6544 + }, + { + "epoch": 27.270833333333332, + "grad_norm": 2.2805245659831757, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 597405364, + "step": 6545 + }, + { + "epoch": 27.270833333333332, + "loss": 0.04524778574705124, + "loss_ce": 5.473571491165785e-06, + "loss_iou": 0.1875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 597405364, + "step": 6545 + }, + { + "epoch": 27.275, + "grad_norm": 2.0017398197524843, + "learning_rate": 5e-05, + "loss": 0.0261, + "num_input_tokens_seen": 597496600, + "step": 6546 + }, + { + "epoch": 27.275, + "loss": 0.030996788293123245, + "loss_ce": 6.188166480569635e-06, + "loss_iou": 0.29296875, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 597496600, + "step": 6546 + }, + { + "epoch": 27.279166666666665, + "grad_norm": 2.6121521722878294, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 597587648, + "step": 6547 + }, + { + "epoch": 27.279166666666665, + "loss": 0.052928172051906586, + "loss_ce": 3.3578489819774404e-05, + "loss_iou": 0.1484375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 597587648, + "step": 6547 + }, + { + "epoch": 27.283333333333335, + "grad_norm": 8.989873845089479, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 597678912, + "step": 6548 + }, + { + "epoch": 27.283333333333335, + "loss": 0.02183392643928528, + "loss_ce": 0.0006089517846703529, + "loss_iou": 0.138671875, + "loss_num": 0.004241943359375, + "loss_xval": 0.021240234375, + "num_input_tokens_seen": 597678912, + "step": 6548 + }, + { + "epoch": 27.2875, + "grad_norm": 4.589243530412472, + "learning_rate": 5e-05, + "loss": 0.0363, + "num_input_tokens_seen": 597770348, + "step": 6549 + }, + { + "epoch": 27.2875, + "loss": 0.05072301626205444, + "loss_ce": 2.801705477395444e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 597770348, + "step": 6549 + }, + { + "epoch": 27.291666666666668, + "grad_norm": 1.3975896587582461, + "learning_rate": 5e-05, + "loss": 0.0459, + "num_input_tokens_seen": 597861368, + "step": 6550 + }, + { + "epoch": 27.291666666666668, + "loss": 0.02159380167722702, + "loss_ce": 2.615761331981048e-06, + "loss_iou": 0.1240234375, + "loss_num": 0.00433349609375, + "loss_xval": 0.0216064453125, + "num_input_tokens_seen": 597861368, + "step": 6550 + }, + { + "epoch": 27.295833333333334, + "grad_norm": 0.7843153006333098, + "learning_rate": 5e-05, + "loss": 0.0169, + "num_input_tokens_seen": 597952988, + "step": 6551 + }, + { + "epoch": 27.295833333333334, + "loss": 0.01972576230764389, + "loss_ce": 0.004619560670107603, + "loss_iou": 0.1474609375, + "loss_num": 0.003021240234375, + "loss_xval": 0.01513671875, + "num_input_tokens_seen": 597952988, + "step": 6551 + }, + { + "epoch": 27.3, + "grad_norm": 1.377873437741385, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 598044820, + "step": 6552 + }, + { + "epoch": 27.3, + "loss": 0.028581751510500908, + "loss_ce": 1.729914583847858e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 598044820, + "step": 6552 + }, + { + "epoch": 27.304166666666667, + "grad_norm": 1.13192773336822, + "learning_rate": 5e-05, + "loss": 0.0259, + "num_input_tokens_seen": 598136288, + "step": 6553 + }, + { + "epoch": 27.304166666666667, + "loss": 0.015871770679950714, + "loss_ce": 6.445116923714522e-06, + "loss_iou": 0.150390625, + "loss_num": 0.003173828125, + "loss_xval": 0.015869140625, + "num_input_tokens_seen": 598136288, + "step": 6553 + }, + { + "epoch": 27.308333333333334, + "grad_norm": 1.3050478403503047, + "learning_rate": 5e-05, + "loss": 0.0465, + "num_input_tokens_seen": 598228480, + "step": 6554 + }, + { + "epoch": 27.308333333333334, + "loss": 0.05149269849061966, + "loss_ce": 0.000391012872569263, + "loss_iou": 0.181640625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 598228480, + "step": 6554 + }, + { + "epoch": 27.3125, + "grad_norm": 2.7849902168565297, + "learning_rate": 5e-05, + "loss": 0.0182, + "num_input_tokens_seen": 598319732, + "step": 6555 + }, + { + "epoch": 27.3125, + "loss": 0.018088556826114655, + "loss_ce": 6.891947577969404e-06, + "loss_iou": 0.322265625, + "loss_num": 0.0036163330078125, + "loss_xval": 0.01806640625, + "num_input_tokens_seen": 598319732, + "step": 6555 + }, + { + "epoch": 27.316666666666666, + "grad_norm": 3.9446926543306478, + "learning_rate": 5e-05, + "loss": 0.0253, + "num_input_tokens_seen": 598411192, + "step": 6556 + }, + { + "epoch": 27.316666666666666, + "loss": 0.03370252996683121, + "loss_ce": 7.215633377199993e-05, + "loss_iou": 0.1171875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 598411192, + "step": 6556 + }, + { + "epoch": 27.320833333333333, + "grad_norm": 1.657969730059567, + "learning_rate": 5e-05, + "loss": 0.0195, + "num_input_tokens_seen": 598502060, + "step": 6557 + }, + { + "epoch": 27.320833333333333, + "loss": 0.01847069337964058, + "loss_ce": 7.558067409263458e-06, + "loss_iou": 0.166015625, + "loss_num": 0.003692626953125, + "loss_xval": 0.0184326171875, + "num_input_tokens_seen": 598502060, + "step": 6557 + }, + { + "epoch": 27.325, + "grad_norm": 2.5518112566432496, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 598593176, + "step": 6558 + }, + { + "epoch": 27.325, + "loss": 0.02984347566962242, + "loss_ce": 4.3059942981926724e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 598593176, + "step": 6558 + }, + { + "epoch": 27.329166666666666, + "grad_norm": 2.505236392254768, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 598684100, + "step": 6559 + }, + { + "epoch": 27.329166666666666, + "loss": 0.03522047773003578, + "loss_ce": 3.191518317180453e-06, + "loss_iou": 0.255859375, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 598684100, + "step": 6559 + }, + { + "epoch": 27.333333333333332, + "grad_norm": 3.9208746222693662, + "learning_rate": 5e-05, + "loss": 0.0471, + "num_input_tokens_seen": 598775708, + "step": 6560 + }, + { + "epoch": 27.333333333333332, + "loss": 0.06393549591302872, + "loss_ce": 0.00035211897920817137, + "loss_iou": 0.1416015625, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 598775708, + "step": 6560 + }, + { + "epoch": 27.3375, + "grad_norm": 5.306174373965517, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 598867024, + "step": 6561 + }, + { + "epoch": 27.3375, + "loss": 0.038117777556180954, + "loss_ce": 0.00013102231605444103, + "loss_iou": 0.302734375, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 598867024, + "step": 6561 + }, + { + "epoch": 27.341666666666665, + "grad_norm": 3.203872946930661, + "learning_rate": 5e-05, + "loss": 0.0345, + "num_input_tokens_seen": 598958032, + "step": 6562 + }, + { + "epoch": 27.341666666666665, + "loss": 0.035031914710998535, + "loss_ce": 4.351133975433186e-05, + "loss_iou": 0.294921875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 598958032, + "step": 6562 + }, + { + "epoch": 27.345833333333335, + "grad_norm": 2.9163353781723034, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 599047688, + "step": 6563 + }, + { + "epoch": 27.345833333333335, + "loss": 0.03683867305517197, + "loss_ce": 3.95470760850003e-06, + "loss_iou": 0.283203125, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 599047688, + "step": 6563 + }, + { + "epoch": 27.35, + "grad_norm": 2.516729600554519, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 599139488, + "step": 6564 + }, + { + "epoch": 27.35, + "loss": 0.0386323407292366, + "loss_ce": 1.234715455211699e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 599139488, + "step": 6564 + }, + { + "epoch": 27.354166666666668, + "grad_norm": 2.2636362822526723, + "learning_rate": 5e-05, + "loss": 0.0277, + "num_input_tokens_seen": 599231412, + "step": 6565 + }, + { + "epoch": 27.354166666666668, + "loss": 0.023040983825922012, + "loss_ce": 4.598782834364101e-05, + "loss_iou": 0.181640625, + "loss_num": 0.004608154296875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 599231412, + "step": 6565 + }, + { + "epoch": 27.358333333333334, + "grad_norm": 1.6733517038068515, + "learning_rate": 5e-05, + "loss": 0.0353, + "num_input_tokens_seen": 599322796, + "step": 6566 + }, + { + "epoch": 27.358333333333334, + "loss": 0.021526511758565903, + "loss_ce": 1.1619857104960829e-05, + "loss_iou": 0.1103515625, + "loss_num": 0.004302978515625, + "loss_xval": 0.021484375, + "num_input_tokens_seen": 599322796, + "step": 6566 + }, + { + "epoch": 27.3625, + "grad_norm": 6.377268414193188, + "learning_rate": 5e-05, + "loss": 0.0251, + "num_input_tokens_seen": 599414560, + "step": 6567 + }, + { + "epoch": 27.3625, + "loss": 0.030199095606803894, + "loss_ce": 9.581130143487826e-06, + "loss_iou": 0.30859375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 599414560, + "step": 6567 + }, + { + "epoch": 27.366666666666667, + "grad_norm": 2.1677706360799585, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 599506228, + "step": 6568 + }, + { + "epoch": 27.366666666666667, + "loss": 0.03312592953443527, + "loss_ce": 1.4354819541040342e-05, + "loss_iou": 0.115234375, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 599506228, + "step": 6568 + }, + { + "epoch": 27.370833333333334, + "grad_norm": 1.1997122500015587, + "learning_rate": 5e-05, + "loss": 0.0479, + "num_input_tokens_seen": 599597184, + "step": 6569 + }, + { + "epoch": 27.370833333333334, + "loss": 0.03354697674512863, + "loss_ce": 8.157267075148411e-06, + "loss_iou": 0.2197265625, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 599597184, + "step": 6569 + }, + { + "epoch": 27.375, + "grad_norm": 3.7223113507625176, + "learning_rate": 5e-05, + "loss": 0.0256, + "num_input_tokens_seen": 599688540, + "step": 6570 + }, + { + "epoch": 27.375, + "loss": 0.02370530739426613, + "loss_ce": 2.3666618290008046e-05, + "loss_iou": 0.236328125, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 599688540, + "step": 6570 + }, + { + "epoch": 27.379166666666666, + "grad_norm": 5.7049482022435525, + "learning_rate": 5e-05, + "loss": 0.0584, + "num_input_tokens_seen": 599779348, + "step": 6571 + }, + { + "epoch": 27.379166666666666, + "loss": 0.051517054438591, + "loss_ce": 3.3838264243968297e-06, + "loss_iou": 0.2236328125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 599779348, + "step": 6571 + }, + { + "epoch": 27.383333333333333, + "grad_norm": 4.808404128333326, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 599871480, + "step": 6572 + }, + { + "epoch": 27.383333333333333, + "loss": 0.03034200705587864, + "loss_ce": 7.535805707448162e-06, + "loss_iou": 0.28125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 599871480, + "step": 6572 + }, + { + "epoch": 27.3875, + "grad_norm": 2.96591911729717, + "learning_rate": 5e-05, + "loss": 0.0433, + "num_input_tokens_seen": 599962540, + "step": 6573 + }, + { + "epoch": 27.3875, + "loss": 0.06300961971282959, + "loss_ce": 0.00025021936744451523, + "loss_iou": 0.259765625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 599962540, + "step": 6573 + }, + { + "epoch": 27.391666666666666, + "grad_norm": 2.7157211325861375, + "learning_rate": 5e-05, + "loss": 0.0401, + "num_input_tokens_seen": 600054692, + "step": 6574 + }, + { + "epoch": 27.391666666666666, + "loss": 0.037177495658397675, + "loss_ce": 7.08368588675512e-06, + "loss_iou": 0.2734375, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 600054692, + "step": 6574 + }, + { + "epoch": 27.395833333333332, + "grad_norm": 2.550427003119342, + "learning_rate": 5e-05, + "loss": 0.0235, + "num_input_tokens_seen": 600146196, + "step": 6575 + }, + { + "epoch": 27.395833333333332, + "loss": 0.02404908463358879, + "loss_ce": 1.6491594578837976e-05, + "loss_iou": 0.12890625, + "loss_num": 0.004791259765625, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 600146196, + "step": 6575 + }, + { + "epoch": 27.4, + "grad_norm": 2.644869533589869, + "learning_rate": 5e-05, + "loss": 0.0718, + "num_input_tokens_seen": 600235640, + "step": 6576 + }, + { + "epoch": 27.4, + "loss": 0.11950768530368805, + "loss_ce": 0.0006264570401981473, + "loss_iou": 0.19921875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 600235640, + "step": 6576 + }, + { + "epoch": 27.404166666666665, + "grad_norm": 9.303528233594129, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 600326960, + "step": 6577 + }, + { + "epoch": 27.404166666666665, + "loss": 0.043067824095487595, + "loss_ce": 3.8039237551856786e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 600326960, + "step": 6577 + }, + { + "epoch": 27.408333333333335, + "grad_norm": 2.732674325736436, + "learning_rate": 5e-05, + "loss": 0.0622, + "num_input_tokens_seen": 600418444, + "step": 6578 + }, + { + "epoch": 27.408333333333335, + "loss": 0.07342066615819931, + "loss_ce": 0.0001555857015773654, + "loss_iou": 0.2578125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 600418444, + "step": 6578 + }, + { + "epoch": 27.4125, + "grad_norm": 1.7043220860281332, + "learning_rate": 5e-05, + "loss": 0.0267, + "num_input_tokens_seen": 600510068, + "step": 6579 + }, + { + "epoch": 27.4125, + "loss": 0.036038704216480255, + "loss_ce": 5.077363312011585e-06, + "loss_iou": 0.248046875, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 600510068, + "step": 6579 + }, + { + "epoch": 27.416666666666668, + "grad_norm": 1.2667137607979664, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 600601860, + "step": 6580 + }, + { + "epoch": 27.416666666666668, + "loss": 0.01787398010492325, + "loss_ce": 0.00020430199219845235, + "loss_iou": 0.1171875, + "loss_num": 0.0035247802734375, + "loss_xval": 0.0177001953125, + "num_input_tokens_seen": 600601860, + "step": 6580 + }, + { + "epoch": 27.420833333333334, + "grad_norm": 1.8383693180637422, + "learning_rate": 5e-05, + "loss": 0.0188, + "num_input_tokens_seen": 600692928, + "step": 6581 + }, + { + "epoch": 27.420833333333334, + "loss": 0.021061724051833153, + "loss_ce": 0.0002411069581285119, + "loss_iou": 0.19140625, + "loss_num": 0.004180908203125, + "loss_xval": 0.0208740234375, + "num_input_tokens_seen": 600692928, + "step": 6581 + }, + { + "epoch": 27.425, + "grad_norm": 2.1614473169920543, + "learning_rate": 5e-05, + "loss": 0.0392, + "num_input_tokens_seen": 600783732, + "step": 6582 + }, + { + "epoch": 27.425, + "loss": 0.046985093504190445, + "loss_ce": 3.2839527648320654e-06, + "loss_iou": 0.275390625, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 600783732, + "step": 6582 + }, + { + "epoch": 27.429166666666667, + "grad_norm": 2.2908035124483934, + "learning_rate": 5e-05, + "loss": 0.0296, + "num_input_tokens_seen": 600875184, + "step": 6583 + }, + { + "epoch": 27.429166666666667, + "loss": 0.022312387824058533, + "loss_ce": 4.037513917864999e-06, + "loss_iou": 0.158203125, + "loss_num": 0.00445556640625, + "loss_xval": 0.0223388671875, + "num_input_tokens_seen": 600875184, + "step": 6583 + }, + { + "epoch": 27.433333333333334, + "grad_norm": 2.351502737531473, + "learning_rate": 5e-05, + "loss": 0.0322, + "num_input_tokens_seen": 600967108, + "step": 6584 + }, + { + "epoch": 27.433333333333334, + "loss": 0.032669175416231155, + "loss_ce": 7.739663487882353e-06, + "loss_iou": 0.345703125, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 600967108, + "step": 6584 + }, + { + "epoch": 27.4375, + "grad_norm": 2.510176621938398, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 601057556, + "step": 6585 + }, + { + "epoch": 27.4375, + "loss": 0.024011608213186264, + "loss_ce": 9.533564480079804e-06, + "loss_iou": 0.279296875, + "loss_num": 0.004791259765625, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 601057556, + "step": 6585 + }, + { + "epoch": 27.441666666666666, + "grad_norm": 4.978150988584718, + "learning_rate": 5e-05, + "loss": 0.0364, + "num_input_tokens_seen": 601149160, + "step": 6586 + }, + { + "epoch": 27.441666666666666, + "loss": 0.029743358492851257, + "loss_ce": 1.9236387743148953e-05, + "loss_iou": 0.283203125, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 601149160, + "step": 6586 + }, + { + "epoch": 27.445833333333333, + "grad_norm": 2.660998513384476, + "learning_rate": 5e-05, + "loss": 0.0692, + "num_input_tokens_seen": 601240432, + "step": 6587 + }, + { + "epoch": 27.445833333333333, + "loss": 0.08215197920799255, + "loss_ce": 1.3918957847636193e-05, + "loss_iou": 0.177734375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 601240432, + "step": 6587 + }, + { + "epoch": 27.45, + "grad_norm": 2.9024923055038885, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 601331376, + "step": 6588 + }, + { + "epoch": 27.45, + "loss": 0.030946815386414528, + "loss_ce": 1.990160626519355e-06, + "loss_iou": 0.22265625, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 601331376, + "step": 6588 + }, + { + "epoch": 27.454166666666666, + "grad_norm": 3.02695444764913, + "learning_rate": 5e-05, + "loss": 0.0533, + "num_input_tokens_seen": 601421736, + "step": 6589 + }, + { + "epoch": 27.454166666666666, + "loss": 0.038657695055007935, + "loss_ce": 3.770285184145905e-05, + "loss_iou": 0.26171875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 601421736, + "step": 6589 + }, + { + "epoch": 27.458333333333332, + "grad_norm": 2.7917795632046367, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 601513420, + "step": 6590 + }, + { + "epoch": 27.458333333333332, + "loss": 0.02959408238530159, + "loss_ce": 3.78065014956519e-05, + "loss_iou": 0.150390625, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 601513420, + "step": 6590 + }, + { + "epoch": 27.4625, + "grad_norm": 1.975953164903365, + "learning_rate": 5e-05, + "loss": 0.0701, + "num_input_tokens_seen": 601605732, + "step": 6591 + }, + { + "epoch": 27.4625, + "loss": 0.055715471506118774, + "loss_ce": 2.0891760868835263e-05, + "loss_iou": 0.265625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 601605732, + "step": 6591 + }, + { + "epoch": 27.466666666666665, + "grad_norm": 2.1041934779287628, + "learning_rate": 5e-05, + "loss": 0.0307, + "num_input_tokens_seen": 601696996, + "step": 6592 + }, + { + "epoch": 27.466666666666665, + "loss": 0.020221196115016937, + "loss_ce": 3.2998759706970304e-06, + "loss_iou": 0.2158203125, + "loss_num": 0.004058837890625, + "loss_xval": 0.020263671875, + "num_input_tokens_seen": 601696996, + "step": 6592 + }, + { + "epoch": 27.470833333333335, + "grad_norm": 3.003669877138788, + "learning_rate": 5e-05, + "loss": 0.0461, + "num_input_tokens_seen": 601788580, + "step": 6593 + }, + { + "epoch": 27.470833333333335, + "loss": 0.05574270710349083, + "loss_ce": 9.980880349758081e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 601788580, + "step": 6593 + }, + { + "epoch": 27.475, + "grad_norm": 3.1522051116112224, + "learning_rate": 5e-05, + "loss": 0.0438, + "num_input_tokens_seen": 601880024, + "step": 6594 + }, + { + "epoch": 27.475, + "loss": 0.03714306652545929, + "loss_ce": 4.1322076867800206e-05, + "loss_iou": 0.26953125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 601880024, + "step": 6594 + }, + { + "epoch": 27.479166666666668, + "grad_norm": 2.147560213566929, + "learning_rate": 5e-05, + "loss": 0.0221, + "num_input_tokens_seen": 601971004, + "step": 6595 + }, + { + "epoch": 27.479166666666668, + "loss": 0.020969413220882416, + "loss_ce": 3.836278210656019e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.004180908203125, + "loss_xval": 0.02099609375, + "num_input_tokens_seen": 601971004, + "step": 6595 + }, + { + "epoch": 27.483333333333334, + "grad_norm": 4.119361896806683, + "learning_rate": 5e-05, + "loss": 0.0629, + "num_input_tokens_seen": 602062960, + "step": 6596 + }, + { + "epoch": 27.483333333333334, + "loss": 0.09118642657995224, + "loss_ce": 1.5162068848439958e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 602062960, + "step": 6596 + }, + { + "epoch": 27.4875, + "grad_norm": 2.342176167246577, + "learning_rate": 5e-05, + "loss": 0.0377, + "num_input_tokens_seen": 602154216, + "step": 6597 + }, + { + "epoch": 27.4875, + "loss": 0.03796786069869995, + "loss_ce": 3.992761776316911e-06, + "loss_iou": 0.1533203125, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 602154216, + "step": 6597 + }, + { + "epoch": 27.491666666666667, + "grad_norm": 0.905613639951875, + "learning_rate": 5e-05, + "loss": 0.0408, + "num_input_tokens_seen": 602245320, + "step": 6598 + }, + { + "epoch": 27.491666666666667, + "loss": 0.057194821536540985, + "loss_ce": 0.00029479575459845364, + "loss_iou": 0.1962890625, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 602245320, + "step": 6598 + }, + { + "epoch": 27.495833333333334, + "grad_norm": 1.3463095939446643, + "learning_rate": 5e-05, + "loss": 0.0285, + "num_input_tokens_seen": 602336928, + "step": 6599 + }, + { + "epoch": 27.495833333333334, + "loss": 0.018810540437698364, + "loss_ce": 4.083451585756848e-06, + "loss_iou": 0.2001953125, + "loss_num": 0.0037689208984375, + "loss_xval": 0.018798828125, + "num_input_tokens_seen": 602336928, + "step": 6599 + }, + { + "epoch": 27.5, + "grad_norm": 1.2228630031767433, + "learning_rate": 5e-05, + "loss": 0.035, + "num_input_tokens_seen": 602428236, + "step": 6600 + }, + { + "epoch": 27.5, + "loss": 0.047089651226997375, + "loss_ce": 1.6286081518046558e-05, + "loss_iou": 0.1875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 602428236, + "step": 6600 + }, + { + "epoch": 27.504166666666666, + "grad_norm": 1.7518779199013301, + "learning_rate": 5e-05, + "loss": 0.0209, + "num_input_tokens_seen": 602519408, + "step": 6601 + }, + { + "epoch": 27.504166666666666, + "loss": 0.023205768316984177, + "loss_ce": 1.2409835107973777e-05, + "loss_iou": 0.220703125, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 602519408, + "step": 6601 + }, + { + "epoch": 27.508333333333333, + "grad_norm": 3.188805646269444, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 602611080, + "step": 6602 + }, + { + "epoch": 27.508333333333333, + "loss": 0.027485787868499756, + "loss_ce": 4.707836069428595e-06, + "loss_iou": 0.2578125, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 602611080, + "step": 6602 + }, + { + "epoch": 27.5125, + "grad_norm": 3.558059379057071, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 602702520, + "step": 6603 + }, + { + "epoch": 27.5125, + "loss": 0.03928153216838837, + "loss_ce": 0.00020377383043523878, + "loss_iou": 0.353515625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 602702520, + "step": 6603 + }, + { + "epoch": 27.516666666666666, + "grad_norm": 3.153982145205354, + "learning_rate": 5e-05, + "loss": 0.0462, + "num_input_tokens_seen": 602793520, + "step": 6604 + }, + { + "epoch": 27.516666666666666, + "loss": 0.03709595650434494, + "loss_ce": 1.8420216747472296e-06, + "loss_iou": 0.294921875, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 602793520, + "step": 6604 + }, + { + "epoch": 27.520833333333332, + "grad_norm": 2.53906577654717, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 602884316, + "step": 6605 + }, + { + "epoch": 27.520833333333332, + "loss": 0.08753709495067596, + "loss_ce": 5.0501193982199766e-06, + "loss_iou": 0.177734375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 602884316, + "step": 6605 + }, + { + "epoch": 27.525, + "grad_norm": 2.4297300661582253, + "learning_rate": 5e-05, + "loss": 0.0298, + "num_input_tokens_seen": 602976368, + "step": 6606 + }, + { + "epoch": 27.525, + "loss": 0.03763948008418083, + "loss_ce": 0.007869582623243332, + "loss_iou": 0.3046875, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 602976368, + "step": 6606 + }, + { + "epoch": 27.529166666666665, + "grad_norm": 2.7158826587930633, + "learning_rate": 5e-05, + "loss": 0.0473, + "num_input_tokens_seen": 603068292, + "step": 6607 + }, + { + "epoch": 27.529166666666665, + "loss": 0.06054652854800224, + "loss_ce": 4.543113027466461e-05, + "loss_iou": 0.23828125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 603068292, + "step": 6607 + }, + { + "epoch": 27.533333333333335, + "grad_norm": 2.1029110616488156, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 603159684, + "step": 6608 + }, + { + "epoch": 27.533333333333335, + "loss": 0.030283518135547638, + "loss_ce": 0.00010163510160055012, + "loss_iou": 0.197265625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 603159684, + "step": 6608 + }, + { + "epoch": 27.5375, + "grad_norm": 3.078600833549705, + "learning_rate": 5e-05, + "loss": 0.0444, + "num_input_tokens_seen": 603250252, + "step": 6609 + }, + { + "epoch": 27.5375, + "loss": 0.046682506799697876, + "loss_ce": 5.869356755283661e-06, + "loss_iou": 0.158203125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 603250252, + "step": 6609 + }, + { + "epoch": 27.541666666666668, + "grad_norm": 2.6082151175759902, + "learning_rate": 5e-05, + "loss": 0.0325, + "num_input_tokens_seen": 603341720, + "step": 6610 + }, + { + "epoch": 27.541666666666668, + "loss": 0.034782443195581436, + "loss_ce": 5.343872544472106e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 603341720, + "step": 6610 + }, + { + "epoch": 27.545833333333334, + "grad_norm": 2.358940988759403, + "learning_rate": 5e-05, + "loss": 0.0373, + "num_input_tokens_seen": 603433232, + "step": 6611 + }, + { + "epoch": 27.545833333333334, + "loss": 0.05367887765169144, + "loss_ce": 6.086263056204189e-06, + "loss_iou": 0.234375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 603433232, + "step": 6611 + }, + { + "epoch": 27.55, + "grad_norm": 1.7271143762622954, + "learning_rate": 5e-05, + "loss": 0.0464, + "num_input_tokens_seen": 603524472, + "step": 6612 + }, + { + "epoch": 27.55, + "loss": 0.02543582022190094, + "loss_ce": 3.2346802072424907e-06, + "loss_iou": 0.1865234375, + "loss_num": 0.005096435546875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 603524472, + "step": 6612 + }, + { + "epoch": 27.554166666666667, + "grad_norm": 2.6463370892329623, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 603615952, + "step": 6613 + }, + { + "epoch": 27.554166666666667, + "loss": 0.051030345261096954, + "loss_ce": 3.546969310264103e-05, + "loss_iou": 0.31640625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 603615952, + "step": 6613 + }, + { + "epoch": 27.558333333333334, + "grad_norm": 2.7903412369422806, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 603706352, + "step": 6614 + }, + { + "epoch": 27.558333333333334, + "loss": 0.03343695402145386, + "loss_ce": 1.2576303561218083e-05, + "loss_iou": 0.2578125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 603706352, + "step": 6614 + }, + { + "epoch": 27.5625, + "grad_norm": 2.762314092578423, + "learning_rate": 5e-05, + "loss": 0.0266, + "num_input_tokens_seen": 603797440, + "step": 6615 + }, + { + "epoch": 27.5625, + "loss": 0.017136216163635254, + "loss_ce": 4.410403107613092e-06, + "loss_iou": 0.208984375, + "loss_num": 0.0034332275390625, + "loss_xval": 0.01708984375, + "num_input_tokens_seen": 603797440, + "step": 6615 + }, + { + "epoch": 27.566666666666666, + "grad_norm": 2.852916686819886, + "learning_rate": 5e-05, + "loss": 0.0512, + "num_input_tokens_seen": 603889088, + "step": 6616 + }, + { + "epoch": 27.566666666666666, + "loss": 0.061923280358314514, + "loss_ce": 3.1138415579334833e-06, + "loss_iou": 0.26171875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 603889088, + "step": 6616 + }, + { + "epoch": 27.570833333333333, + "grad_norm": 2.4174418189875975, + "learning_rate": 5e-05, + "loss": 0.0357, + "num_input_tokens_seen": 603981204, + "step": 6617 + }, + { + "epoch": 27.570833333333333, + "loss": 0.044821545481681824, + "loss_ce": 0.00014381064102053642, + "loss_iou": 0.265625, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 603981204, + "step": 6617 + }, + { + "epoch": 27.575, + "grad_norm": 2.7924830501984013, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 604072500, + "step": 6618 + }, + { + "epoch": 27.575, + "loss": 0.028552627190947533, + "loss_ce": 3.4320119084441103e-06, + "loss_iou": 0.298828125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 604072500, + "step": 6618 + }, + { + "epoch": 27.579166666666666, + "grad_norm": 2.0640590359943154, + "learning_rate": 5e-05, + "loss": 0.0351, + "num_input_tokens_seen": 604164248, + "step": 6619 + }, + { + "epoch": 27.579166666666666, + "loss": 0.0477108359336853, + "loss_ce": 4.231982529745437e-06, + "loss_iou": 0.3515625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 604164248, + "step": 6619 + }, + { + "epoch": 27.583333333333332, + "grad_norm": 2.7243145323678717, + "learning_rate": 5e-05, + "loss": 0.0332, + "num_input_tokens_seen": 604255476, + "step": 6620 + }, + { + "epoch": 27.583333333333332, + "loss": 0.026827622205018997, + "loss_ce": 2.6708999030233826e-06, + "loss_iou": 0.26171875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 604255476, + "step": 6620 + }, + { + "epoch": 27.5875, + "grad_norm": 2.9996038692424625, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 604345568, + "step": 6621 + }, + { + "epoch": 27.5875, + "loss": 0.05639968067407608, + "loss_ce": 3.193629254383268e-06, + "loss_iou": 0.322265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 604345568, + "step": 6621 + }, + { + "epoch": 27.591666666666665, + "grad_norm": 2.791497014815601, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 604436948, + "step": 6622 + }, + { + "epoch": 27.591666666666665, + "loss": 0.04797312244772911, + "loss_ce": 6.0525271692313254e-05, + "loss_iou": 0.1748046875, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 604436948, + "step": 6622 + }, + { + "epoch": 27.595833333333335, + "grad_norm": 3.23239804379117, + "learning_rate": 5e-05, + "loss": 0.0332, + "num_input_tokens_seen": 604528500, + "step": 6623 + }, + { + "epoch": 27.595833333333335, + "loss": 0.03417633846402168, + "loss_ce": 4.281735073163873e-06, + "loss_iou": 0.283203125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 604528500, + "step": 6623 + }, + { + "epoch": 27.6, + "grad_norm": 2.5592091033285413, + "learning_rate": 5e-05, + "loss": 0.0305, + "num_input_tokens_seen": 604620180, + "step": 6624 + }, + { + "epoch": 27.6, + "loss": 0.03764050453901291, + "loss_ce": 4.700970748672262e-06, + "loss_iou": 0.2734375, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 604620180, + "step": 6624 + }, + { + "epoch": 27.604166666666668, + "grad_norm": 3.5211528586839744, + "learning_rate": 5e-05, + "loss": 0.0352, + "num_input_tokens_seen": 604711684, + "step": 6625 + }, + { + "epoch": 27.604166666666668, + "loss": 0.021237660199403763, + "loss_ce": 9.660785872256383e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.00421142578125, + "loss_xval": 0.0211181640625, + "num_input_tokens_seen": 604711684, + "step": 6625 + }, + { + "epoch": 27.608333333333334, + "grad_norm": 2.95923657655911, + "learning_rate": 5e-05, + "loss": 0.0398, + "num_input_tokens_seen": 604802744, + "step": 6626 + }, + { + "epoch": 27.608333333333334, + "loss": 0.04550544172525406, + "loss_ce": 3.7324662116589025e-06, + "loss_iou": 0.28125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 604802744, + "step": 6626 + }, + { + "epoch": 27.6125, + "grad_norm": 2.2750047447413237, + "learning_rate": 5e-05, + "loss": 0.0259, + "num_input_tokens_seen": 604893984, + "step": 6627 + }, + { + "epoch": 27.6125, + "loss": 0.027261460199952126, + "loss_ce": 1.63518939189089e-06, + "loss_iou": 0.27734375, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 604893984, + "step": 6627 + }, + { + "epoch": 27.616666666666667, + "grad_norm": 2.2490624202174647, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 604985744, + "step": 6628 + }, + { + "epoch": 27.616666666666667, + "loss": 0.03529675304889679, + "loss_ce": 3.1767167456564493e-06, + "loss_iou": 0.263671875, + "loss_num": 0.007049560546875, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 604985744, + "step": 6628 + }, + { + "epoch": 27.620833333333334, + "grad_norm": 1.7491363871611125, + "learning_rate": 5e-05, + "loss": 0.0509, + "num_input_tokens_seen": 605077648, + "step": 6629 + }, + { + "epoch": 27.620833333333334, + "loss": 0.054396286606788635, + "loss_ce": 2.1591577024082653e-05, + "loss_iou": 0.23046875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 605077648, + "step": 6629 + }, + { + "epoch": 27.625, + "grad_norm": 1.8101714745526631, + "learning_rate": 5e-05, + "loss": 0.0308, + "num_input_tokens_seen": 605168448, + "step": 6630 + }, + { + "epoch": 27.625, + "loss": 0.03479132801294327, + "loss_ce": 1.2861009963671677e-06, + "loss_iou": 0.197265625, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 605168448, + "step": 6630 + }, + { + "epoch": 27.629166666666666, + "grad_norm": 2.3916940840025216, + "learning_rate": 5e-05, + "loss": 0.0522, + "num_input_tokens_seen": 605259724, + "step": 6631 + }, + { + "epoch": 27.629166666666666, + "loss": 0.08446063101291656, + "loss_ce": 3.2409195682703285e-06, + "loss_iou": 0.171875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 605259724, + "step": 6631 + }, + { + "epoch": 27.633333333333333, + "grad_norm": 3.048251825415468, + "learning_rate": 5e-05, + "loss": 0.0256, + "num_input_tokens_seen": 605350616, + "step": 6632 + }, + { + "epoch": 27.633333333333333, + "loss": 0.02368415705859661, + "loss_ce": 2.51623964686587e-06, + "loss_iou": 0.259765625, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 605350616, + "step": 6632 + }, + { + "epoch": 27.6375, + "grad_norm": 3.634069175070679, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 605441932, + "step": 6633 + }, + { + "epoch": 27.6375, + "loss": 0.0382399708032608, + "loss_ce": 1.4437129038924468e-06, + "loss_iou": 0.298828125, + "loss_num": 0.00762939453125, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 605441932, + "step": 6633 + }, + { + "epoch": 27.641666666666666, + "grad_norm": 2.9491222861087563, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 605533148, + "step": 6634 + }, + { + "epoch": 27.641666666666666, + "loss": 0.022725991904735565, + "loss_ce": 5.656002485920908e-06, + "loss_iou": 0.1904296875, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 605533148, + "step": 6634 + }, + { + "epoch": 27.645833333333332, + "grad_norm": 4.376426521985265, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 605624092, + "step": 6635 + }, + { + "epoch": 27.645833333333332, + "loss": 0.0188091192394495, + "loss_ce": 2.6615209662850248e-06, + "loss_iou": 0.177734375, + "loss_num": 0.0037689208984375, + "loss_xval": 0.018798828125, + "num_input_tokens_seen": 605624092, + "step": 6635 + }, + { + "epoch": 27.65, + "grad_norm": 2.509259588092114, + "learning_rate": 5e-05, + "loss": 0.0928, + "num_input_tokens_seen": 605715632, + "step": 6636 + }, + { + "epoch": 27.65, + "loss": 0.15086206793785095, + "loss_ce": 0.00016627281729597598, + "loss_iou": 0.1123046875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 605715632, + "step": 6636 + }, + { + "epoch": 27.654166666666665, + "grad_norm": 1.192599307960487, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 605807444, + "step": 6637 + }, + { + "epoch": 27.654166666666665, + "loss": 0.027369150891900063, + "loss_ce": 0.00013221264816820621, + "loss_iou": 0.271484375, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 605807444, + "step": 6637 + }, + { + "epoch": 27.658333333333335, + "grad_norm": 0.8708589826806008, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 605898772, + "step": 6638 + }, + { + "epoch": 27.658333333333335, + "loss": 0.03177279233932495, + "loss_ce": 3.994483449787367e-06, + "loss_iou": 0.2099609375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 605898772, + "step": 6638 + }, + { + "epoch": 27.6625, + "grad_norm": 2.7583714638593837, + "learning_rate": 5e-05, + "loss": 0.0361, + "num_input_tokens_seen": 605990100, + "step": 6639 + }, + { + "epoch": 27.6625, + "loss": 0.028095796704292297, + "loss_ce": 4.366681423562113e-06, + "loss_iou": 0.29296875, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 605990100, + "step": 6639 + }, + { + "epoch": 27.666666666666668, + "grad_norm": 4.235285412737973, + "learning_rate": 5e-05, + "loss": 0.0326, + "num_input_tokens_seen": 606081324, + "step": 6640 + }, + { + "epoch": 27.666666666666668, + "loss": 0.02892245352268219, + "loss_ce": 7.046784048725385e-06, + "loss_iou": 0.302734375, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 606081324, + "step": 6640 + }, + { + "epoch": 27.670833333333334, + "grad_norm": 2.0263615613746264, + "learning_rate": 5e-05, + "loss": 0.0511, + "num_input_tokens_seen": 606173076, + "step": 6641 + }, + { + "epoch": 27.670833333333334, + "loss": 0.06474164873361588, + "loss_ce": 0.00016645470168441534, + "loss_iou": 0.2001953125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 606173076, + "step": 6641 + }, + { + "epoch": 27.675, + "grad_norm": 2.379405254853854, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 606264580, + "step": 6642 + }, + { + "epoch": 27.675, + "loss": 0.024179283529520035, + "loss_ce": 9.360705917060841e-06, + "loss_iou": 0.265625, + "loss_num": 0.004852294921875, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 606264580, + "step": 6642 + }, + { + "epoch": 27.679166666666667, + "grad_norm": 2.5738652281852654, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 606355852, + "step": 6643 + }, + { + "epoch": 27.679166666666667, + "loss": 0.020705537870526314, + "loss_ce": 6.990535439399537e-06, + "loss_iou": 0.20703125, + "loss_num": 0.004150390625, + "loss_xval": 0.020751953125, + "num_input_tokens_seen": 606355852, + "step": 6643 + }, + { + "epoch": 27.683333333333334, + "grad_norm": 1.7440057256069197, + "learning_rate": 5e-05, + "loss": 0.0396, + "num_input_tokens_seen": 606447168, + "step": 6644 + }, + { + "epoch": 27.683333333333334, + "loss": 0.05534441024065018, + "loss_ce": 0.0016563633689656854, + "loss_iou": 0.14453125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 606447168, + "step": 6644 + }, + { + "epoch": 27.6875, + "grad_norm": 2.037974230861017, + "learning_rate": 5e-05, + "loss": 0.0593, + "num_input_tokens_seen": 606538228, + "step": 6645 + }, + { + "epoch": 27.6875, + "loss": 0.07503647357225418, + "loss_ce": 0.0033430559560656548, + "loss_iou": 0.224609375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 606538228, + "step": 6645 + }, + { + "epoch": 27.691666666666666, + "grad_norm": 3.0074458940026974, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 606629192, + "step": 6646 + }, + { + "epoch": 27.691666666666666, + "loss": 0.03610651195049286, + "loss_ce": 4.216415163682541e-06, + "loss_iou": 0.224609375, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 606629192, + "step": 6646 + }, + { + "epoch": 27.695833333333333, + "grad_norm": 2.5371188704551253, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 606718940, + "step": 6647 + }, + { + "epoch": 27.695833333333333, + "loss": 0.030656758695840836, + "loss_ce": 1.8496901930120657e-06, + "loss_iou": 0.240234375, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 606718940, + "step": 6647 + }, + { + "epoch": 27.7, + "grad_norm": 2.105683239007001, + "learning_rate": 5e-05, + "loss": 0.0463, + "num_input_tokens_seen": 606810252, + "step": 6648 + }, + { + "epoch": 27.7, + "loss": 0.02906056120991707, + "loss_ce": 0.002235610270872712, + "loss_iou": 0.333984375, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 606810252, + "step": 6648 + }, + { + "epoch": 27.704166666666666, + "grad_norm": 2.023054843563252, + "learning_rate": 5e-05, + "loss": 0.0299, + "num_input_tokens_seen": 606901096, + "step": 6649 + }, + { + "epoch": 27.704166666666666, + "loss": 0.02129945158958435, + "loss_ce": 0.0005322406068444252, + "loss_iou": 0.2578125, + "loss_num": 0.004150390625, + "loss_xval": 0.020751953125, + "num_input_tokens_seen": 606901096, + "step": 6649 + }, + { + "epoch": 27.708333333333332, + "grad_norm": 1.6183348267504132, + "learning_rate": 5e-05, + "loss": 0.0279, + "num_input_tokens_seen": 606992180, + "step": 6650 + }, + { + "epoch": 27.708333333333332, + "loss": 0.01805499568581581, + "loss_ce": 3.84854456569883e-06, + "loss_iou": 0.11865234375, + "loss_num": 0.0036163330078125, + "loss_xval": 0.01806640625, + "num_input_tokens_seen": 606992180, + "step": 6650 + }, + { + "epoch": 27.7125, + "grad_norm": 1.3219883169124516, + "learning_rate": 5e-05, + "loss": 0.0273, + "num_input_tokens_seen": 607083380, + "step": 6651 + }, + { + "epoch": 27.7125, + "loss": 0.022418688982725143, + "loss_ce": 4.9306090659229085e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.004486083984375, + "loss_xval": 0.0223388671875, + "num_input_tokens_seen": 607083380, + "step": 6651 + }, + { + "epoch": 27.716666666666665, + "grad_norm": 0.530552864302545, + "learning_rate": 5e-05, + "loss": 0.0203, + "num_input_tokens_seen": 607174456, + "step": 6652 + }, + { + "epoch": 27.716666666666665, + "loss": 0.018681395798921585, + "loss_ce": 1.608139427844435e-05, + "loss_iou": 0.13671875, + "loss_num": 0.0037384033203125, + "loss_xval": 0.0186767578125, + "num_input_tokens_seen": 607174456, + "step": 6652 + }, + { + "epoch": 27.720833333333335, + "grad_norm": 2.2263865427479312, + "learning_rate": 5e-05, + "loss": 0.0238, + "num_input_tokens_seen": 607265168, + "step": 6653 + }, + { + "epoch": 27.720833333333335, + "loss": 0.024402815848588943, + "loss_ce": 4.011170858575497e-06, + "loss_iou": 0.212890625, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 607265168, + "step": 6653 + }, + { + "epoch": 27.725, + "grad_norm": 1.7763990809981527, + "learning_rate": 5e-05, + "loss": 0.0298, + "num_input_tokens_seen": 607355936, + "step": 6654 + }, + { + "epoch": 27.725, + "loss": 0.02579689212143421, + "loss_ce": 1.909734010041575e-06, + "loss_iou": 0.21875, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 607355936, + "step": 6654 + }, + { + "epoch": 27.729166666666668, + "grad_norm": 1.6228947218015455, + "learning_rate": 5e-05, + "loss": 0.0439, + "num_input_tokens_seen": 607447924, + "step": 6655 + }, + { + "epoch": 27.729166666666668, + "loss": 0.04186485707759857, + "loss_ce": 1.7629317881073803e-05, + "loss_iou": 0.169921875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 607447924, + "step": 6655 + }, + { + "epoch": 27.733333333333334, + "grad_norm": 1.6086721925953786, + "learning_rate": 5e-05, + "loss": 0.019, + "num_input_tokens_seen": 607539088, + "step": 6656 + }, + { + "epoch": 27.733333333333334, + "loss": 0.022941526025533676, + "loss_ce": 7.5659963840735145e-06, + "loss_iou": 0.1826171875, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 607539088, + "step": 6656 + }, + { + "epoch": 27.7375, + "grad_norm": 2.8696915212555485, + "learning_rate": 5e-05, + "loss": 0.0754, + "num_input_tokens_seen": 607630200, + "step": 6657 + }, + { + "epoch": 27.7375, + "loss": 0.02179615944623947, + "loss_ce": 6.6089974097849336e-06, + "loss_iou": 0.203125, + "loss_num": 0.004364013671875, + "loss_xval": 0.021728515625, + "num_input_tokens_seen": 607630200, + "step": 6657 + }, + { + "epoch": 27.741666666666667, + "grad_norm": 4.928794728075151, + "learning_rate": 5e-05, + "loss": 0.099, + "num_input_tokens_seen": 607720344, + "step": 6658 + }, + { + "epoch": 27.741666666666667, + "loss": 0.07867708057165146, + "loss_ce": 1.8020633433479816e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 607720344, + "step": 6658 + }, + { + "epoch": 27.745833333333334, + "grad_norm": 0.8958089934352309, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 607811356, + "step": 6659 + }, + { + "epoch": 27.745833333333334, + "loss": 0.020234694704413414, + "loss_ce": 1.5400872825921397e-06, + "loss_iou": 0.09716796875, + "loss_num": 0.004058837890625, + "loss_xval": 0.020263671875, + "num_input_tokens_seen": 607811356, + "step": 6659 + }, + { + "epoch": 27.75, + "grad_norm": 2.0022405141020387, + "learning_rate": 5e-05, + "loss": 0.0566, + "num_input_tokens_seen": 607901144, + "step": 6660 + }, + { + "epoch": 27.75, + "loss": 0.09240372478961945, + "loss_ce": 4.121278834645636e-06, + "loss_iou": 0.16015625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 607901144, + "step": 6660 + }, + { + "epoch": 27.754166666666666, + "grad_norm": 3.718947048580481, + "learning_rate": 5e-05, + "loss": 0.0389, + "num_input_tokens_seen": 607992280, + "step": 6661 + }, + { + "epoch": 27.754166666666666, + "loss": 0.05292084068059921, + "loss_ce": 3.360340770086623e-06, + "loss_iou": 0.166015625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 607992280, + "step": 6661 + }, + { + "epoch": 27.758333333333333, + "grad_norm": 2.8810944333714255, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 608083284, + "step": 6662 + }, + { + "epoch": 27.758333333333333, + "loss": 0.02787664532661438, + "loss_ce": 2.653983301570406e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.005584716796875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 608083284, + "step": 6662 + }, + { + "epoch": 27.7625, + "grad_norm": 3.25957753965448, + "learning_rate": 5e-05, + "loss": 0.0543, + "num_input_tokens_seen": 608174528, + "step": 6663 + }, + { + "epoch": 27.7625, + "loss": 0.03937568515539169, + "loss_ce": 6.904626206960529e-05, + "loss_iou": 0.25, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 608174528, + "step": 6663 + }, + { + "epoch": 27.766666666666666, + "grad_norm": 3.233480124426848, + "learning_rate": 5e-05, + "loss": 0.0689, + "num_input_tokens_seen": 608266056, + "step": 6664 + }, + { + "epoch": 27.766666666666666, + "loss": 0.07732213288545609, + "loss_ce": 5.852544745721389e-06, + "loss_iou": 0.1796875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 608266056, + "step": 6664 + }, + { + "epoch": 27.770833333333332, + "grad_norm": 2.6917479858733073, + "learning_rate": 5e-05, + "loss": 0.0415, + "num_input_tokens_seen": 608356956, + "step": 6665 + }, + { + "epoch": 27.770833333333332, + "loss": 0.02752842754125595, + "loss_ce": 1.5733947975604679e-06, + "loss_iou": 0.291015625, + "loss_num": 0.0054931640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 608356956, + "step": 6665 + }, + { + "epoch": 27.775, + "grad_norm": 2.1732125363132093, + "learning_rate": 5e-05, + "loss": 0.0405, + "num_input_tokens_seen": 608447756, + "step": 6666 + }, + { + "epoch": 27.775, + "loss": 0.024806858971714973, + "loss_ce": 3.4214372135465965e-05, + "loss_iou": 0.1630859375, + "loss_num": 0.00494384765625, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 608447756, + "step": 6666 + }, + { + "epoch": 27.779166666666665, + "grad_norm": 3.061774590524382, + "learning_rate": 5e-05, + "loss": 0.0491, + "num_input_tokens_seen": 608538840, + "step": 6667 + }, + { + "epoch": 27.779166666666665, + "loss": 0.07025608420372009, + "loss_ce": 6.565061630681157e-05, + "loss_iou": 0.228515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 608538840, + "step": 6667 + }, + { + "epoch": 27.783333333333335, + "grad_norm": 3.6641274833009785, + "learning_rate": 5e-05, + "loss": 0.0399, + "num_input_tokens_seen": 608630136, + "step": 6668 + }, + { + "epoch": 27.783333333333335, + "loss": 0.041263651102781296, + "loss_ce": 3.4404733014525846e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 608630136, + "step": 6668 + }, + { + "epoch": 27.7875, + "grad_norm": 2.2530095216389774, + "learning_rate": 5e-05, + "loss": 0.0437, + "num_input_tokens_seen": 608721748, + "step": 6669 + }, + { + "epoch": 27.7875, + "loss": 0.06024031713604927, + "loss_ce": 2.430454514978919e-06, + "loss_iou": 0.21484375, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 608721748, + "step": 6669 + }, + { + "epoch": 27.791666666666668, + "grad_norm": 1.7668261934744909, + "learning_rate": 5e-05, + "loss": 0.0324, + "num_input_tokens_seen": 608813348, + "step": 6670 + }, + { + "epoch": 27.791666666666668, + "loss": 0.023870760574936867, + "loss_ce": 3.653265594039112e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 608813348, + "step": 6670 + }, + { + "epoch": 27.795833333333334, + "grad_norm": 2.27547960921719, + "learning_rate": 5e-05, + "loss": 0.0362, + "num_input_tokens_seen": 608904652, + "step": 6671 + }, + { + "epoch": 27.795833333333334, + "loss": 0.023014262318611145, + "loss_ce": 4.0084287320496514e-06, + "loss_iou": 0.265625, + "loss_num": 0.004608154296875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 608904652, + "step": 6671 + }, + { + "epoch": 27.8, + "grad_norm": 2.7513623023413776, + "learning_rate": 5e-05, + "loss": 0.0349, + "num_input_tokens_seen": 608995660, + "step": 6672 + }, + { + "epoch": 27.8, + "loss": 0.03639537841081619, + "loss_ce": 3.166042688462767e-06, + "loss_iou": 0.330078125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 608995660, + "step": 6672 + }, + { + "epoch": 27.804166666666667, + "grad_norm": 2.6703657851807545, + "learning_rate": 5e-05, + "loss": 0.0446, + "num_input_tokens_seen": 609087460, + "step": 6673 + }, + { + "epoch": 27.804166666666667, + "loss": 0.03163629025220871, + "loss_ce": 2.0082923583686352e-05, + "loss_iou": 0.26953125, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 609087460, + "step": 6673 + }, + { + "epoch": 27.808333333333334, + "grad_norm": 1.2789380699694435, + "learning_rate": 5e-05, + "loss": 0.0259, + "num_input_tokens_seen": 609178952, + "step": 6674 + }, + { + "epoch": 27.808333333333334, + "loss": 0.0326501727104187, + "loss_ce": 3.995027327619027e-06, + "loss_iou": 0.06005859375, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 609178952, + "step": 6674 + }, + { + "epoch": 27.8125, + "grad_norm": 1.3440151423332536, + "learning_rate": 5e-05, + "loss": 0.0209, + "num_input_tokens_seen": 609270636, + "step": 6675 + }, + { + "epoch": 27.8125, + "loss": 0.020898228511214256, + "loss_ce": 1.315923327638302e-06, + "loss_iou": 0.298828125, + "loss_num": 0.004180908203125, + "loss_xval": 0.0208740234375, + "num_input_tokens_seen": 609270636, + "step": 6675 + }, + { + "epoch": 27.816666666666666, + "grad_norm": 1.9783284850733671, + "learning_rate": 5e-05, + "loss": 0.0179, + "num_input_tokens_seen": 609362236, + "step": 6676 + }, + { + "epoch": 27.816666666666666, + "loss": 0.01814625971019268, + "loss_ce": 3.5588950595411006e-06, + "loss_iou": 0.146484375, + "loss_num": 0.003631591796875, + "loss_xval": 0.0181884765625, + "num_input_tokens_seen": 609362236, + "step": 6676 + }, + { + "epoch": 27.820833333333333, + "grad_norm": 3.5619040739492087, + "learning_rate": 5e-05, + "loss": 0.0574, + "num_input_tokens_seen": 609453936, + "step": 6677 + }, + { + "epoch": 27.820833333333333, + "loss": 0.023126963526010513, + "loss_ce": 2.2699837245454546e-06, + "loss_iou": 0.224609375, + "loss_num": 0.004638671875, + "loss_xval": 0.0230712890625, + "num_input_tokens_seen": 609453936, + "step": 6677 + }, + { + "epoch": 27.825, + "grad_norm": 1.7534022906086244, + "learning_rate": 5e-05, + "loss": 0.0567, + "num_input_tokens_seen": 609545840, + "step": 6678 + }, + { + "epoch": 27.825, + "loss": 0.07971315085887909, + "loss_ce": 0.00019960546342190355, + "loss_iou": 0.25390625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 609545840, + "step": 6678 + }, + { + "epoch": 27.829166666666666, + "grad_norm": 1.8491931893484272, + "learning_rate": 5e-05, + "loss": 0.0362, + "num_input_tokens_seen": 609637468, + "step": 6679 + }, + { + "epoch": 27.829166666666666, + "loss": 0.021543636918067932, + "loss_ce": 0.0019894989673048258, + "loss_iou": 0.173828125, + "loss_num": 0.00390625, + "loss_xval": 0.01953125, + "num_input_tokens_seen": 609637468, + "step": 6679 + }, + { + "epoch": 27.833333333333332, + "grad_norm": 2.2468572646660143, + "learning_rate": 5e-05, + "loss": 0.0474, + "num_input_tokens_seen": 609728584, + "step": 6680 + }, + { + "epoch": 27.833333333333332, + "loss": 0.055621709674596786, + "loss_ce": 3.423035423111287e-06, + "loss_iou": 0.19140625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 609728584, + "step": 6680 + }, + { + "epoch": 27.8375, + "grad_norm": 2.3759722096065943, + "learning_rate": 5e-05, + "loss": 0.0428, + "num_input_tokens_seen": 609818644, + "step": 6681 + }, + { + "epoch": 27.8375, + "loss": 0.032642923295497894, + "loss_ce": 1.2005425560346339e-05, + "loss_iou": 0.18359375, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 609818644, + "step": 6681 + }, + { + "epoch": 27.841666666666665, + "grad_norm": 2.356444501863688, + "learning_rate": 5e-05, + "loss": 0.0547, + "num_input_tokens_seen": 609909732, + "step": 6682 + }, + { + "epoch": 27.841666666666665, + "loss": 0.05059649795293808, + "loss_ce": 5.984739800624084e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 609909732, + "step": 6682 + }, + { + "epoch": 27.845833333333335, + "grad_norm": 2.9347829583069807, + "learning_rate": 5e-05, + "loss": 0.0386, + "num_input_tokens_seen": 610001188, + "step": 6683 + }, + { + "epoch": 27.845833333333335, + "loss": 0.030507180839776993, + "loss_ce": 4.862364676228026e-06, + "loss_iou": 0.28515625, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 610001188, + "step": 6683 + }, + { + "epoch": 27.85, + "grad_norm": 2.470245589424793, + "learning_rate": 5e-05, + "loss": 0.0288, + "num_input_tokens_seen": 610092612, + "step": 6684 + }, + { + "epoch": 27.85, + "loss": 0.02897973544895649, + "loss_ce": 4.90726888529025e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 610092612, + "step": 6684 + }, + { + "epoch": 27.854166666666668, + "grad_norm": 2.6669907034193967, + "learning_rate": 5e-05, + "loss": 0.0683, + "num_input_tokens_seen": 610183948, + "step": 6685 + }, + { + "epoch": 27.854166666666668, + "loss": 0.0505242794752121, + "loss_ce": 1.7687789295450784e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 610183948, + "step": 6685 + }, + { + "epoch": 27.858333333333334, + "grad_norm": 4.943193288243672, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 610274844, + "step": 6686 + }, + { + "epoch": 27.858333333333334, + "loss": 0.044877685606479645, + "loss_ce": 1.5843521623537526e-06, + "loss_iou": 0.3671875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 610274844, + "step": 6686 + }, + { + "epoch": 27.8625, + "grad_norm": 3.015357849727821, + "learning_rate": 5e-05, + "loss": 0.0331, + "num_input_tokens_seen": 610366324, + "step": 6687 + }, + { + "epoch": 27.8625, + "loss": 0.032734472304582596, + "loss_ce": 4.369842827145476e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 610366324, + "step": 6687 + }, + { + "epoch": 27.866666666666667, + "grad_norm": 2.194911272264501, + "learning_rate": 5e-05, + "loss": 0.0327, + "num_input_tokens_seen": 610458028, + "step": 6688 + }, + { + "epoch": 27.866666666666667, + "loss": 0.03970428556203842, + "loss_ce": 8.5460051195696e-06, + "loss_iou": 0.146484375, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 610458028, + "step": 6688 + }, + { + "epoch": 27.870833333333334, + "grad_norm": 2.0279866514973808, + "learning_rate": 5e-05, + "loss": 0.0425, + "num_input_tokens_seen": 610549548, + "step": 6689 + }, + { + "epoch": 27.870833333333334, + "loss": 0.03714201599359512, + "loss_ce": 9.751396646606736e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 610549548, + "step": 6689 + }, + { + "epoch": 27.875, + "grad_norm": 1.929867615469621, + "learning_rate": 5e-05, + "loss": 0.0414, + "num_input_tokens_seen": 610639092, + "step": 6690 + }, + { + "epoch": 27.875, + "loss": 0.04645119607448578, + "loss_ce": 3.4437287013133755e-06, + "loss_iou": 0.201171875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 610639092, + "step": 6690 + }, + { + "epoch": 27.879166666666666, + "grad_norm": 2.744225789976649, + "learning_rate": 5e-05, + "loss": 0.0452, + "num_input_tokens_seen": 610730424, + "step": 6691 + }, + { + "epoch": 27.879166666666666, + "loss": 0.02222595363855362, + "loss_ce": 1.5272669315891108e-06, + "loss_iou": 0.27734375, + "loss_num": 0.00445556640625, + "loss_xval": 0.022216796875, + "num_input_tokens_seen": 610730424, + "step": 6691 + }, + { + "epoch": 27.883333333333333, + "grad_norm": 3.1370565803980033, + "learning_rate": 5e-05, + "loss": 0.0344, + "num_input_tokens_seen": 610821192, + "step": 6692 + }, + { + "epoch": 27.883333333333333, + "loss": 0.021936416625976562, + "loss_ce": 1.9063465970248217e-06, + "loss_iou": 0.1982421875, + "loss_num": 0.00439453125, + "loss_xval": 0.02197265625, + "num_input_tokens_seen": 610821192, + "step": 6692 + }, + { + "epoch": 27.8875, + "grad_norm": 2.9068527893783416, + "learning_rate": 5e-05, + "loss": 0.0244, + "num_input_tokens_seen": 610912760, + "step": 6693 + }, + { + "epoch": 27.8875, + "loss": 0.029053665697574615, + "loss_ce": 1.6190129827009514e-05, + "loss_iou": 0.296875, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 610912760, + "step": 6693 + }, + { + "epoch": 27.891666666666666, + "grad_norm": 2.527096086377251, + "learning_rate": 5e-05, + "loss": 0.0322, + "num_input_tokens_seen": 611002952, + "step": 6694 + }, + { + "epoch": 27.891666666666666, + "loss": 0.033598653972148895, + "loss_ce": 1.4061615729588084e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 611002952, + "step": 6694 + }, + { + "epoch": 27.895833333333332, + "grad_norm": 2.518044426828584, + "learning_rate": 5e-05, + "loss": 0.0447, + "num_input_tokens_seen": 611094256, + "step": 6695 + }, + { + "epoch": 27.895833333333332, + "loss": 0.04927809536457062, + "loss_ce": 7.464379450539127e-06, + "loss_iou": 0.30078125, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 611094256, + "step": 6695 + }, + { + "epoch": 27.9, + "grad_norm": 1.5148853802682194, + "learning_rate": 5e-05, + "loss": 0.0229, + "num_input_tokens_seen": 611185464, + "step": 6696 + }, + { + "epoch": 27.9, + "loss": 0.024848662316799164, + "loss_ce": 6.838909030193463e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.00494384765625, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 611185464, + "step": 6696 + }, + { + "epoch": 27.904166666666665, + "grad_norm": 1.5103727608247175, + "learning_rate": 5e-05, + "loss": 0.0356, + "num_input_tokens_seen": 611275700, + "step": 6697 + }, + { + "epoch": 27.904166666666665, + "loss": 0.05257517471909523, + "loss_ce": 8.644882655062247e-06, + "loss_iou": 0.125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 611275700, + "step": 6697 + }, + { + "epoch": 27.908333333333335, + "grad_norm": 1.5339462371131791, + "learning_rate": 5e-05, + "loss": 0.0193, + "num_input_tokens_seen": 611367152, + "step": 6698 + }, + { + "epoch": 27.908333333333335, + "loss": 0.021390093490481377, + "loss_ce": 4.900951353192795e-06, + "loss_iou": 0.1650390625, + "loss_num": 0.0042724609375, + "loss_xval": 0.0213623046875, + "num_input_tokens_seen": 611367152, + "step": 6698 + }, + { + "epoch": 27.9125, + "grad_norm": 1.4095863126869015, + "learning_rate": 5e-05, + "loss": 0.0314, + "num_input_tokens_seen": 611457952, + "step": 6699 + }, + { + "epoch": 27.9125, + "loss": 0.03978870064020157, + "loss_ce": 1.409547621733509e-06, + "loss_iou": 0.212890625, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 611457952, + "step": 6699 + }, + { + "epoch": 27.916666666666668, + "grad_norm": 1.5880024976095921, + "learning_rate": 5e-05, + "loss": 0.0209, + "num_input_tokens_seen": 611548968, + "step": 6700 + }, + { + "epoch": 27.916666666666668, + "loss": 0.023582756519317627, + "loss_ce": 4.225938391755335e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.00469970703125, + "loss_xval": 0.0235595703125, + "num_input_tokens_seen": 611548968, + "step": 6700 + }, + { + "epoch": 27.920833333333334, + "grad_norm": 1.9742190307650425, + "learning_rate": 5e-05, + "loss": 0.0365, + "num_input_tokens_seen": 611639728, + "step": 6701 + }, + { + "epoch": 27.920833333333334, + "loss": 0.025729922577738762, + "loss_ce": 3.604785206334782e-06, + "loss_iou": 0.232421875, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 611639728, + "step": 6701 + }, + { + "epoch": 27.925, + "grad_norm": 1.992467386622936, + "learning_rate": 5e-05, + "loss": 0.0323, + "num_input_tokens_seen": 611731080, + "step": 6702 + }, + { + "epoch": 27.925, + "loss": 0.03682165965437889, + "loss_ce": 2.2019166863174178e-06, + "loss_iou": 0.296875, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 611731080, + "step": 6702 + }, + { + "epoch": 27.929166666666667, + "grad_norm": 2.074614272677992, + "learning_rate": 5e-05, + "loss": 0.0427, + "num_input_tokens_seen": 611822700, + "step": 6703 + }, + { + "epoch": 27.929166666666667, + "loss": 0.028840631246566772, + "loss_ce": 1.677820364420768e-05, + "loss_iou": 0.2265625, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 611822700, + "step": 6703 + }, + { + "epoch": 27.933333333333334, + "grad_norm": 2.3413248371276985, + "learning_rate": 5e-05, + "loss": 0.091, + "num_input_tokens_seen": 611914400, + "step": 6704 + }, + { + "epoch": 27.933333333333334, + "loss": 0.1385079026222229, + "loss_ce": 3.8804105315648485e-06, + "loss_iou": 0.2265625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 611914400, + "step": 6704 + }, + { + "epoch": 27.9375, + "grad_norm": 2.836668070462786, + "learning_rate": 5e-05, + "loss": 0.0393, + "num_input_tokens_seen": 612005760, + "step": 6705 + }, + { + "epoch": 27.9375, + "loss": 0.03196059912443161, + "loss_ce": 2.3952296032803133e-05, + "loss_iou": 0.2265625, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 612005760, + "step": 6705 + }, + { + "epoch": 27.941666666666666, + "grad_norm": 3.2116535409493494, + "learning_rate": 5e-05, + "loss": 0.0311, + "num_input_tokens_seen": 612096928, + "step": 6706 + }, + { + "epoch": 27.941666666666666, + "loss": 0.028221435844898224, + "loss_ce": 7.934193490655161e-06, + "loss_iou": 0.27734375, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 612096928, + "step": 6706 + }, + { + "epoch": 27.945833333333333, + "grad_norm": 3.1307035433075927, + "learning_rate": 5e-05, + "loss": 0.043, + "num_input_tokens_seen": 612188292, + "step": 6707 + }, + { + "epoch": 27.945833333333333, + "loss": 0.028533106669783592, + "loss_ce": 0.0002280521730426699, + "loss_iou": 0.265625, + "loss_num": 0.00567626953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 612188292, + "step": 6707 + }, + { + "epoch": 27.95, + "grad_norm": 1.2504373411970096, + "learning_rate": 5e-05, + "loss": 0.024, + "num_input_tokens_seen": 612280228, + "step": 6708 + }, + { + "epoch": 27.95, + "loss": 0.0176064595580101, + "loss_ce": 5.445829629024956e-06, + "loss_iou": 0.244140625, + "loss_num": 0.0035247802734375, + "loss_xval": 0.017578125, + "num_input_tokens_seen": 612280228, + "step": 6708 + }, + { + "epoch": 27.954166666666666, + "grad_norm": 1.2803907274616162, + "learning_rate": 5e-05, + "loss": 0.0483, + "num_input_tokens_seen": 612370392, + "step": 6709 + }, + { + "epoch": 27.954166666666666, + "loss": 0.024072404950857162, + "loss_ce": 9.293554285250138e-06, + "loss_iou": 0.181640625, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 612370392, + "step": 6709 + }, + { + "epoch": 27.958333333333332, + "grad_norm": 1.9039607328747052, + "learning_rate": 5e-05, + "loss": 0.022, + "num_input_tokens_seen": 612461204, + "step": 6710 + }, + { + "epoch": 27.958333333333332, + "loss": 0.017778582870960236, + "loss_ce": 2.0933200630679494e-06, + "loss_iou": 0.2119140625, + "loss_num": 0.0035552978515625, + "loss_xval": 0.017822265625, + "num_input_tokens_seen": 612461204, + "step": 6710 + }, + { + "epoch": 27.9625, + "grad_norm": 1.077575573157038, + "learning_rate": 5e-05, + "loss": 0.0626, + "num_input_tokens_seen": 612552660, + "step": 6711 + }, + { + "epoch": 27.9625, + "loss": 0.09176675230264664, + "loss_ce": 8.022767360671423e-06, + "loss_iou": 0.2412109375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 612552660, + "step": 6711 + }, + { + "epoch": 27.966666666666665, + "grad_norm": 1.4654702672188098, + "learning_rate": 5e-05, + "loss": 0.0219, + "num_input_tokens_seen": 612644160, + "step": 6712 + }, + { + "epoch": 27.966666666666665, + "loss": 0.019673090428113937, + "loss_ce": 4.2658390157157555e-05, + "loss_iou": 0.1953125, + "loss_num": 0.00390625, + "loss_xval": 0.0196533203125, + "num_input_tokens_seen": 612644160, + "step": 6712 + }, + { + "epoch": 27.970833333333335, + "grad_norm": 2.7529606130467177, + "learning_rate": 5e-05, + "loss": 0.0537, + "num_input_tokens_seen": 612735848, + "step": 6713 + }, + { + "epoch": 27.970833333333335, + "loss": 0.033948902040719986, + "loss_ce": 0.0007762951427139342, + "loss_iou": 0.251953125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 612735848, + "step": 6713 + }, + { + "epoch": 27.975, + "grad_norm": 3.718214513967778, + "learning_rate": 5e-05, + "loss": 0.0594, + "num_input_tokens_seen": 612827740, + "step": 6714 + }, + { + "epoch": 27.975, + "loss": 0.04464350640773773, + "loss_ce": 7.25825666449964e-05, + "loss_iou": 0.1953125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 612827740, + "step": 6714 + }, + { + "epoch": 27.979166666666668, + "grad_norm": 1.9675331451311393, + "learning_rate": 5e-05, + "loss": 0.0316, + "num_input_tokens_seen": 612918596, + "step": 6715 + }, + { + "epoch": 27.979166666666668, + "loss": 0.031432561576366425, + "loss_ce": 7.085216111590853e-06, + "loss_iou": 0.1494140625, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 612918596, + "step": 6715 + }, + { + "epoch": 27.983333333333334, + "grad_norm": 2.744930369139453, + "learning_rate": 5e-05, + "loss": 0.062, + "num_input_tokens_seen": 613009696, + "step": 6716 + }, + { + "epoch": 27.983333333333334, + "loss": 0.045453984290361404, + "loss_ce": 2.8569376809173264e-05, + "loss_iou": 0.1796875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 613009696, + "step": 6716 + }, + { + "epoch": 27.9875, + "grad_norm": 2.170973130092673, + "learning_rate": 5e-05, + "loss": 0.0562, + "num_input_tokens_seen": 613101436, + "step": 6717 + }, + { + "epoch": 27.9875, + "loss": 0.01815599389374256, + "loss_ce": 5.664732270815875e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.003631591796875, + "loss_xval": 0.0181884765625, + "num_input_tokens_seen": 613101436, + "step": 6717 + }, + { + "epoch": 27.991666666666667, + "grad_norm": 2.912160469589973, + "learning_rate": 5e-05, + "loss": 0.036, + "num_input_tokens_seen": 613193484, + "step": 6718 + }, + { + "epoch": 27.991666666666667, + "loss": 0.03415211662650108, + "loss_ce": 2.9476209419954102e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 613193484, + "step": 6718 + }, + { + "epoch": 27.995833333333334, + "grad_norm": 2.2128424458657494, + "learning_rate": 5e-05, + "loss": 0.0443, + "num_input_tokens_seen": 613284420, + "step": 6719 + }, + { + "epoch": 27.995833333333334, + "loss": 0.06363622099161148, + "loss_ce": 7.0679270720575005e-06, + "loss_iou": 0.24609375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 613284420, + "step": 6719 + }, + { + "epoch": 28.0, + "grad_norm": 2.350621454205893, + "learning_rate": 5e-05, + "loss": 0.0312, + "num_input_tokens_seen": 613375388, + "step": 6720 + }, + { + "epoch": 28.0, + "loss": 0.042087651789188385, + "loss_ce": 3.914565695595229e-06, + "loss_iou": 0.2333984375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 613375388, + "step": 6720 + }, + { + "epoch": 28.004166666666666, + "grad_norm": 3.2327163007317914, + "learning_rate": 5e-05, + "loss": 0.0263, + "num_input_tokens_seen": 613466476, + "step": 6721 + }, + { + "epoch": 28.004166666666666, + "loss": 0.02487345226109028, + "loss_ce": 1.6264102669083513e-06, + "loss_iou": 0.296875, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 613466476, + "step": 6721 + }, + { + "epoch": 28.008333333333333, + "grad_norm": 3.589281169371409, + "learning_rate": 5e-05, + "loss": 0.1078, + "num_input_tokens_seen": 613557576, + "step": 6722 + }, + { + "epoch": 28.008333333333333, + "loss": 0.1355500966310501, + "loss_ce": 8.257750596385449e-05, + "loss_iou": 0.345703125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 613557576, + "step": 6722 + }, + { + "epoch": 28.0125, + "grad_norm": 4.843901347091087, + "learning_rate": 5e-05, + "loss": 0.0292, + "num_input_tokens_seen": 613649712, + "step": 6723 + }, + { + "epoch": 28.0125, + "loss": 0.025940248742699623, + "loss_ce": 0.0003283706319052726, + "loss_iou": 0.28515625, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 613649712, + "step": 6723 + }, + { + "epoch": 28.016666666666666, + "grad_norm": 2.309924836065568, + "learning_rate": 5e-05, + "loss": 0.0318, + "num_input_tokens_seen": 613740504, + "step": 6724 + }, + { + "epoch": 28.016666666666666, + "loss": 0.03217145800590515, + "loss_ce": 5.931042323936708e-06, + "loss_iou": 0.173828125, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 613740504, + "step": 6724 + }, + { + "epoch": 28.020833333333332, + "grad_norm": 2.744417311709559, + "learning_rate": 5e-05, + "loss": 0.0222, + "num_input_tokens_seen": 613831876, + "step": 6725 + }, + { + "epoch": 28.020833333333332, + "loss": 0.023073479533195496, + "loss_ce": 2.190429313486675e-06, + "loss_iou": 0.328125, + "loss_num": 0.004608154296875, + "loss_xval": 0.0230712890625, + "num_input_tokens_seen": 613831876, + "step": 6725 + }, + { + "epoch": 28.025, + "grad_norm": 3.9389467811443013, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 613923236, + "step": 6726 + }, + { + "epoch": 28.025, + "loss": 0.02820133976638317, + "loss_ce": 3.0974363198765786e-06, + "loss_iou": 0.234375, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 613923236, + "step": 6726 + }, + { + "epoch": 28.029166666666665, + "grad_norm": 2.3543451636267423, + "learning_rate": 5e-05, + "loss": 0.0436, + "num_input_tokens_seen": 614014544, + "step": 6727 + }, + { + "epoch": 28.029166666666665, + "loss": 0.025972997769713402, + "loss_ce": 2.539197112128022e-06, + "loss_iou": 0.1796875, + "loss_num": 0.00518798828125, + "loss_xval": 0.0260009765625, + "num_input_tokens_seen": 614014544, + "step": 6727 + }, + { + "epoch": 28.033333333333335, + "grad_norm": 1.8596789984578768, + "learning_rate": 5e-05, + "loss": 0.0761, + "num_input_tokens_seen": 614105536, + "step": 6728 + }, + { + "epoch": 28.033333333333335, + "loss": 0.039376746863126755, + "loss_ce": 0.00011588324559852481, + "loss_iou": 0.259765625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 614105536, + "step": 6728 + }, + { + "epoch": 28.0375, + "grad_norm": 1.2802451215459856, + "learning_rate": 5e-05, + "loss": 0.0342, + "num_input_tokens_seen": 614196852, + "step": 6729 + }, + { + "epoch": 28.0375, + "loss": 0.04407196491956711, + "loss_ce": 4.585856004268862e-06, + "loss_iou": 0.30859375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 614196852, + "step": 6729 + }, + { + "epoch": 28.041666666666668, + "grad_norm": 1.8547256072084315, + "learning_rate": 5e-05, + "loss": 0.0388, + "num_input_tokens_seen": 614288368, + "step": 6730 + }, + { + "epoch": 28.041666666666668, + "loss": 0.0452733188867569, + "loss_ce": 8.120239726849832e-06, + "loss_iou": 0.177734375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 614288368, + "step": 6730 + }, + { + "epoch": 28.045833333333334, + "grad_norm": 2.529875475786179, + "learning_rate": 5e-05, + "loss": 0.0411, + "num_input_tokens_seen": 614379672, + "step": 6731 + }, + { + "epoch": 28.045833333333334, + "loss": 0.04790426790714264, + "loss_ce": 1.0746186489996035e-05, + "loss_iou": 0.205078125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 614379672, + "step": 6731 + }, + { + "epoch": 28.05, + "grad_norm": 2.8016778294167586, + "learning_rate": 5e-05, + "loss": 0.0541, + "num_input_tokens_seen": 614471132, + "step": 6732 + }, + { + "epoch": 28.05, + "loss": 0.05601131543517113, + "loss_ce": 2.68183030129876e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 614471132, + "step": 6732 + }, + { + "epoch": 28.054166666666667, + "grad_norm": 5.041261792599298, + "learning_rate": 5e-05, + "loss": 0.0273, + "num_input_tokens_seen": 614562476, + "step": 6733 + }, + { + "epoch": 28.054166666666667, + "loss": 0.01708902418613434, + "loss_ce": 1.4439017832046375e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.00341796875, + "loss_xval": 0.01708984375, + "num_input_tokens_seen": 614562476, + "step": 6733 + }, + { + "epoch": 28.058333333333334, + "grad_norm": 21.63092022483019, + "learning_rate": 5e-05, + "loss": 0.032, + "num_input_tokens_seen": 614653496, + "step": 6734 + }, + { + "epoch": 28.058333333333334, + "loss": 0.04017886146903038, + "loss_ce": 2.4707196644158103e-06, + "loss_iou": 0.2734375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 614653496, + "step": 6734 + }, + { + "epoch": 28.0625, + "grad_norm": 6.262098948531676, + "learning_rate": 5e-05, + "loss": 0.033, + "num_input_tokens_seen": 614744988, + "step": 6735 + }, + { + "epoch": 28.0625, + "loss": 0.028947506099939346, + "loss_ce": 1.5855760011618258e-06, + "loss_iou": 0.2197265625, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 614744988, + "step": 6735 + }, + { + "epoch": 28.066666666666666, + "grad_norm": 1.9436654058305354, + "learning_rate": 5e-05, + "loss": 0.046, + "num_input_tokens_seen": 614835832, + "step": 6736 + }, + { + "epoch": 28.066666666666666, + "loss": 0.04625730589032173, + "loss_ce": 0.00016050375415943563, + "loss_iou": 0.296875, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 614835832, + "step": 6736 + }, + { + "epoch": 28.070833333333333, + "grad_norm": 2.257151469855315, + "learning_rate": 5e-05, + "loss": 0.0621, + "num_input_tokens_seen": 614927224, + "step": 6737 + }, + { + "epoch": 28.070833333333333, + "loss": 0.059449754655361176, + "loss_ce": 1.6775371477706358e-05, + "loss_iou": 0.275390625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 614927224, + "step": 6737 + }, + { + "epoch": 28.075, + "grad_norm": 2.2855989114564763, + "learning_rate": 5e-05, + "loss": 0.0298, + "num_input_tokens_seen": 615018312, + "step": 6738 + }, + { + "epoch": 28.075, + "loss": 0.02760922722518444, + "loss_ce": 5.185429836274125e-05, + "loss_iou": 0.30859375, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 615018312, + "step": 6738 + }, + { + "epoch": 28.079166666666666, + "grad_norm": 2.297345574198754, + "learning_rate": 5e-05, + "loss": 0.0502, + "num_input_tokens_seen": 615109608, + "step": 6739 + }, + { + "epoch": 28.079166666666666, + "loss": 0.05746372416615486, + "loss_ce": 6.779150862712413e-05, + "loss_iou": 0.134765625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 615109608, + "step": 6739 + }, + { + "epoch": 28.083333333333332, + "grad_norm": 1.5291320958908552, + "learning_rate": 5e-05, + "loss": 0.0529, + "num_input_tokens_seen": 615200904, + "step": 6740 + }, + { + "epoch": 28.083333333333332, + "loss": 0.05671272426843643, + "loss_ce": 0.00014839068171568215, + "loss_iou": 0.0301513671875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 615200904, + "step": 6740 + }, + { + "epoch": 28.0875, + "grad_norm": 1.089316633257019, + "learning_rate": 5e-05, + "loss": 0.0608, + "num_input_tokens_seen": 615292096, + "step": 6741 + }, + { + "epoch": 28.0875, + "loss": 0.09118057787418365, + "loss_ce": 2.457026857882738e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 615292096, + "step": 6741 + }, + { + "epoch": 28.091666666666665, + "grad_norm": 1.018290463227347, + "learning_rate": 5e-05, + "loss": 0.0467, + "num_input_tokens_seen": 615382728, + "step": 6742 + }, + { + "epoch": 28.091666666666665, + "loss": 0.05897248536348343, + "loss_ce": 2.7785092242993414e-05, + "loss_iou": 0.20703125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 615382728, + "step": 6742 + }, + { + "epoch": 28.095833333333335, + "grad_norm": 1.4832378691014567, + "learning_rate": 5e-05, + "loss": 0.0337, + "num_input_tokens_seen": 615473816, + "step": 6743 + }, + { + "epoch": 28.095833333333335, + "loss": 0.031228337436914444, + "loss_ce": 1.2240070645930246e-06, + "loss_iou": 0.251953125, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 615473816, + "step": 6743 + }, + { + "epoch": 28.1, + "grad_norm": 1.7110579965688832, + "learning_rate": 5e-05, + "loss": 0.0334, + "num_input_tokens_seen": 615565288, + "step": 6744 + }, + { + "epoch": 28.1, + "loss": 0.03355923295021057, + "loss_ce": 5.154472546564648e-06, + "loss_iou": 0.2392578125, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 615565288, + "step": 6744 + }, + { + "epoch": 28.104166666666668, + "grad_norm": 2.22850107697282, + "learning_rate": 5e-05, + "loss": 0.0422, + "num_input_tokens_seen": 615656148, + "step": 6745 + }, + { + "epoch": 28.104166666666668, + "loss": 0.06713393330574036, + "loss_ce": 2.895528268709313e-06, + "loss_iou": 0.2041015625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 615656148, + "step": 6745 + }, + { + "epoch": 28.108333333333334, + "grad_norm": 2.8948865798773413, + "learning_rate": 5e-05, + "loss": 0.0421, + "num_input_tokens_seen": 615747460, + "step": 6746 + }, + { + "epoch": 28.108333333333334, + "loss": 0.034502334892749786, + "loss_ce": 2.2117551452538464e-06, + "loss_iou": 0.25390625, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 615747460, + "step": 6746 + }, + { + "epoch": 28.1125, + "grad_norm": 2.809796632839258, + "learning_rate": 5e-05, + "loss": 0.0585, + "num_input_tokens_seen": 615838432, + "step": 6747 + }, + { + "epoch": 28.1125, + "loss": 0.05419039726257324, + "loss_ce": 2.1699428543797694e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 615838432, + "step": 6747 + }, + { + "epoch": 28.116666666666667, + "grad_norm": 3.1390099084089327, + "learning_rate": 5e-05, + "loss": 0.0335, + "num_input_tokens_seen": 615929520, + "step": 6748 + }, + { + "epoch": 28.116666666666667, + "loss": 0.030735237523913383, + "loss_ce": 1.9295372112537734e-05, + "loss_iou": 0.29296875, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 615929520, + "step": 6748 + }, + { + "epoch": 28.120833333333334, + "grad_norm": 3.0726683309385914, + "learning_rate": 5e-05, + "loss": 0.0406, + "num_input_tokens_seen": 616020724, + "step": 6749 + }, + { + "epoch": 28.120833333333334, + "loss": 0.03250548616051674, + "loss_ce": 0.0012020806316286325, + "loss_iou": 0.228515625, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 616020724, + "step": 6749 + }, + { + "epoch": 28.125, + "grad_norm": 1.7974385929387977, + "learning_rate": 5e-05, + "loss": 0.0571, + "num_input_tokens_seen": 616111644, + "step": 6750 + }, + { + "epoch": 28.125, + "eval_seeclick_CIoU": 0.22142113745212555, + "eval_seeclick_GIoU": 0.20158283412456512, + "eval_seeclick_IoU": 0.31360870599746704, + "eval_seeclick_MAE_all": 0.09904173389077187, + "eval_seeclick_MAE_h": 0.11917447298765182, + "eval_seeclick_MAE_w": 0.16062359511852264, + "eval_seeclick_MAE_x_boxes": 0.17907852679491043, + "eval_seeclick_MAE_y_boxes": 0.11885930970311165, + "eval_seeclick_NUM_probability": 0.9999977946281433, + "eval_seeclick_inside_bbox": 0.4943181872367859, + "eval_seeclick_loss": 0.5863343477249146, + "eval_seeclick_loss_ce": 0.11647450551390648, + "eval_seeclick_loss_iou": 0.46881103515625, + "eval_seeclick_loss_num": 0.092193603515625, + "eval_seeclick_loss_xval": 0.4608154296875, + "eval_seeclick_runtime": 77.5535, + "eval_seeclick_samples_per_second": 0.554, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 616111644, + "step": 6750 + }, + { + "epoch": 28.125, + "eval_icons_CIoU": 0.24185068905353546, + "eval_icons_GIoU": 0.22074273973703384, + "eval_icons_IoU": 0.3384847342967987, + "eval_icons_MAE_all": 0.08675602078437805, + "eval_icons_MAE_h": 0.17416387051343918, + "eval_icons_MAE_w": 0.13255734741687775, + "eval_icons_MAE_x_boxes": 0.13261838257312775, + "eval_icons_MAE_y_boxes": 0.17597932368516922, + "eval_icons_NUM_probability": 0.9999973177909851, + "eval_icons_inside_bbox": 0.5190972238779068, + "eval_icons_loss": 0.4279177784919739, + "eval_icons_loss_ce": 0.000579208746785298, + "eval_icons_loss_iou": 0.2396240234375, + "eval_icons_loss_num": 0.0855712890625, + "eval_icons_loss_xval": 0.42791748046875, + "eval_icons_runtime": 86.732, + "eval_icons_samples_per_second": 0.576, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 616111644, + "step": 6750 + }, + { + "epoch": 28.125, + "eval_screenspot_CIoU": 0.39986973504225415, + "eval_screenspot_GIoU": 0.38962361713250476, + "eval_screenspot_IoU": 0.4628542462984721, + "eval_screenspot_MAE_all": 0.08986213803291321, + "eval_screenspot_MAE_h": 0.09811956932147343, + "eval_screenspot_MAE_w": 0.17064129809538522, + "eval_screenspot_MAE_x_boxes": 0.15360971788565317, + "eval_screenspot_MAE_y_boxes": 0.09253939737876256, + "eval_screenspot_NUM_probability": 0.999987006187439, + "eval_screenspot_inside_bbox": 0.7145833373069763, + "eval_screenspot_loss": 0.45824649930000305, + "eval_screenspot_loss_ce": 4.505477348478356e-05, + "eval_screenspot_loss_iou": 0.3572591145833333, + "eval_screenspot_loss_num": 0.09360249837239583, + "eval_screenspot_loss_xval": 0.4679768880208333, + "eval_screenspot_runtime": 152.3895, + "eval_screenspot_samples_per_second": 0.584, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 616111644, + "step": 6750 + }, + { + "epoch": 28.125, + "eval_compot_CIoU": 0.5180985331535339, + "eval_compot_GIoU": 0.5265152156352997, + "eval_compot_IoU": 0.5750917792320251, + "eval_compot_MAE_all": 0.0474148690700531, + "eval_compot_MAE_h": 0.05340435355901718, + "eval_compot_MAE_w": 0.12024356797337532, + "eval_compot_MAE_x_boxes": 0.12448347359895706, + "eval_compot_MAE_y_boxes": 0.05318191833794117, + "eval_compot_NUM_probability": 0.9999876022338867, + "eval_compot_inside_bbox": 0.7447916567325592, + "eval_compot_loss": 0.2997451722621918, + "eval_compot_loss_ce": 0.0403006412088871, + "eval_compot_loss_iou": 0.2772216796875, + "eval_compot_loss_num": 0.0525665283203125, + "eval_compot_loss_xval": 0.262939453125, + "eval_compot_runtime": 87.0473, + "eval_compot_samples_per_second": 0.574, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 616111644, + "step": 6750 + } + ], + "logging_steps": 1.0, + "max_steps": 10000, + "num_input_tokens_seen": 616111644, + "num_train_epochs": 42, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3818994542903296.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}