{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9259259259259259, "eval_steps": 20, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009259259259259259, "grad_norm": NaN, "learning_rate": 1e-05, "loss": 1.9801, "step": 1 }, { "epoch": 0.018518518518518517, "grad_norm": NaN, "learning_rate": 1e-05, "loss": 1.7876, "step": 2 }, { "epoch": 0.027777777777777776, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 2.3855, "step": 3 }, { "epoch": 0.037037037037037035, "grad_norm": 129.57530212402344, "learning_rate": 9.9e-06, "loss": 1.8948, "step": 4 }, { "epoch": 0.046296296296296294, "grad_norm": Infinity, "learning_rate": 9.9e-06, "loss": 2.1217, "step": 5 }, { "epoch": 0.05555555555555555, "grad_norm": 96.91162872314453, "learning_rate": 9.800000000000001e-06, "loss": 2.0581, "step": 6 }, { "epoch": 0.06481481481481481, "grad_norm": 40.41334533691406, "learning_rate": 9.7e-06, "loss": 1.7127, "step": 7 }, { "epoch": 0.07407407407407407, "grad_norm": 89.87907409667969, "learning_rate": 9.600000000000001e-06, "loss": 1.9292, "step": 8 }, { "epoch": 0.08333333333333333, "grad_norm": 52.121402740478516, "learning_rate": 9.5e-06, "loss": 1.9921, "step": 9 }, { "epoch": 0.09259259259259259, "grad_norm": 321.693115234375, "learning_rate": 9.4e-06, "loss": 2.1328, "step": 10 }, { "epoch": 0.10185185185185185, "grad_norm": 125.0765609741211, "learning_rate": 9.3e-06, "loss": 1.986, "step": 11 }, { "epoch": 0.1111111111111111, "grad_norm": 110.41716766357422, "learning_rate": 9.200000000000002e-06, "loss": 1.8335, "step": 12 }, { "epoch": 0.12037037037037036, "grad_norm": 77.33905792236328, "learning_rate": 9.100000000000001e-06, "loss": 1.9346, "step": 13 }, { "epoch": 0.12962962962962962, "grad_norm": 89.59822845458984, "learning_rate": 9e-06, "loss": 2.0646, "step": 14 }, { "epoch": 0.1388888888888889, "grad_norm": 23.786359786987305, "learning_rate": 8.900000000000001e-06, "loss": 1.9532, "step": 15 }, { "epoch": 0.14814814814814814, "grad_norm": 49.40592956542969, "learning_rate": 8.8e-06, "loss": 1.9619, "step": 16 }, { "epoch": 0.1574074074074074, "grad_norm": 146.65223693847656, "learning_rate": 8.700000000000001e-06, "loss": 1.9551, "step": 17 }, { "epoch": 0.16666666666666666, "grad_norm": Infinity, "learning_rate": 8.700000000000001e-06, "loss": 1.6949, "step": 18 }, { "epoch": 0.17592592592592593, "grad_norm": 236.78709411621094, "learning_rate": 8.6e-06, "loss": 1.8608, "step": 19 }, { "epoch": 0.18518518518518517, "grad_norm": 184.51332092285156, "learning_rate": 8.5e-06, "loss": 1.9155, "step": 20 }, { "epoch": 0.18518518518518517, "eval_loss": 1.8037505149841309, "eval_map": 0.0047, "eval_map_50": 0.0101, "eval_map_75": 0.005, "eval_map_Adult": 0.0081, "eval_map_Kid": 0.0013, "eval_map_large": 0.0071, "eval_map_medium": 0.0035, "eval_map_small": -1.0, "eval_mar_1": 0.0332, "eval_mar_10": 0.1231, "eval_mar_100": 0.3206, "eval_mar_100_Adult": 0.4727, "eval_mar_100_Kid": 0.1685, "eval_mar_large": 0.3067, "eval_mar_medium": 0.4545, "eval_mar_small": -1.0, "eval_runtime": 15.3961, "eval_samples_per_second": 4.936, "eval_steps_per_second": 1.234, "step": 20 }, { "epoch": 0.19444444444444445, "grad_norm": 163.67559814453125, "learning_rate": 8.400000000000001e-06, "loss": 2.1412, "step": 21 }, { "epoch": 0.2037037037037037, "grad_norm": 24.548583984375, "learning_rate": 8.3e-06, "loss": 1.7217, "step": 22 }, { "epoch": 0.21296296296296297, "grad_norm": 124.01173400878906, "learning_rate": 8.2e-06, "loss": 2.0238, "step": 23 }, { "epoch": 0.2222222222222222, "grad_norm": 29.773996353149414, "learning_rate": 8.1e-06, "loss": 1.995, "step": 24 }, { "epoch": 0.23148148148148148, "grad_norm": 294.4190368652344, "learning_rate": 8.000000000000001e-06, "loss": 1.7983, "step": 25 }, { "epoch": 0.24074074074074073, "grad_norm": 478.20721435546875, "learning_rate": 7.9e-06, "loss": 2.1716, "step": 26 }, { "epoch": 0.25, "grad_norm": 57.03529357910156, "learning_rate": 7.800000000000002e-06, "loss": 1.9222, "step": 27 }, { "epoch": 0.25925925925925924, "grad_norm": 83.11935424804688, "learning_rate": 7.7e-06, "loss": 2.0409, "step": 28 }, { "epoch": 0.26851851851851855, "grad_norm": 177.90359497070312, "learning_rate": 7.600000000000001e-06, "loss": 1.8105, "step": 29 }, { "epoch": 0.2777777777777778, "grad_norm": 93.07716369628906, "learning_rate": 7.500000000000001e-06, "loss": 1.935, "step": 30 }, { "epoch": 0.28703703703703703, "grad_norm": 146.4728546142578, "learning_rate": 7.4e-06, "loss": 1.6811, "step": 31 }, { "epoch": 0.2962962962962963, "grad_norm": 223.94830322265625, "learning_rate": 7.3e-06, "loss": 1.922, "step": 32 }, { "epoch": 0.3055555555555556, "grad_norm": 153.93211364746094, "learning_rate": 7.2000000000000005e-06, "loss": 1.9116, "step": 33 }, { "epoch": 0.3148148148148148, "grad_norm": 193.51223754882812, "learning_rate": 7.100000000000001e-06, "loss": 2.1864, "step": 34 }, { "epoch": 0.32407407407407407, "grad_norm": 590.421875, "learning_rate": 7e-06, "loss": 1.8323, "step": 35 }, { "epoch": 0.3333333333333333, "grad_norm": 194.01913452148438, "learning_rate": 6.9e-06, "loss": 1.8585, "step": 36 }, { "epoch": 0.3425925925925926, "grad_norm": 45.26372146606445, "learning_rate": 6.800000000000001e-06, "loss": 2.0296, "step": 37 }, { "epoch": 0.35185185185185186, "grad_norm": 206.16380310058594, "learning_rate": 6.700000000000001e-06, "loss": 1.8509, "step": 38 }, { "epoch": 0.3611111111111111, "grad_norm": 162.83180236816406, "learning_rate": 6.600000000000001e-06, "loss": 1.7652, "step": 39 }, { "epoch": 0.37037037037037035, "grad_norm": 55.78132629394531, "learning_rate": 6.5000000000000004e-06, "loss": 1.8395, "step": 40 }, { "epoch": 0.37037037037037035, "eval_loss": 1.7978708744049072, "eval_map": 0.0045, "eval_map_50": 0.0107, "eval_map_75": 0.0035, "eval_map_Adult": 0.0076, "eval_map_Kid": 0.0014, "eval_map_large": 0.0072, "eval_map_medium": 0.0042, "eval_map_small": -1.0, "eval_mar_1": 0.0223, "eval_mar_10": 0.1147, "eval_mar_100": 0.3081, "eval_mar_100_Adult": 0.4436, "eval_mar_100_Kid": 0.1726, "eval_mar_large": 0.2926, "eval_mar_medium": 0.4909, "eval_mar_small": -1.0, "eval_runtime": 14.768, "eval_samples_per_second": 5.146, "eval_steps_per_second": 1.287, "step": 40 }, { "epoch": 0.37962962962962965, "grad_norm": 60.19484329223633, "learning_rate": 6.4000000000000006e-06, "loss": 1.6552, "step": 41 }, { "epoch": 0.3888888888888889, "grad_norm": 179.31629943847656, "learning_rate": 6.300000000000001e-06, "loss": 1.7017, "step": 42 }, { "epoch": 0.39814814814814814, "grad_norm": 296.96734619140625, "learning_rate": 6.200000000000001e-06, "loss": 1.9784, "step": 43 }, { "epoch": 0.4074074074074074, "grad_norm": 32.73106384277344, "learning_rate": 6.1e-06, "loss": 2.0133, "step": 44 }, { "epoch": 0.4166666666666667, "grad_norm": 76.0937271118164, "learning_rate": 6e-06, "loss": 2.1171, "step": 45 }, { "epoch": 0.42592592592592593, "grad_norm": 41.131385803222656, "learning_rate": 5.9e-06, "loss": 2.0797, "step": 46 }, { "epoch": 0.4351851851851852, "grad_norm": 722.7262573242188, "learning_rate": 5.8e-06, "loss": 2.6376, "step": 47 }, { "epoch": 0.4444444444444444, "grad_norm": 661.7726440429688, "learning_rate": 5.7e-06, "loss": 2.1092, "step": 48 }, { "epoch": 0.4537037037037037, "grad_norm": 86.95410919189453, "learning_rate": 5.600000000000001e-06, "loss": 1.8785, "step": 49 }, { "epoch": 0.46296296296296297, "grad_norm": 168.14027404785156, "learning_rate": 5.500000000000001e-06, "loss": 1.9761, "step": 50 }, { "epoch": 0.4722222222222222, "grad_norm": Infinity, "learning_rate": 5.500000000000001e-06, "loss": 1.742, "step": 51 }, { "epoch": 0.48148148148148145, "grad_norm": 25.475540161132812, "learning_rate": 5.400000000000001e-06, "loss": 1.922, "step": 52 }, { "epoch": 0.49074074074074076, "grad_norm": 54.6159553527832, "learning_rate": 5.300000000000001e-06, "loss": 1.649, "step": 53 }, { "epoch": 0.5, "grad_norm": 67.34464263916016, "learning_rate": 5.2e-06, "loss": 2.0628, "step": 54 }, { "epoch": 0.5092592592592593, "grad_norm": 99.4066162109375, "learning_rate": 5.1e-06, "loss": 1.9407, "step": 55 }, { "epoch": 0.5185185185185185, "grad_norm": 68.25653076171875, "learning_rate": 5e-06, "loss": 1.9328, "step": 56 }, { "epoch": 0.5277777777777778, "grad_norm": 398.8371276855469, "learning_rate": 4.9000000000000005e-06, "loss": 1.8278, "step": 57 }, { "epoch": 0.5370370370370371, "grad_norm": 49.0096435546875, "learning_rate": 4.800000000000001e-06, "loss": 1.9959, "step": 58 }, { "epoch": 0.5462962962962963, "grad_norm": 310.508544921875, "learning_rate": 4.7e-06, "loss": 1.6916, "step": 59 }, { "epoch": 0.5555555555555556, "grad_norm": 260.59576416015625, "learning_rate": 4.600000000000001e-06, "loss": 1.8477, "step": 60 }, { "epoch": 0.5555555555555556, "eval_loss": 1.765649437904358, "eval_map": 0.0046, "eval_map_50": 0.0107, "eval_map_75": 0.0041, "eval_map_Adult": 0.0072, "eval_map_Kid": 0.002, "eval_map_large": 0.0077, "eval_map_medium": 0.0044, "eval_map_small": -1.0, "eval_mar_1": 0.0329, "eval_mar_10": 0.1148, "eval_mar_100": 0.3276, "eval_mar_100_Adult": 0.4309, "eval_mar_100_Kid": 0.2242, "eval_mar_large": 0.3137, "eval_mar_medium": 0.5091, "eval_mar_small": -1.0, "eval_runtime": 14.8896, "eval_samples_per_second": 5.104, "eval_steps_per_second": 1.276, "step": 60 }, { "epoch": 0.5648148148148148, "grad_norm": 65.56439971923828, "learning_rate": 4.5e-06, "loss": 1.9681, "step": 61 }, { "epoch": 0.5740740740740741, "grad_norm": 99.23474884033203, "learning_rate": 4.4e-06, "loss": 1.7314, "step": 62 }, { "epoch": 0.5833333333333334, "grad_norm": 118.4346694946289, "learning_rate": 4.3e-06, "loss": 1.6602, "step": 63 }, { "epoch": 0.5925925925925926, "grad_norm": 43.443115234375, "learning_rate": 4.2000000000000004e-06, "loss": 2.0101, "step": 64 }, { "epoch": 0.6018518518518519, "grad_norm": 112.58702850341797, "learning_rate": 4.1e-06, "loss": 1.656, "step": 65 }, { "epoch": 0.6111111111111112, "grad_norm": 60.914520263671875, "learning_rate": 4.000000000000001e-06, "loss": 1.7922, "step": 66 }, { "epoch": 0.6203703703703703, "grad_norm": 92.44080352783203, "learning_rate": 3.900000000000001e-06, "loss": 1.5629, "step": 67 }, { "epoch": 0.6296296296296297, "grad_norm": 1000.2711181640625, "learning_rate": 3.8000000000000005e-06, "loss": 2.3308, "step": 68 }, { "epoch": 0.6388888888888888, "grad_norm": 132.41334533691406, "learning_rate": 3.7e-06, "loss": 1.7623, "step": 69 }, { "epoch": 0.6481481481481481, "grad_norm": 631.0505981445312, "learning_rate": 3.6000000000000003e-06, "loss": 2.043, "step": 70 }, { "epoch": 0.6574074074074074, "grad_norm": 230.10476684570312, "learning_rate": 3.5e-06, "loss": 1.9894, "step": 71 }, { "epoch": 0.6666666666666666, "grad_norm": 68.68940734863281, "learning_rate": 3.4000000000000005e-06, "loss": 1.8069, "step": 72 }, { "epoch": 0.6759259259259259, "grad_norm": 310.83233642578125, "learning_rate": 3.3000000000000006e-06, "loss": 1.8285, "step": 73 }, { "epoch": 0.6851851851851852, "grad_norm": 97.42311096191406, "learning_rate": 3.2000000000000003e-06, "loss": 1.8649, "step": 74 }, { "epoch": 0.6944444444444444, "grad_norm": 119.45216369628906, "learning_rate": 3.1000000000000004e-06, "loss": 1.6971, "step": 75 }, { "epoch": 0.7037037037037037, "grad_norm": 63.03942108154297, "learning_rate": 3e-06, "loss": 1.8963, "step": 76 }, { "epoch": 0.7129629629629629, "grad_norm": 69.41495513916016, "learning_rate": 2.9e-06, "loss": 1.551, "step": 77 }, { "epoch": 0.7222222222222222, "grad_norm": 90.6502685546875, "learning_rate": 2.8000000000000003e-06, "loss": 1.6615, "step": 78 }, { "epoch": 0.7314814814814815, "grad_norm": 111.7878646850586, "learning_rate": 2.7000000000000004e-06, "loss": 2.3283, "step": 79 }, { "epoch": 0.7407407407407407, "grad_norm": 89.25751495361328, "learning_rate": 2.6e-06, "loss": 1.66, "step": 80 }, { "epoch": 0.7407407407407407, "eval_loss": 1.7439719438552856, "eval_map": 0.0044, "eval_map_50": 0.0098, "eval_map_75": 0.0039, "eval_map_Adult": 0.0062, "eval_map_Kid": 0.0026, "eval_map_large": 0.0076, "eval_map_medium": 0.0049, "eval_map_small": -1.0, "eval_mar_1": 0.0256, "eval_mar_10": 0.1029, "eval_mar_100": 0.3266, "eval_mar_100_Adult": 0.4, "eval_mar_100_Kid": 0.2532, "eval_mar_large": 0.3137, "eval_mar_medium": 0.5182, "eval_mar_small": -1.0, "eval_runtime": 15.2125, "eval_samples_per_second": 4.996, "eval_steps_per_second": 1.249, "step": 80 }, { "epoch": 0.75, "grad_norm": 87.07488250732422, "learning_rate": 2.5e-06, "loss": 1.7118, "step": 81 }, { "epoch": 0.7592592592592593, "grad_norm": 59.08213424682617, "learning_rate": 2.4000000000000003e-06, "loss": 1.6906, "step": 82 }, { "epoch": 0.7685185185185185, "grad_norm": 45.39196014404297, "learning_rate": 2.3000000000000004e-06, "loss": 1.7661, "step": 83 }, { "epoch": 0.7777777777777778, "grad_norm": 42.13712692260742, "learning_rate": 2.2e-06, "loss": 2.0699, "step": 84 }, { "epoch": 0.7870370370370371, "grad_norm": 355.4501647949219, "learning_rate": 2.1000000000000002e-06, "loss": 1.6448, "step": 85 }, { "epoch": 0.7962962962962963, "grad_norm": 42.52521514892578, "learning_rate": 2.0000000000000003e-06, "loss": 1.8531, "step": 86 }, { "epoch": 0.8055555555555556, "grad_norm": 90.40050506591797, "learning_rate": 1.9000000000000002e-06, "loss": 1.9209, "step": 87 }, { "epoch": 0.8148148148148148, "grad_norm": Infinity, "learning_rate": 1.9000000000000002e-06, "loss": 2.0899, "step": 88 }, { "epoch": 0.8240740740740741, "grad_norm": 58.782588958740234, "learning_rate": 1.8000000000000001e-06, "loss": 1.7296, "step": 89 }, { "epoch": 0.8333333333333334, "grad_norm": 45.47020721435547, "learning_rate": 1.7000000000000002e-06, "loss": 1.6152, "step": 90 }, { "epoch": 0.8425925925925926, "grad_norm": 25.518877029418945, "learning_rate": 1.6000000000000001e-06, "loss": 1.6979, "step": 91 }, { "epoch": 0.8518518518518519, "grad_norm": 87.5730972290039, "learning_rate": 1.5e-06, "loss": 2.2574, "step": 92 }, { "epoch": 0.8611111111111112, "grad_norm": 39.618709564208984, "learning_rate": 1.4000000000000001e-06, "loss": 2.0495, "step": 93 }, { "epoch": 0.8703703703703703, "grad_norm": 206.44537353515625, "learning_rate": 1.3e-06, "loss": 2.2438, "step": 94 }, { "epoch": 0.8796296296296297, "grad_norm": 163.28404235839844, "learning_rate": 1.2000000000000002e-06, "loss": 1.7708, "step": 95 }, { "epoch": 0.8888888888888888, "grad_norm": 103.78318786621094, "learning_rate": 1.1e-06, "loss": 1.9995, "step": 96 }, { "epoch": 0.8981481481481481, "grad_norm": 150.3301239013672, "learning_rate": 1.0000000000000002e-06, "loss": 1.8358, "step": 97 }, { "epoch": 0.9074074074074074, "grad_norm": 1041.284423828125, "learning_rate": 9.000000000000001e-07, "loss": 1.6731, "step": 98 }, { "epoch": 0.9166666666666666, "grad_norm": 290.3284912109375, "learning_rate": 8.000000000000001e-07, "loss": 1.9554, "step": 99 }, { "epoch": 0.9259259259259259, "grad_norm": 25.19146728515625, "learning_rate": 7.000000000000001e-07, "loss": 2.0088, "step": 100 }, { "epoch": 0.9259259259259259, "eval_loss": 1.7495548725128174, "eval_map": 0.0043, "eval_map_50": 0.0096, "eval_map_75": 0.0039, "eval_map_Adult": 0.0056, "eval_map_Kid": 0.0029, "eval_map_large": 0.0073, "eval_map_medium": 0.0051, "eval_map_small": -1.0, "eval_mar_1": 0.0175, "eval_mar_10": 0.0983, "eval_mar_100": 0.3165, "eval_mar_100_Adult": 0.3636, "eval_mar_100_Kid": 0.2694, "eval_mar_large": 0.3035, "eval_mar_medium": 0.5364, "eval_mar_small": -1.0, "eval_runtime": 15.2134, "eval_samples_per_second": 4.996, "eval_steps_per_second": 1.249, "step": 100 }, { "epoch": 0.9259259259259259, "step": 100, "total_flos": 1.9112276736e+17, "train_loss": 1.9073539078235626, "train_runtime": 260.6876, "train_samples_per_second": 1.534, "train_steps_per_second": 0.384 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9112276736e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }