{ "best_metric": null, "best_model_checkpoint": null, "best_supernet_model_checkpoint": null, "epoch": 5.0, "global_step": 41175, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression_loss": 0.0, "epoch": 0.06, "learning_rate": 2.9989346402731773e-05, "loss": 5.5867, "step": 500 }, { "compression_loss": 0.0, "epoch": 0.12, "learning_rate": 2.9956882021999416e-05, "loss": 3.1338, "step": 1000 }, { "compression_loss": 0.0, "epoch": 0.18, "learning_rate": 2.990265252786665e-05, "loss": 2.5776, "step": 1500 }, { "compression_loss": 0.0, "epoch": 0.24, "learning_rate": 2.982673683458095e-05, "loss": 2.3185, "step": 2000 }, { "compression_loss": 0.0, "epoch": 0.3, "learning_rate": 2.972924541394191e-05, "loss": 2.0948, "step": 2500 }, { "compression_loss": 0.0, "epoch": 0.36, "learning_rate": 2.9610320134543718e-05, "loss": 1.9616, "step": 3000 }, { "compression_loss": 0.0, "epoch": 0.43, "learning_rate": 2.9470134055329297e-05, "loss": 1.8149, "step": 3500 }, { "compression_loss": 0.0, "epoch": 0.49, "learning_rate": 2.9308891173756593e-05, "loss": 1.7187, "step": 4000 }, { "compression_loss": 0.0, "epoch": 0.55, "learning_rate": 2.9126826128943387e-05, "loss": 1.6408, "step": 4500 }, { "compression_loss": 0.0, "epoch": 0.61, "learning_rate": 2.892420386022268e-05, "loss": 1.5817, "step": 5000 }, { "compression_loss": 0.0, "epoch": 0.67, "learning_rate": 2.8701319221605467e-05, "loss": 1.4504, "step": 5500 }, { "compression_loss": 0.0, "epoch": 0.73, "learning_rate": 2.8458496552711964e-05, "loss": 1.4507, "step": 6000 }, { "compression_loss": 0.0, "epoch": 0.79, "learning_rate": 2.819608920679567e-05, "loss": 1.4081, "step": 6500 }, { "compression_loss": 0.0, "epoch": 0.85, "learning_rate": 2.7915061153522062e-05, "loss": 1.3659, "step": 7000 }, { "compression_loss": 0.0, "epoch": 0.91, "learning_rate": 2.7614695114198718e-05, "loss": 1.3084, "step": 7500 }, { "compression_loss": 0.0, "epoch": 0.97, "learning_rate": 2.7295972289733377e-05, "loss": 1.2903, "step": 8000 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])", "epoch": 1.0, "eval_HasAns_exact": 67.15587044534414, "eval_HasAns_f1": 73.70381219321077, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.23212783851976, "eval_NoAns_f1": 76.23212783851976, "eval_NoAns_total": 5945, "eval_best_exact": 71.70049692579803, "eval_best_exact_thresh": 0.0, "eval_best_f1": 74.96978006244028, "eval_best_f1_thresh": 0.0, "eval_exact": 71.70049692579803, "eval_f1": 74.96978006244046, "eval_runtime": 23.6796, "eval_samples_per_second": 512.425, "eval_steps_per_second": 4.012, "eval_total": 11873, "step": 8235 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 1.0, "eval_HasAns_exact": 70.73211875843455, "eval_HasAns_f1": 76.86664314060151, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.19091673675358, "eval_NoAns_f1": 77.19091673675358, "eval_NoAns_total": 5945, "eval_best_exact": 73.9661416659648, "eval_best_exact_thresh": 0.0, "eval_best_f1": 77.02901208940321, "eval_best_f1_thresh": 0.0, "eval_exact": 73.9661416659648, "eval_f1": 77.02901208940332, "eval_runtime": 
33.7937, "eval_samples_per_second": 359.061, "eval_steps_per_second": 2.811, "eval_total": 11873, "step": 8235 }, { "compression_loss": 0.0, "epoch": 1.03, "learning_rate": 2.6960047250064117e-05, "loss": 1.047, "step": 8500 }, { "compression_loss": 0.0, "epoch": 1.09, "learning_rate": 2.6606062600812005e-05, "loss": 0.8362, "step": 9000 }, { "compression_loss": 0.0, "epoch": 1.15, "learning_rate": 2.6235947166838656e-05, "loss": 0.8438, "step": 9500 }, { "compression_loss": 0.0, "epoch": 1.21, "learning_rate": 2.5848756288308777e-05, "loss": 0.8345, "step": 10000 }, { "compression_loss": 0.0, "epoch": 1.28, "learning_rate": 2.5445778402131093e-05, "loss": 0.7877, "step": 10500 }, { "compression_loss": 0.0, "epoch": 1.34, "learning_rate": 2.502759991791811e-05, "loss": 0.8156, "step": 11000 }, { "compression_loss": 0.0, "epoch": 1.4, "learning_rate": 2.4595709053900474e-05, "loss": 0.7898, "step": 11500 }, { "compression_loss": 0.0, "epoch": 1.46, "learning_rate": 2.4149003478440486e-05, "loss": 0.8215, "step": 12000 }, { "compression_loss": 0.0, "epoch": 1.52, "learning_rate": 2.3688984359561745e-05, "loss": 0.7959, "step": 12500 }, { "compression_loss": 0.0, "epoch": 1.58, "learning_rate": 2.321632111273962e-05, "loss": 0.7867, "step": 13000 }, { "compression_loss": 0.0, "epoch": 1.64, "learning_rate": 2.2731701553064993e-05, "loss": 0.7743, "step": 13500 }, { "compression_loss": 0.0, "epoch": 1.7, "learning_rate": 2.2235830894342245e-05, "loss": 0.7451, "step": 14000 }, { "compression_loss": 0.0, "epoch": 1.76, "learning_rate": 2.172943072286878e-05, "loss": 0.7544, "step": 14500 }, { "compression_loss": 0.0, "epoch": 1.82, "learning_rate": 2.1213237947389485e-05, "loss": 0.7433, "step": 15000 }, { "compression_loss": 0.0, "epoch": 1.88, "learning_rate": 2.0688003726754053e-05, "loss": 0.7204, "step": 15500 }, { "compression_loss": 0.0, "epoch": 1.94, "learning_rate": 2.015556714598373e-05, "loss": 0.7353, "step": 16000 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])", "epoch": 2.0, "eval_HasAns_exact": 70.8502024291498, "eval_HasAns_f1": 78.09609572680118, "eval_HasAns_total": 5928, "eval_NoAns_exact": 73.8099243061396, "eval_NoAns_f1": 73.8099243061396, "eval_NoAns_total": 5945, "eval_best_exact": 72.33218226227575, "eval_best_exact_thresh": 0.0, "eval_best_f1": 75.94994150328284, "eval_best_f1_thresh": 0.0, "eval_exact": 72.33218226227575, "eval_f1": 75.94994150328287, "eval_runtime": 23.4965, "eval_samples_per_second": 516.418, "eval_steps_per_second": 4.043, "eval_total": 11873, "step": 16470 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 2.0, "eval_HasAns_exact": 73.65047233468286, "eval_HasAns_f1": 80.28842836522877, "eval_HasAns_total": 5928, "eval_NoAns_exact": 75.57611438183348, "eval_NoAns_f1": 75.57611438183348, "eval_NoAns_total": 5945, "eval_best_exact": 74.6146719447486, "eval_best_exact_thresh": 0.0, "eval_best_f1": 77.92889778060096, "eval_best_f1_thresh": 0.0, "eval_exact": 74.6146719447486, "eval_f1": 77.92889778060105, "eval_runtime": 33.5553, "eval_samples_per_second": 361.612, "eval_steps_per_second": 2.831, "eval_total": 11873, "step": 16470 }, { 
"compression_loss": 0.0, "epoch": 2.0, "learning_rate": 1.961674714301095e-05, "loss": 0.711, "step": 16500 }, { "compression_loss": 0.0, "epoch": 2.06, "learning_rate": 1.907016099695466e-05, "loss": 0.4295, "step": 17000 }, { "compression_loss": 0.0, "epoch": 2.13, "learning_rate": 1.851656308325107e-05, "loss": 0.4419, "step": 17500 }, { "compression_loss": 0.0, "epoch": 2.19, "learning_rate": 1.795784790017448e-05, "loss": 0.4418, "step": 18000 }, { "compression_loss": 0.0, "epoch": 2.25, "learning_rate": 1.7394828484780373e-05, "loss": 0.4349, "step": 18500 }, { "compression_loss": 0.0, "epoch": 2.31, "learning_rate": 1.682832413760241e-05, "loss": 0.4178, "step": 19000 }, { "compression_loss": 0.0, "epoch": 2.37, "learning_rate": 1.6259159230414073e-05, "loss": 0.4274, "step": 19500 }, { "compression_loss": 0.0, "epoch": 2.43, "learning_rate": 1.5688162006610627e-05, "loss": 0.4216, "step": 20000 }, { "compression_loss": 0.0, "epoch": 2.49, "learning_rate": 1.5116163375957171e-05, "loss": 0.3889, "step": 20500 }, { "compression_loss": 0.0, "epoch": 2.55, "learning_rate": 1.4543995705456567e-05, "loss": 0.4117, "step": 21000 }, { "compression_loss": 0.0, "epoch": 2.61, "learning_rate": 1.3973633400994553e-05, "loss": 0.4197, "step": 21500 }, { "compression_loss": 0.0, "epoch": 2.67, "learning_rate": 1.3404758687969215e-05, "loss": 0.3958, "step": 22000 }, { "compression_loss": 0.0, "epoch": 2.73, "learning_rate": 1.283706355615611e-05, "loss": 0.4128, "step": 22500 }, { "compression_loss": 0.0, "epoch": 2.79, "learning_rate": 1.2272515909019886e-05, "loss": 0.4129, "step": 23000 }, { "compression_loss": 0.0, "epoch": 2.85, "learning_rate": 1.1711937270962604e-05, "loss": 0.3932, "step": 23500 }, { "compression_loss": 0.0, "epoch": 2.91, "learning_rate": 1.1156143390721824e-05, "loss": 0.3946, "step": 24000 }, { "compression_loss": 0.0, "epoch": 2.98, "learning_rate": 1.0605943054300711e-05, "loss": 0.3972, "step": 24500 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])", "epoch": 3.0, "eval_HasAns_exact": 72.48650472334683, "eval_HasAns_f1": 79.20784888444516, "eval_HasAns_total": 5928, "eval_NoAns_exact": 74.4659377628259, "eval_NoAns_f1": 74.4659377628259, "eval_NoAns_total": 5945, "eval_best_exact": 73.47763833908868, "eval_best_exact_thresh": 0.0, "eval_best_f1": 76.83349854181672, "eval_best_f1_thresh": 0.0, "eval_exact": 73.47763833908868, "eval_f1": 76.83349854181674, "eval_runtime": 23.7252, "eval_samples_per_second": 511.439, "eval_steps_per_second": 4.004, "eval_total": 11873, "step": 24705 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 3.0, "eval_HasAns_exact": 74.61201079622133, "eval_HasAns_f1": 81.01977814822307, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.17409587888983, "eval_NoAns_f1": 77.17409587888983, "eval_NoAns_total": 5945, "eval_best_exact": 75.8948875600101, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.09418385097837, "eval_best_f1_thresh": 0.0, "eval_exact": 75.8948875600101, "eval_f1": 79.09418385097838, "eval_runtime": 33.7285, "eval_samples_per_second": 359.755, "eval_steps_per_second": 2.817, "eval_total": 11873, "step": 24705 }, 
{ "compression_loss": 0.0, "epoch": 3.04, "learning_rate": 1.0063217611277036e-05, "loss": 0.3297, "step": 25000 }, { "compression_loss": 0.0, "epoch": 3.1, "learning_rate": 9.5276474237082e-06, "loss": 0.2814, "step": 25500 }, { "compression_loss": 0.0, "epoch": 3.16, "learning_rate": 8.998959819917635e-06, "loss": 0.2885, "step": 26000 }, { "compression_loss": 0.0, "epoch": 3.22, "learning_rate": 8.479004873097141e-06, "loss": 0.2824, "step": 26500 }, { "compression_loss": 0.0, "epoch": 3.28, "learning_rate": 7.968539216772597e-06, "loss": 0.2794, "step": 27000 }, { "compression_loss": 0.0, "epoch": 3.34, "learning_rate": 7.468305675744049e-06, "loss": 0.279, "step": 27500 }, { "compression_loss": 0.0, "epoch": 3.4, "learning_rate": 6.979032185133999e-06, "loss": 0.2758, "step": 28000 }, { "compression_loss": 0.0, "epoch": 3.46, "learning_rate": 6.501430731103145e-06, "loss": 0.2828, "step": 28500 }, { "compression_loss": 0.0, "epoch": 3.52, "learning_rate": 6.036196314774858e-06, "loss": 0.2727, "step": 29000 }, { "compression_loss": 0.0, "epoch": 3.58, "learning_rate": 5.584005940876061e-06, "loss": 0.2761, "step": 29500 }, { "compression_loss": 0.0, "epoch": 3.64, "learning_rate": 5.145517632566403e-06, "loss": 0.2683, "step": 30000 }, { "compression_loss": 0.0, "epoch": 3.7, "learning_rate": 4.721369473889145e-06, "loss": 0.2731, "step": 30500 }, { "compression_loss": 0.0, "epoch": 3.76, "learning_rate": 4.3129817358827764e-06, "loss": 0.2653, "step": 31000 }, { "compression_loss": 0.0, "epoch": 3.83, "learning_rate": 3.919312074840663e-06, "loss": 0.2557, "step": 31500 }, { "compression_loss": 0.0, "epoch": 3.89, "learning_rate": 3.541766926172923e-06, "loss": 0.2702, "step": 32000 }, { "compression_loss": 0.0, "epoch": 3.95, "learning_rate": 3.18089569001271e-06, "loss": 0.2674, "step": 32500 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])", "epoch": 4.0, "eval_HasAns_exact": 73.6336032388664, "eval_HasAns_f1": 80.4921220873396, "eval_HasAns_total": 5928, "eval_NoAns_exact": 73.4735071488646, "eval_NoAns_f1": 73.4735071488646, "eval_NoAns_total": 5945, "eval_best_exact": 73.55344057946601, "eval_best_exact_thresh": 0.0, "eval_best_f1": 76.97778992114449, "eval_best_f1_thresh": 0.0, "eval_exact": 73.55344057946601, "eval_f1": 76.97778992114453, "eval_runtime": 23.6627, "eval_samples_per_second": 512.79, "eval_steps_per_second": 4.015, "eval_total": 11873, "step": 32940 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 4.0, "eval_HasAns_exact": 75.94466936572199, "eval_HasAns_f1": 82.29694376223831, "eval_HasAns_total": 5928, "eval_NoAns_exact": 75.20605550883096, "eval_NoAns_f1": 75.20605550883096, "eval_NoAns_total": 5945, "eval_best_exact": 75.57483365619473, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.74642319738464, "eval_best_f1_thresh": 0.0, "eval_exact": 75.57483365619473, "eval_f1": 78.7464231973847, "eval_runtime": 33.7341, "eval_samples_per_second": 359.695, "eval_steps_per_second": 2.816, "eval_total": 11873, "step": 32940 }, { "compression_loss": 0.0, "epoch": 4.01, "learning_rate": 2.83856325972758e-06, "loss": 0.2597, "step": 33000 }, { 
"compression_loss": 0.0, "epoch": 4.07, "learning_rate": 2.5125184709830478e-06, "loss": 0.2285, "step": 33500 }, { "compression_loss": 0.0, "epoch": 4.13, "learning_rate": 2.204645347373324e-06, "loss": 0.2257, "step": 34000 }, { "compression_loss": 0.0, "epoch": 4.19, "learning_rate": 1.9153919029589925e-06, "loss": 0.2191, "step": 34500 }, { "compression_loss": 0.0, "epoch": 4.25, "learning_rate": 1.6457002137685011e-06, "loss": 0.2288, "step": 35000 }, { "compression_loss": 0.0, "epoch": 4.31, "learning_rate": 1.394881935346834e-06, "loss": 0.2239, "step": 35500 }, { "compression_loss": 0.0, "epoch": 4.37, "learning_rate": 1.1638616961868014e-06, "loss": 0.2249, "step": 36000 }, { "compression_loss": 0.0, "epoch": 4.43, "learning_rate": 9.529756747576662e-07, "loss": 0.2158, "step": 36500 }, { "compression_loss": 0.0, "epoch": 4.49, "learning_rate": 7.625307504052814e-07, "loss": 0.2238, "step": 37000 }, { "compression_loss": 0.0, "epoch": 4.55, "learning_rate": 5.928040567841786e-07, "loss": 0.2208, "step": 37500 }, { "compression_loss": 0.0, "epoch": 4.61, "learning_rate": 4.4404257857527e-07, "loss": 0.2302, "step": 38000 }, { "compression_loss": 0.0, "epoch": 4.68, "learning_rate": 3.1646279207609816e-07, "loss": 0.2286, "step": 38500 }, { "compression_loss": 0.0, "epoch": 4.74, "learning_rate": 2.1025035018655492e-07, "loss": 0.2334, "step": 39000 }, { "compression_loss": 0.0, "epoch": 4.8, "learning_rate": 1.2555981224854353e-07, "loss": 0.2191, "step": 39500 }, { "compression_loss": 0.0, "epoch": 4.86, "learning_rate": 6.251441913268763e-08, "loss": 0.2244, "step": 40000 }, { "compression_loss": 0.0, "epoch": 4.92, "learning_rate": 2.1205913899372543e-08, "loss": 0.2174, "step": 40500 }, { "compression_loss": 0.0, "epoch": 4.98, "learning_rate": 1.7116537419809008e-09, "loss": 0.2197, "step": 41000 }, { "Minimum SubNet": "OrderedDict([(, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])", "epoch": 5.0, "eval_HasAns_exact": 73.36369770580296, "eval_HasAns_f1": 80.20962919069306, "eval_HasAns_total": 5928, "eval_NoAns_exact": 74.26408746846089, "eval_NoAns_f1": 74.26408746846089, "eval_NoAns_total": 5945, "eval_best_exact": 73.81453718521014, "eval_best_exact_thresh": 0.0, "eval_best_f1": 77.23260185651709, "eval_best_f1_thresh": 0.0, "eval_exact": 73.81453718521014, "eval_f1": 77.23260185651719, "eval_runtime": 23.6601, "eval_samples_per_second": 512.846, "eval_steps_per_second": 4.015, "eval_total": 11873, "step": 41175 }, { "SuperNet": "OrderedDict([(, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])", "epoch": 5.0, "eval_HasAns_exact": 75.42172739541161, "eval_HasAns_f1": 81.86740867460124, "eval_HasAns_total": 5928, "eval_NoAns_exact": 75.34062237174096, "eval_NoAns_f1": 75.34062237174096, "eval_NoAns_total": 5945, "eval_best_exact": 75.38111681967489, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.59934293127556, "eval_best_f1_thresh": 0.0, "eval_exact": 75.38111681967489, "eval_f1": 78.59934293127563, "eval_runtime": 33.7058, "eval_samples_per_second": 359.997, "eval_steps_per_second": 2.819, "eval_total": 11873, "step": 41175 }, { "epoch": 5.0, "step": 41175, "total_flos": 1.2910084832623104e+17, "train_loss": 0.7438544612808089, 
"train_runtime": 64410.4338, "train_samples_per_second": 10.228, "train_steps_per_second": 0.639 } ], "max_steps": 41175, "min_subnet_acc": null, "min_subnet_best_acc": null, "num_train_epochs": 5, "supernet_acc": null, "supernet_best_acc": null, "total_flos": 1.2910084832623104e+17, "trial_name": null, "trial_params": null }