{ "best_metric": 0.1380617767572403, "best_model_checkpoint": "output/multi/quirky_sciq_raw/checkpoint-2000", "epoch": 6.387225548902196, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 7.102272727272729e-07, "loss": 1.4268, "step": 50 }, { "epoch": 0.32, "learning_rate": 1.4204545454545458e-06, "loss": 1.2946, "step": 100 }, { "epoch": 0.48, "learning_rate": 2.1306818181818183e-06, "loss": 0.8201, "step": 150 }, { "epoch": 0.64, "learning_rate": 2.8409090909090916e-06, "loss": 0.4094, "step": 200 }, { "epoch": 0.8, "learning_rate": 3.5511363636363636e-06, "loss": 0.3023, "step": 250 }, { "epoch": 0.96, "learning_rate": 4.2613636363636365e-06, "loss": 0.3078, "step": 300 }, { "epoch": 1.12, "learning_rate": 4.9715909090909094e-06, "loss": 0.2638, "step": 350 }, { "epoch": 1.28, "learning_rate": 5.681818181818183e-06, "loss": 0.2555, "step": 400 }, { "epoch": 1.44, "learning_rate": 6.392045454545454e-06, "loss": 0.2116, "step": 450 }, { "epoch": 1.6, "learning_rate": 7.102272727272727e-06, "loss": 0.262, "step": 500 }, { "epoch": 1.6, "eval_val_acc_stderr": 0.010449377840435672, "eval_val_accuracy": 0.8964705882352941, "eval_val_loss": 0.27390462160110474, "eval_val_runtime": 33.9031, "eval_val_samples_per_second": 25.071, "eval_val_steps_per_second": 3.156, "step": 500 }, { "epoch": 1.6, "eval_val_alice_acc_stderr": 0.015358632353702726, "eval_val_alice_accuracy": 0.8845265588914549, "eval_val_alice_loss": 0.26470133662223816, "eval_val_alice_runtime": 17.1464, "eval_val_alice_samples_per_second": 25.253, "eval_val_alice_steps_per_second": 3.208, "step": 500 }, { "epoch": 1.6, "eval_val_bob_acc_stderr": 0.014258509225908407, "eval_val_bob_accuracy": 0.9064748201438849, "eval_val_bob_loss": 0.28322896361351013, "eval_val_bob_runtime": 17.1893, "eval_val_bob_samples_per_second": 24.259, "eval_val_bob_steps_per_second": 3.083, "step": 500 }, { "epoch": 1.6, "eval_val_bob_gt_acc_stderr": 0.014093125547753299, "eval_val_bob_gt_accuracy": 0.9088729016786571, "eval_val_bob_gt_loss": 0.24366062879562378, "eval_val_bob_gt_runtime": 17.1843, "eval_val_bob_gt_samples_per_second": 24.266, "eval_val_bob_gt_steps_per_second": 3.084, "step": 500 }, { "epoch": 1.76, "learning_rate": 7.8125e-06, "loss": 0.246, "step": 550 }, { "epoch": 1.92, "learning_rate": 8.522727272727273e-06, "loss": 0.2034, "step": 600 }, { "epoch": 2.08, "learning_rate": 9.232954545454546e-06, "loss": 0.2155, "step": 650 }, { "epoch": 2.24, "learning_rate": 9.943181818181819e-06, "loss": 0.1785, "step": 700 }, { "epoch": 2.4, "learning_rate": 1.0653409090909092e-05, "loss": 0.1693, "step": 750 }, { "epoch": 2.55, "learning_rate": 1.1363636363636366e-05, "loss": 0.1812, "step": 800 }, { "epoch": 2.71, "learning_rate": 1.2073863636363636e-05, "loss": 0.1656, "step": 850 }, { "epoch": 2.87, "learning_rate": 1.2784090909090909e-05, "loss": 0.1547, "step": 900 }, { "epoch": 3.03, "learning_rate": 1.3494318181818182e-05, "loss": 0.1481, "step": 950 }, { "epoch": 3.19, "learning_rate": 1.4204545454545455e-05, "loss": 0.1085, "step": 1000 }, { "epoch": 3.19, "eval_val_acc_stderr": 0.007680257984675673, "eval_val_accuracy": 0.9470588235294117, "eval_val_loss": 0.161184623837471, "eval_val_runtime": 33.8595, "eval_val_samples_per_second": 25.104, "eval_val_steps_per_second": 3.16, "step": 1000 }, { "epoch": 3.19, "eval_val_alice_acc_stderr": 0.01077764816095986, "eval_val_alice_accuracy": 0.9468822170900693, "eval_val_alice_loss": 0.17345106601715088, "eval_val_alice_runtime": 17.1237, "eval_val_alice_samples_per_second": 25.287, "eval_val_alice_steps_per_second": 3.212, "step": 1000 }, { "epoch": 3.19, "eval_val_bob_acc_stderr": 0.010709104534851776, "eval_val_bob_accuracy": 0.9496402877697842, "eval_val_bob_loss": 0.14615066349506378, "eval_val_bob_runtime": 17.1478, "eval_val_bob_samples_per_second": 24.318, "eval_val_bob_steps_per_second": 3.091, "step": 1000 }, { "epoch": 3.19, "eval_val_bob_gt_acc_stderr": 0.01809102140047306, "eval_val_bob_gt_accuracy": 0.8369304556354916, "eval_val_bob_gt_loss": 0.6338604688644409, "eval_val_bob_gt_runtime": 17.1654, "eval_val_bob_gt_samples_per_second": 24.293, "eval_val_bob_gt_steps_per_second": 3.088, "step": 1000 }, { "epoch": 3.35, "learning_rate": 1.4914772727272729e-05, "loss": 0.1122, "step": 1050 }, { "epoch": 3.51, "learning_rate": 1.5625e-05, "loss": 0.1222, "step": 1100 }, { "epoch": 3.67, "learning_rate": 1.6335227272727275e-05, "loss": 0.0913, "step": 1150 }, { "epoch": 3.83, "learning_rate": 1.7045454545454546e-05, "loss": 0.0863, "step": 1200 }, { "epoch": 3.99, "learning_rate": 1.775568181818182e-05, "loss": 0.0978, "step": 1250 }, { "epoch": 4.15, "learning_rate": 1.8465909090909092e-05, "loss": 0.0741, "step": 1300 }, { "epoch": 4.31, "learning_rate": 1.9176136363636366e-05, "loss": 0.0756, "step": 1350 }, { "epoch": 4.47, "learning_rate": 1.9886363636363638e-05, "loss": 0.068, "step": 1400 }, { "epoch": 4.63, "learning_rate": 1.9894763217238787e-05, "loss": 0.0564, "step": 1450 }, { "epoch": 4.79, "learning_rate": 1.976948133299925e-05, "loss": 0.0682, "step": 1500 }, { "epoch": 4.79, "eval_val_acc_stderr": 0.007088289135317922, "eval_val_accuracy": 0.9552941176470588, "eval_val_loss": 0.21694479882717133, "eval_val_runtime": 33.9001, "eval_val_samples_per_second": 25.074, "eval_val_steps_per_second": 3.156, "step": 1500 }, { "epoch": 4.79, "eval_val_alice_acc_stderr": 0.01077764816095986, "eval_val_alice_accuracy": 0.9468822170900693, "eval_val_alice_loss": 0.24521102011203766, "eval_val_alice_runtime": 17.1404, "eval_val_alice_samples_per_second": 25.262, "eval_val_alice_steps_per_second": 3.209, "step": 1500 }, { "epoch": 4.79, "eval_val_bob_acc_stderr": 0.009119154497166923, "eval_val_bob_accuracy": 0.9640287769784173, "eval_val_bob_loss": 0.1885310858488083, "eval_val_bob_runtime": 17.195, "eval_val_bob_samples_per_second": 24.251, "eval_val_bob_steps_per_second": 3.082, "step": 1500 }, { "epoch": 4.79, "eval_val_bob_gt_acc_stderr": 0.016945607332261307, "eval_val_bob_gt_accuracy": 0.8609112709832134, "eval_val_bob_gt_loss": 0.5383512377738953, "eval_val_bob_gt_runtime": 17.1938, "eval_val_bob_gt_samples_per_second": 24.253, "eval_val_bob_gt_steps_per_second": 3.083, "step": 1500 }, { "epoch": 4.95, "learning_rate": 1.964419944875971e-05, "loss": 0.0996, "step": 1550 }, { "epoch": 5.11, "learning_rate": 1.951891756452017e-05, "loss": 0.0458, "step": 1600 }, { "epoch": 5.27, "learning_rate": 1.9393635680280633e-05, "loss": 0.0424, "step": 1650 }, { "epoch": 5.43, "learning_rate": 1.9268353796041094e-05, "loss": 0.0406, "step": 1700 }, { "epoch": 5.59, "learning_rate": 1.9143071911801552e-05, "loss": 0.0559, "step": 1750 }, { "epoch": 5.75, "learning_rate": 1.9017790027562014e-05, "loss": 0.037, "step": 1800 }, { "epoch": 5.91, "learning_rate": 1.8892508143322475e-05, "loss": 0.0334, "step": 1850 }, { "epoch": 6.07, "learning_rate": 1.8767226259082937e-05, "loss": 0.0337, "step": 1900 }, { "epoch": 6.23, "learning_rate": 1.8641944374843398e-05, "loss": 0.0249, "step": 1950 }, { "epoch": 6.39, "learning_rate": 1.851666249060386e-05, "loss": 0.0389, "step": 2000 }, { "epoch": 6.39, "eval_val_acc_stderr": 0.005681555533037121, "eval_val_accuracy": 0.971764705882353, "eval_val_loss": 0.1380617767572403, "eval_val_runtime": 33.9624, "eval_val_samples_per_second": 25.028, "eval_val_steps_per_second": 3.151, "step": 2000 }, { "epoch": 6.39, "eval_val_alice_acc_stderr": 0.009065592097915486, "eval_val_alice_accuracy": 0.9630484988452656, "eval_val_alice_loss": 0.18371498584747314, "eval_val_alice_runtime": 17.17, "eval_val_alice_samples_per_second": 25.218, "eval_val_alice_steps_per_second": 3.203, "step": 2000 }, { "epoch": 6.39, "eval_val_bob_acc_stderr": 0.007116185390941344, "eval_val_bob_accuracy": 0.9784172661870504, "eval_val_bob_loss": 0.09321955591440201, "eval_val_bob_runtime": 17.2178, "eval_val_bob_samples_per_second": 24.219, "eval_val_bob_steps_per_second": 3.078, "step": 2000 }, { "epoch": 6.39, "eval_val_bob_gt_acc_stderr": 0.017422318096349455, "eval_val_bob_gt_accuracy": 0.8513189448441247, "eval_val_bob_gt_loss": 1.3637529611587524, "eval_val_bob_gt_runtime": 17.1982, "eval_val_bob_gt_samples_per_second": 24.247, "eval_val_bob_gt_steps_per_second": 3.082, "step": 2000 } ], "logging_steps": 50, "max_steps": 9390, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 7.258415058505728e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }